Example #1
    def fit(self, x, y):
        """
        Iteratively adjust the prototype vectors so that new incoming
        points can be classified using the existing data points.

        Parameters
        ----------
        x : array-like
            Input samples.
        y : array-like
            Class labels.
        """
        x, y = check_X_y(x, y)
        check_classification_targets(y)
        self.classes_ = np.unique(y)

        self.X_ = x
        self.y_ = y
        while self.epsilon >= 0.01:
            rnd_i = np.random.randint(0, len(x))
            rnd_s = x[rnd_i]
            target_y = y[rnd_i]

            self.epsilon = self.epsilon - self.epsilon_dec_factor

            closest_pvector = self.find_closest(rnd_s)[1]

            if target_y == closest_pvector.class_id:
                closest_pvector.update(rnd_s)
            else:
                closest_pvector.update(rnd_s, False)
            closest_pvector.epsilon = self.epsilon
        return self.p_vectors
Example #2
    def fit(self, X, y):
        """
        Estimate a class posterior for each unique leaf id in X,
        dispatching to fit_multilabel for multilabel-indicator targets.
        """
        check_classification_targets(y)

        if type_of_target(y) == 'multilabel-indicator':
            # Fit multilabel binary task.
            self.multilabel = True
            return self.fit_multilabel(X, y)

        num_classes = len(np.unique(y))
        self.uniform_posterior = np.ones(num_classes) / num_classes

        self.leaf_to_posterior = {}

        for leaf_id in np.unique(X):
            idxs_in_leaf = np.where(X == leaf_id)[0]
            class_counts = [
                len(np.where(y[idxs_in_leaf] == y_val)[0])
                for y_val in np.unique(y)
            ]
            posteriors = np.nan_to_num(
                np.array(class_counts) / np.sum(class_counts))

            if self.finite_sample_correction:
                posteriors = self._finite_sample_correction(
                    posteriors, len(idxs_in_leaf), len(np.unique(y)))

            self.leaf_to_posterior[leaf_id] = posteriors

        self._is_fitted = True

        return self
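
A toy walk-through of the leaf-posterior computation above, assuming X holds one leaf id per sample (illustrative values only):

    import numpy as np

    X = np.array([0, 0, 1, 1, 1])   # leaf ids, one per sample
    y = np.array([0, 1, 1, 1, 0])
    for leaf_id in np.unique(X):
        idxs_in_leaf = np.where(X == leaf_id)[0]
        counts = [np.sum(y[idxs_in_leaf] == c) for c in np.unique(y)]
        print(leaf_id, np.array(counts) / np.sum(counts))
    # leaf 0 -> [0.5 0.5]; leaf 1 -> [0.333.. 0.666..]
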
Example #3
    def fit(self, X: Union[pd.DataFrame, np.ndarray],
            y: Union[pd.Series, np.ndarray]) -> "DAGClassifier":
        """
        Fits the structure model using the concatenation of X and y.
        """
        # clf target check
        check_classification_targets(y)

        # encode the categories to be numeric
        enc = LabelEncoder()
        y = y.copy()
        y[:] = enc.fit_transform(y)
        # store the classes from the LabelEncoder
        self.classes_ = enc.classes_

        # class number checks
        n_classes = len(self.classes_)
        if n_classes < 2:
            raise ValueError("This solver needs samples of at least 2 classes"
                             " in the data, but the data contains only one"
                             " class: {}".format(self.classes_[0]))
        if n_classes > 2:
            raise ValueError(
                "This solver does not support more than 2 classes")

        # store the private attr __target_dist_type
        self.__target_dist_type = "bin"
        # fit the NOTEARS model
        super().fit(X, y)
        return self
Example #4
    def fit(self, X, y):
        """Check inputs and statistics of the sampler.

        You should use ``fit_resample`` in all cases.

        Parameters
        ----------
        X : {array-like, dataframe, sparse matrix} of shape \
                (n_samples, n_features)
            Data array.

        y : array-like of shape (n_samples,)
            Target array.

        Returns
        -------
        self : object
            Return the instance itself.
        """
        # we need to overwrite SamplerMixin.fit to bypass the validation
        if self.validate:
            check_classification_targets(y)
            X, y, _ = self._check_X_y(X, y, accept_sparse=self.accept_sparse)

        self.sampling_strategy_ = check_sampling_strategy(
            self.sampling_strategy, y, self._sampling_type)

        return self
Example #5
    def _validate_y(self, y):
        y = column_or_1d(y, warn=True)
        check_classification_targets(y)
        self.classes_, y = np.unique(y, return_inverse=True)
        self.n_classes_ = len(self.classes_)

        return y
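
For reference, a minimal sketch of the np.unique(..., return_inverse=True) idiom used above (illustrative values only):

    import numpy as np

    y = np.array(['cat', 'dog', 'cat', 'bird'])
    classes_, y_enc = np.unique(y, return_inverse=True)
    # classes_ -> ['bird', 'cat', 'dog']; y_enc -> [1, 2, 1, 0]
    # each entry of y_enc is an integer code indexing into classes_
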
Example #6
    def fit_resample(self, X, y):
        """Resample the dataset.

        Parameters
        ----------
        X : {array-like, dataframe, sparse matrix} of shape \
                (n_samples, n_features)
            Matrix containing the data which have to be sampled.

        y : array-like of shape (n_samples,)
            Corresponding label for each sample in X.

        Returns
        -------
        X_resampled : {array-like, dataframe, sparse matrix} of shape \
                (n_samples_new, n_features)
            The array containing the resampled data.

        y_resampled : array-like of shape (n_samples_new,)
            The corresponding label of `X_resampled`.
        """
        check_classification_targets(y)
        arrays_transformer = ArraysTransformer(X, y)
        X, y, binarize_y = self._check_X_y(X, y)

        self.sampling_strategy_ = check_sampling_strategy(
            self.sampling_strategy, y, self._sampling_type)

        output = self._fit_resample(X, y)

        y_ = label_binarize(output[1],
                            np.unique(y)) if binarize_y else output[1]

        X_, y_ = arrays_transformer.transform(output[0], y_)
        return (X_, y_) if len(output) == 2 else (X_, y_, output[2])
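
The binarize_y round-trip relies on sklearn's label_binarize; a minimal sketch (illustrative values only):

    import numpy as np
    from sklearn.preprocessing import label_binarize

    y = np.array([0, 1, 2, 1])
    Y = label_binarize(y, classes=np.unique(y))
    # Y is the (n_samples, n_classes) indicator matrix that fit_resample
    # restores after resampling when the input y was multilabel-indicator
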
Example #7
    def fit(self, X, y):

        check_classification_targets(y)

        le = LabelEncoder()
        y_ind = le.fit_transform(y)
        self.classes_ = classes = le.classes_
        n_classes = classes.size

        X_clas = []

        for cur_class in range(n_classes):
            center_mask = y_ind == cur_class
            sentence = ' '.join(list(chain(*X[center_mask])))
            X_clas.append(sentence)

        tfidf = TfidfVectorizer(norm=self.norm,
                                use_idf=self.use_idf,
                                smooth_idf=self.smooth_idf,
                                sublinear_tf=self.sublinear_tf)
        self.tfidf_array_ = tfidf.fit_transform(X_clas)
        self.tfidf_ = tfidf

        self.fitted = True

        return self
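
The per-class "document" construction above can be sketched standalone, assuming X is an object array of token lists (illustrative values only):

    import numpy as np
    from itertools import chain

    X = np.array([['good', 'movie'], ['bad', 'movie'], ['great', 'film']],
                 dtype=object)
    y_ind = np.array([1, 0, 1])
    for cur_class in range(2):
        center_mask = y_ind == cur_class
        print(' '.join(list(chain(*X[center_mask]))))
    # class 0 -> "bad movie"; class 1 -> "good movie great film"
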
Example #8
    def fit(self, X, y):
        '''
        Fits variational Bayesian Logistic Regression
        
        Parameters
        ----------
        X: array-like of size [n_samples, n_features]
           Matrix of explanatory variables
           
        y: array-like of size [n_samples]
           Vector of dependent variables

        Returns
        -------
        self: object
           self
        '''
        # preprocess data
        X, y = check_X_y(X, y, dtype=np.float64)
        check_classification_targets(y)
        self.classes_ = np.unique(y)
        n_classes = len(self.classes_)

        # take into account bias term if required
        n_samples, n_features = X.shape
        n_features = n_features + int(self.fit_intercept)
        if self.fit_intercept:
            X = np.hstack((np.ones([n_samples, 1]), X))

        # handle multiclass problems using One-vs-Rest
        if n_classes < 2:
            raise ValueError("Need samples of at least 2 classes")
        if n_classes > 2:
            self.coef_, self.sigma_ = [0] * n_classes, [0] * n_classes
            self.intercept_ = [0] * n_classes
        else:
            self.coef_, self.sigma_, self.intercept_ = [0], [0], [0]

        # hyperparameters of the prior
        a = self.a + 0.5 * n_features
        b = self.b

        for i in range(len(self.coef_)):
            if n_classes == 2:
                pos_class = self.classes_[1]
            else:
                pos_class = self.classes_[i]
            mask = (y == pos_class)
            y_bin = np.ones(y.shape, dtype=np.float64)
            y_bin[~mask] = 0
            coef_, sigma_ = self._fit(X, y_bin, a, b)
            intercept_ = 0
            if self.fit_intercept:
                intercept_ = coef_[0]
                coef_ = coef_[1:]
            self.coef_[i] = coef_
            self.intercept_[i] = intercept_
            self.sigma_[i] = sigma_
        self.coef_ = np.asarray(self.coef_)
        return self
Example #9
File: mcb.py Project: yuv4r4j/pyts
    def fit(self, X, y=None):
        """Compute the bin edges for each feature.

        Parameters
        ----------
        X : array-like, shape = (n_samples, n_timestamps)
            Data to transform.

        y : None or array-like, shape = (n_samples,)
            Class labels for each sample. Only used if ``strategy='entropy'``.

        """
        if self.strategy == 'entropy':
            if y is None:
                raise ValueError("y cannot be None if strategy='entropy'.")
            X, y = check_X_y(X, y, dtype='float64')
            check_classification_targets(y)
        else:
            X = check_array(X, dtype='float64')
        n_samples, n_timestamps = X.shape
        self._n_timestamps_fit = n_timestamps
        self._alphabet = self._check_params(n_samples)
        self._check_constant(X)
        self.bin_edges_ = self._compute_bins(X, y, n_timestamps, self.n_bins,
                                             self.strategy)
        return self
Example #10
    def _encode_y(self, y):
        """
        Encode classes into {0, ..., n_classes - 1} and set the attributes
        classes_, n_classes_ and n_trees_per_iteration_.

        Parameters
        ----------
        y : ndarray
            Array of input labels

        Returns
        -------
        output : ndarray
            Encoded array of labels
        """
        check_classification_targets(y)
        label_encoder = LabelEncoder()
        encoded_y = label_encoder.fit_transform(y)
        self.classes_ = label_encoder.classes_
        n_classes_ = self.classes_.shape[0]
        self._n_classes_ = n_classes_
        # only 1 tree for binary classification.
        # TODO: For multiclass classification, we build 1 tree per class.
        self.n_trees_per_iteration_ = 1 if n_classes_ <= 2 else n_classes_
        encoded_y = np.ascontiguousarray(encoded_y, dtype=np.float32)
        return encoded_y
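
A minimal sketch of the LabelEncoder step (illustrative values only):

    import numpy as np
    from sklearn.preprocessing import LabelEncoder

    le = LabelEncoder()
    encoded = le.fit_transform(np.array(['b', 'a', 'b', 'c']))
    # le.classes_ -> ['a', 'b', 'c']; encoded -> [1, 0, 1, 2]
    # with 3 classes, n_trees_per_iteration_ above would be 3;
    # for a binary problem it stays 1
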
Example #11
    def fit(self, X, y):
        """
        Fits transformed data X given corresponding class labels y.

        Parameters
        ----------
        X : array of shape [n_samples, n_features]
            the transformed input data
        y : array of shape [n_samples]
            the class labels
        """
        check_classification_targets(y)

        num_classes = len(np.unique(y))
        self.uniform_posterior = np.ones(num_classes) / num_classes

        self.leaf_to_posterior = {}

        for leaf_id in np.unique(X):
            idxs_in_leaf = np.where(X == leaf_id)[0]
            class_counts = [
                len(np.where(y[idxs_in_leaf] == y_val)[0]) for y_val in np.unique(y)
            ]
            posteriors = np.nan_to_num(np.array(class_counts) / np.sum(class_counts))

            if self.finite_sample_correction:
                posteriors = self._finite_sample_correction(
                    posteriors, len(idxs_in_leaf), len(np.unique(y))
                )

            self.leaf_to_posterior[leaf_id] = posteriors

        self._is_fitted = True

        return self
Example #12
    def fit(self, X, y):
        # Check the algorithm parameters
        self._check_params()

        # Check that X and y have correct shape
        X, y = check_X_y(X, y, y_numeric=False, dtype=[np.single, np.double])

        check_classification_targets(y)

        # Encode labels
        le = preprocessing.LabelEncoder()
        le.fit(y)
        self.classes_ = le.classes_
        y_ = le.transform(y)

        # Convert to 2d array
        y_ = y_.reshape((-1, 1))

        self.n_outputs_ = y_.shape[1]

        self.n_classes_ = len(self.classes_)

        self.n_features_ = X.shape[1]

        # Classifier can't train when only one class is present.
        # Trivial case
        if self.n_classes_ == 1:
            return self

        # Get random seed
        rs_ = check_random_state(self.random_state)
        seed_ = rs_.randint(0, np.iinfo('i').max)

        # Define type of data
        fptype = getFPType(X)

        # Fit the model
        train_algo = d4p.gbt_classification_training(
            fptype=fptype,
            nClasses=self.n_classes_,
            splitMethod=self.split_method,
            maxIterations=self.max_iterations,
            maxTreeDepth=self.max_tree_depth,
            shrinkage=self.shrinkage,
            minSplitLoss=self.min_split_loss,
            lambda_=self.reg_lambda,
            observationsPerTreeFraction=self.observations_per_tree_fraction,
            featuresPerNode=self.features_per_node,
            minObservationsInLeafNode=self.min_observations_in_leaf_node,
            memorySavingMode=self.memory_saving_mode,
            maxBins=self.max_bins,
            minBinSize=self.min_bin_size,
            engine=d4p.engines_mcg59(seed=seed_))
        train_result = train_algo.compute(X, y_)

        # Store the model
        self.daal_model_ = train_result.model

        # Return the classifier
        return self
Example #13
    def fit(self,X,y):
        '''
        Fits variational Bayesian Logistic Regression
        
        Parameters
        ----------
        X: array-like of size [n_samples, n_features]
           Matrix of explanatory variables
           
        y: array-like of size [n_samples]
           Vector of dependent variables

        Returns
        -------
        self: object
           self
        '''
        # preprocess data
        X,y = check_X_y( X, y , dtype = np.float64)
        check_classification_targets(y)
        self.classes_ = np.unique(y)
        n_classes = len(self.classes_)
        
        # take into account bias term if required 
        n_samples, n_features = X.shape
        n_features = n_features + int(self.fit_intercept)
        if self.fit_intercept:
            X = np.hstack( (np.ones([n_samples,1]),X))
        
        # handle multiclass problems using One-vs-Rest 
        if n_classes < 2:
            raise ValueError("Need samples of at least 2 classes")
        if n_classes > 2:
            self.coef_, self.sigma_ = [0]*n_classes,[0]*n_classes
            self.intercept_         = [0]*n_classes
        else:
            self.coef_, self.sigma_, self.intercept_ = [0],[0],[0]
        
        # hyperparameters of the prior
        a  = self.a + 0.5 * n_features
        b  = self.b
        
        for i in range(len(self.coef_)):
            if n_classes == 2:
                pos_class = self.classes_[1]
            else:
                pos_class   = self.classes_[i]
            mask            = (y == pos_class)
            y_bin           = np.ones(y.shape, dtype=np.float64)
            y_bin[~mask]    = 0
            coef_, sigma_   = self._fit(X,y_bin,a,b)
            intercept_ = 0
            if self.fit_intercept:
                intercept_  = coef_[0]
                coef_       = coef_[1:]
            self.coef_[i]   = coef_
            self.intercept_[i] = intercept_
            self.sigma_[i]  = sigma_
        self.coef_  = np.asarray(self.coef_)
        return self
Example #14
    def fit(self, X, y):
        """A fitting function for a classifier.

        Parameters
        ----------
        X : array-like, shape = [n_samples, n_features]
            The training input samples.
        y : array-like, shape = [n_samples]
            The target values. An array of int.

        Returns
        -------
        self : object
            Returns self.
        """
        # Check that X and y have correct shape
        X, y = check_X_y(X, y)
        check_classification_targets(y)

        # Store training set and number of features
        self.X_ = X
        self.y_ = y
        self.n_features_ = X.shape[1]

        # Establish subspace
        if self.given_subspace is None:
            self.subspace_ = self._assumed_subspace()
        else:
            self.subspace_ = np.array(self.given_subspace)

        # Acquire subspaced X
        subspaced_X = X[:, self.subspace_].astype('float64')

        # Store the classes seen during fit
        self.classes_, y = np.unique(y, return_inverse=True)

        # Scaler
        self.scaler_ = MinMaxScaler()
        self.scaler_.fit(subspaced_X)

        # Expose
        self.model_ = self.expose(subspaced_X, y)

        # HSV
        self._hue = np.argmax(self.model_, axis=2) / float(len(self.classes_))
        self._saturation = np.max(self.model_, axis=2) - \
            np.min(self.model_, axis=2)
        self._value = np.max(self.model_, axis=2)
        self._hsv = np.dstack((self._hue, self._saturation, self._value))

        # Calculate measures
        self._calculate_measures()

        # Prepare linear model
        self.linear_model_ = self.model_.reshape((-1, len(self.classes_)))
        self.linear_model_ = np.divide(self.linear_model_,
                                       np.sum(self.linear_model_, axis=0))

        # Return the classifier
        return self
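
The HSV mapping above can be illustrated on a toy score grid (illustrative values; not part of the original estimator):

    import numpy as np

    model = np.random.default_rng(0).random((4, 4, 3))  # per-cell class scores
    hue = np.argmax(model, axis=2) / 3.0                # dominant class -> hue
    saturation = np.max(model, axis=2) - np.min(model, axis=2)
    value = np.max(model, axis=2)                       # confidence -> value
    hsv = np.dstack((hue, saturation, value))           # shape (4, 4, 3)
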
Example #15
 def fit(self,
         X: np.array,
         y: np.array,
         sample_weight: np.array = None) -> Odte:
     # Check parameters are Ok.
     if self.n_estimators < 3:
          raise ValueError(
              f"n_estimators must be greater than 2 but got "
              f"(n_estimators={self.n_estimators})")
     check_classification_targets(y)
     X, y = self._validate_data(X, y)
     # if weights is None return np.ones
     sample_weight = _check_sample_weight(sample_weight,
                                          X,
                                          dtype=np.float64)
     check_classification_targets(y)
     # Initialize computed parameters
     #  Build the estimator
     self.max_features_ = self._initialize_max_features()
     # build base_estimator_
     self._validate_estimator()
     self.classes_, y = np.unique(y, return_inverse=True)
     self.n_classes_: int = self.classes_.shape[0]
     self.estimators_: List[BaseEstimator] = []
     self.subspaces_: List[Tuple[int, ...]] = []
     result = self._train(X, y, sample_weight)
     self.estimators_, self.subspaces_ = tuple(zip(*result))  # type: ignore
     return self
Example #16
 def _validate_y(self, y):
     """Validate the label vector."""
     y = column_or_1d(y, warn=True)
     check_classification_targets(y)
     self.classes_, y_encoded = np.unique(y, return_inverse=True)
     self.n_classes_ = len(self.classes_)
     return y
Example #17
    def fit(self, x, y):
        from cvxopt import matrix, solvers
        solvers.options['show_progress'] = False

        check_classification_targets(y)
        x, y = check_X_y(x, y)
        x_s, x_u = x[y == +1, :], x[y == 0, :]
        n_s, n_u = len(x_s), len(x_u)

        p_p = self.prior
        p_n = 1 - self.prior
        p_s = p_p**2 + p_n**2
        k_s = self._basis(x_s)
        k_u = self._basis(x_u)
        d = k_u.shape[1]

        P = np.zeros((d + 2 * n_u, d + 2 * n_u))
        P[:d, :d] = self.lam * np.eye(d)
        q = np.vstack((-p_s / (n_s * (p_p - p_n)) * k_s.T.dot(np.ones(
            (n_s, 1))), -p_n / (n_u * (p_p - p_n)) * np.ones(
                (n_u, 1)), -p_p / (n_u * (p_p - p_n)) * np.ones((n_u, 1))))
        G = np.vstack(
            (np.hstack((np.zeros((n_u, d)), -np.eye(n_u), np.zeros(
                (n_u, n_u)))),
             np.hstack((0.5 * k_u, -np.eye(n_u), np.zeros((n_u, n_u)))),
             np.hstack((k_u, -np.eye(n_u), np.zeros((n_u, n_u)))),
             np.hstack((np.zeros((n_u, d)), np.zeros(
                 (n_u, n_u)), -np.eye(n_u))),
             np.hstack((-0.5 * k_u, np.zeros((n_u, n_u)), -np.eye(n_u))),
             np.hstack((-k_u, np.zeros((n_u, n_u)), -np.eye(n_u)))))
        h = np.vstack((np.zeros((n_u, 1)), -0.5 * np.ones(
            (n_u, 1)), np.zeros((n_u, 1)), np.zeros((n_u, 1)), -0.5 * np.ones(
                (n_u, 1)), np.zeros((n_u, 1))))
        sol = solvers.qp(matrix(P), matrix(q), matrix(G), matrix(h))
        self.coef_ = np.array(sol['x'])[:d]
Example #18
    def fit(self, X: np.ndarray, y: np.ndarray):
        X, y = check_X_y(X, y, allow_nd=True)
        check_classification_targets(y)
        self.n_features_ = X.shape[1]
        self.classes_, class_counts = np.unique(y, return_counts=True)
        self.n_classes_ = len(self.classes_)
        self.priors_ = (self.priors if self.priors is not None
                        else class_counts / np.sum(class_counts))

        X = self._preprocess(X)

        graph = self.graph
        if graph is None:
            if self.structure == 'naive':
                graph = self._naive_structure()
            elif self.structure == 'tree':
                graph = self._chow_liu_structure(X, y)
            else:
                raise ValueError(f'Invalid structure type: {self.structure}')
        self.graph_ = graph

        self.models_ = [
            self._partial_fit(graph_k, X[y == cls])
            for graph_k, cls in zip(graph, self.classes_)
        ]
        return self
Example #19
    def fit(self, X, y, sample_weight=None):
        """Fit the estimators.
        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features)
            Training vectors, where n_samples is the number of samples and
            n_features is the number of features.
        y : array-like of shape (n_samples,)
            Target values.
        sample_weight : array-like of shape (n_samples,), default=None
            Sample weights. If None, then samples are equally weighted.
            Note that this is supported only if all underlying estimators
            support sample weights.
            .. versionadded:: 0.18
        Returns
        -------
        self : object
        """
        check_classification_targets(y)
        if isinstance(y, np.ndarray) and len(y.shape) > 1 and y.shape[1] > 1:
            raise NotImplementedError('Multilabel and multi-output'
                                      ' classification is not supported.')

        if self.voting not in ('soft', 'hard'):
            raise ValueError("Voting must be 'soft' or 'hard'; got (voting=%r)"
                             % self.voting)

        #self.le_ = LabelEncoder().fit(y)
        #self.classes_ = self.le_.classes_
        #transformed_y = self.le_.transform(y)
        self.estimators_ = [est[1] for est in self.estimators]
        return self #super().fit(X, transformed_y, sample_weight)
Example #20
    def fit_transformer(self, X, y, epochs=100, lr=3e-4):
        # format y
        check_classification_targets(y)
        
        self.network = keras.Sequential()
        self.network.add(layers.Conv2D(filters=16, kernel_size=(3, 3), activation='relu', input_shape=np.shape(X)[1:]))
        self.network.add(layers.BatchNormalization())
        self.network.add(layers.Conv2D(filters=32, kernel_size=(3, 3), strides = 2, padding = "same", activation='relu'))
        self.network.add(layers.BatchNormalization())
        self.network.add(layers.Conv2D(filters=64, kernel_size=(3, 3), strides = 2, padding = "same", activation='relu'))
        self.network.add(layers.BatchNormalization())
        self.network.add(layers.Conv2D(filters=128, kernel_size=(3, 3), strides = 2, padding = "same", activation='relu'))
        self.network.add(layers.BatchNormalization())
        self.network.add(layers.Conv2D(filters=254, kernel_size=(3, 3), strides = 2, padding = "same", activation='relu'))

        self.network.add(layers.Flatten())
        self.network.add(layers.Dense(2000, activation='relu'))
        self.network.add(layers.Dense(2000, activation='relu'))
        self.network.add(layers.Dense(units=len(np.unique(y)), activation = 'softmax'))

        self.network.compile(loss = 'categorical_crossentropy', metrics=['acc'], optimizer = keras.optimizers.Adam(lr))
        self.network.fit(X, 
                    keras.utils.to_categorical(y), 
                    epochs = epochs, 
                    callbacks = [EarlyStopping(patience = 4, monitor = "val_acc")], 
                    verbose = self.verbose,
                    validation_split = .33)
        self.encoder = keras.models.Model(inputs = self.network.inputs, outputs = self.network.layers[-2].output)
        
        #make sure to flag that we're fit
        self.transformer_fitted_ = True
Example #21
    def fit(self, X: pd.DataFrame, y: pd.Series):
        """
        Learn the mean target value per category or bin.

        Parameters
        ----------
        X : pandas dataframe of shape = [n_samples, n_features]
            The training input samples.

        y : pandas series of shape = [n_samples,]
            The target variable.
        """
        check_classification_targets(y)

        self.classes_ = unique_labels(y)

        # check that y is binary
        if len(self.classes_) > 2:
            raise NotImplementedError(
                "This classifier is designed for binary classification only. "
                "The target has more than 2 unique values.")

        # if target has values other than 0 and 1, we need to remap the values,
        # to be able to compute meaningful averages.
        if any(x not in [0, 1] for x in self.classes_):
            y = np.where(y == unique_labels(y)[0], 0, 1)

        return super().fit(X, y)
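
A minimal sketch of the 0/1 remapping above (illustrative values only):

    import numpy as np
    from sklearn.utils.multiclass import unique_labels

    y = np.array(['no', 'yes', 'no'])
    y01 = np.where(y == unique_labels(y)[0], 0, 1)
    # unique_labels(y) -> ['no', 'yes'], so 'no' -> 0 and 'yes' -> 1,
    # which makes the per-category target means meaningful
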
Example #22
    def fit(self, X, y):
        """Fit the model according to the given training data and parameters.

        Parameters
        ----------
        X : array-like, shape = [n_samples, n_features]
            Training vector, where n_samples is the number of samples and
            n_features is the number of features.

        y : array, shape = [n_samples]
            Target values.
        """
        X, y = check_X_y(X, y)
        check_classification_targets(y)
        self.classes_, y = np.unique(y, return_inverse=True)
        n_samples, n_features = X.shape
        n_classes = len(self.classes_)

        _, counts = np.unique(y, return_counts=True)
        if np.any(counts > n_features):
            warnings.warn("Found some classes with more counts than input "
                          "features. Results may be unstable.")

        self.hat_ = []

        for ind in range(n_classes):
            Xg = X[y == ind, :]
            Gg = np.dot(Xg, Xg.T)
            self.hat_.append(np.dot(np.dot(Xg.T, np.linalg.inv(Gg)), Xg))

        return self
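
The per-class hat matrix Xg.T @ inv(Xg @ Xg.T) @ Xg is a projection onto the row space of Xg, which is why classes with more samples than features (singular Gg) are flagged above; a quick check (illustrative values only):

    import numpy as np

    rng = np.random.default_rng(0)
    Xg = rng.normal(size=(3, 5))      # 3 samples, 5 features
    P = Xg.T @ np.linalg.inv(Xg @ Xg.T) @ Xg
    print(np.allclose(P @ P, P))      # True: P is idempotent (a projection)
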
Example #23
    def fit(self, X, y):
        """Fit underlying estimators.
        Parameters
        ----------
        X : (sparse) array-like, shape = [n_samples, n_features]
            Data.
        y : array-like, shape = [n_samples]
            Multi-class targets.
        Returns
        -------
        self
        """
        X, y = check_X_y(X, y, accept_sparse=['csr', 'csc'])
        check_classification_targets(y)

        self.classes_ = np.unique(y)
        if len(self.classes_) == 1:
            raise ValueError("OneVsOneClassifier can not be fit when only one"
                             " class is present.")
        n_classes = self.classes_.shape[0]
        estimators_indices = list(zip(*(Parallel(n_jobs=self.n_jobs)(
            delayed(_fit_ovo_binary)
            (self.estimator, X, y, self.classes_[i], self.classes_[j])
            for i in range(n_classes) for j in range(i + 1, n_classes)))))

        self.estimators_ = estimators_indices[0]
        # try:
        #     self.pairwise_indices_ = (
        #         estimators_indices[1] if self._pairwise else None)
        # except AttributeError:
        #     self.pairwise_indices_ = None

        return self
Example #24
    def fit(self, X, y):
        """Fit classifier.

        Parameters
        ----------
        X : numpy array of shape (n_samples, n_features)
            The input samples.

        y : numpy array of shape (n_samples,)
            The ground truth of the input samples (labels).
        """

        # Validate inputs X and y
        X, y = check_X_y(X, y)
        X = check_array(X)
        check_classification_targets(y)
        self._classes = len(np.unique(y))

        if self.pre_fitted:
            print("Training skipped")
            return
        else:
            for clf in self.classifiers:
                clf.fit(X, y)
                clf.fitted_ = True
            return
Example #25
    def fit(self, X, y, sample_weight=None):
        """Fit the estimators.

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features)
            Training vectors, where `n_samples` is the number of samples and
            `n_features` is the number of features.

        y : array-like of shape (n_samples,)
            Target values.

        sample_weight : array-like of shape (n_samples,), default=None
            Sample weights. If None, then samples are equally weighted.
            Note that this is supported only if all underlying estimators
            support sample weights.

        Returns
        -------
        self : object
        """
        check_classification_targets(y)
        self._le = LabelEncoder().fit(y)
        self.classes_ = self._le.classes_
        return super().fit(X, self._le.transform(y), sample_weight)
Example #26
    def _validate_y(self, y):
        """Validate the label vector."""

        y = column_or_1d(y, warn=True)
        check_classification_targets(y)

        return y
Example #27
    def fit(self,X,y):
        '''
        Fits Logistic Regression with ARD
        
        Parameters
        ----------
        X: array-like of size [n_samples, n_features]
           Training data, matrix of explanatory variables
        
        y: array-like of size [n_samples] 
           Target values
           
        Returns
        -------
        self : object
            Returns self.
        '''
        X, y = check_X_y(X, y, accept_sparse = None, dtype=np.float64)
        n_samples, n_features = X.shape

        # preprocess features
        self._X_mean = np.zeros(n_features)
        self._X_std  = np.ones(n_features)
        if self.normalize:
            self._X_mean, self._X_std = np.mean(X,0), np.std(X,0)
        X = (X - self._X_mean) / self._X_std
        if self.fit_intercept:
            X = np.concatenate((np.ones([n_samples,1]),X),1)
            n_features += 1
        
        # preprocess targets
        check_classification_targets(y)
        self.classes_ = np.unique(y)
        n_classes = len(self.classes_)
        if n_classes < 2:
            raise ValueError("Need samples of at least 2 classes"
                             " in the data, but the data contains only one"
                             " class: %r" % self.classes_[0])
        
        # if multiclass use OVR (i.e. fit classifier for each class)
        self.coef_,self.active_ ,self.lambda_= list(),list(),list()
        self.intercept_, self.sigma_ = list(),list()            
        for pos_class in self.classes_:
            if n_classes == 2:
                pos_class = self.classes_[1]
            mask = (y == pos_class)
            y_bin = np.zeros(y.shape, dtype=np.float64)
            y_bin[mask] = 1
            coef_, intercept_, active_ , sigma_ , A  = self._fit(X,y_bin,
                                                       n_samples,n_features)
            self.coef_.append(coef_)
            self.active_.append(active_)
            self.intercept_.append(intercept_)
            self.sigma_.append(sigma_)
            self.lambda_.append(A)
            # in case of binary classification fit only one classifier           
            if n_classes == 2:
                break
            
        return self
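
A minimal sketch of the one-vs-rest target construction used in the loop above (illustrative values only):

    import numpy as np

    y = np.array([0, 1, 2, 1])
    pos_class = 1
    y_bin = np.zeros(y.shape, dtype=np.float64)
    y_bin[y == pos_class] = 1
    # y_bin -> [0., 1., 0., 1.]; the loop builds one such vector per class
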
Example #28
    def get_model(self, X, y):
        if not isinstance(X, (KDTree, BallTree)):
            X, y = check_X_y(X, y, "csr", multi_output=True)

        if y.ndim == 1 or y.ndim == 2 and y.shape[1] == 1:
            if y.ndim != 1:
                warnings.warn("A column-vector y was passed when a 1d array "
                              "was expected. Please change the shape of y to "
                              "(n_samples, ), for example using ravel().",
                              DataConversionWarning, stacklevel=2)

            self.outputs_2d_ = False
            y = y.reshape((-1, 1))
        else:
            self.outputs_2d_ = True

        check_classification_targets(y)
        self.classes_ = []
        self._y = np.empty(y.shape, dtype=int)
        for k in range(self._y.shape[1]):
            classes, self._y[:, k] = np.unique(y[:, k], return_inverse=True)
            self.classes_.append(classes)

        if not self.outputs_2d_:
            self.classes_ = self.classes_[0]
            self._y = self._y.ravel()

        return self._fit(X)
Example #29
    def fit(self, X, y):
        '''
        Fits Logistic Regression with ARD
        
        Parameters
        ----------
        X: array-like of size [n_samples, n_features]
           Training data, matrix of explanatory variables
        
        y: array-like of size [n_samples] 
           Target values
           
        Returns
        -------
        self : object
            Returns self.
        '''
        X, y = check_X_y(X, y, accept_sparse=None, dtype=np.float64)
        n_samples, n_features = X.shape

        # preprocess features
        self._X_mean = np.zeros(n_features)
        self._X_std = np.ones(n_features)
        if self.normalize:
            self._X_mean, self._X_std = np.mean(X, 0), np.std(X, 0)
        X = (X - self._X_mean) / self._X_std
        if self.fit_intercept:
            X = np.concatenate((np.ones([n_samples, 1]), X), 1)
            n_features += 1

        # preprocess targets
        check_classification_targets(y)
        self.classes_ = np.unique(y)
        n_classes = len(self.classes_)
        if n_classes < 2:
            raise ValueError("Need samples of at least 2 classes"
                             " in the data, but the data contains only one"
                             " class: %r" % self.classes_[0])

        # if multiclass use OVR (i.e. fit classifier for each class)
        self.coef_, self.active_, self.lambda_ = list(), list(), list()
        self.intercept_, self.sigma_ = list(), list()
        for pos_class in self.classes_:
            if n_classes == 2:
                pos_class = self.classes_[1]
            mask = (y == pos_class)
            y_bin = np.zeros(y.shape, dtype=np.float64)
            y_bin[mask] = 1
            coef_, intercept_, active_, sigma_, A = self._fit(
                X, y_bin, n_samples, n_features)
            self.coef_.append(coef_)
            self.active_.append(active_)
            self.intercept_.append(intercept_)
            self.sigma_.append(sigma_)
            self.lambda_.append(A)
            # in case of binary classification fit only one classifier
            if n_classes == 2:
                break

        return self
Example #30
    def fit(self, X, y):
        """
        Fits the transformer to data X with labels y.

        Parameters
        ----------
        X : ndarray
            Input data matrix.
        y : ndarray
            Output (i.e. response data matrix).
        """

        check_classification_targets(y)
        _, y = np.unique(y, return_inverse=True)
        self.num_classes = len(np.unique(y))

        # more typechecking
        self.network.compile(loss=self.loss,
                             optimizer=self.optimizer,
                             **self.compile_kwargs)
        self.network.fit(
            X, keras.utils.to_categorical(y, num_classes=self.num_classes),
            **self.fit_kwargs)
        self._is_fitted = True

        return self
Example #31
    def fit(self, X, y):
        if self.fit_flag_:
            self._clear_params()

        X, y = check_X_y(X, y)
        check_classification_targets(y)

        self.classes_ = unique_labels(y)
        self.X_ = X
        self.y_ = y

        # setup parameters
        n_samples, n_features = X.shape
        if self.w_ is None:
            self.w0_ = 0
            self.w_ = np.zeros(n_features)
            self.V_ = np.random.normal(0, 0.001, (self.n_factors, n_features))

        self._update_class_weight(X, y)
        self.train_tracker_.start_train()
        for n_epoch in range(self.epochs):
            self.train_tracker_.start_epoch(n_epoch)
            self._train(X, y, n_samples, n_features)
            self.train_tracker_.end_epoch()

        self.train_tracker_.end_train()
        self.fit_flag_ = True

        return self
Example #32
    def _validate_y(self, y):
        y = column_or_1d(y, warn=True)
        check_classification_targets(y)
        self.classes_, y = np.unique(y, return_inverse=True)
        self.n_classes_ = len(self.classes_)

        return y
Example #33
    def fit(self, X, y, sample_weight=None):
        """
        Fit the model.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Training data.

        y : array-like of shape (n_samples,)
            Target values.

        sample_weight : array-like of shape (n_samples,), default=None
            Individual weights for each sample.

        Returns
        -------
        self
        """
        check_classification_targets(y)
        if sample_weight is None:
            self.classes_, self.counts_ = np.unique(y, return_counts=True)
        else:
            sample_weight = _check_sample_weight(sample_weight, X)
            sample_weight = sample_weight / sample_weight.mean()
            df = pd.DataFrame({'y': y, 'sample_weight': sample_weight})
            df = df.groupby('y').sum()
            self.classes_ = df.index.values
            self.counts_ = df.sample_weight.values
        self.counts_ = self.counts_ / self.counts_.sum()
        self.dominant_class_ = self.classes_[np.argmax(self.counts_)]
        return self
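
A minimal sketch of the weighted class-count branch above (illustrative values only):

    import numpy as np
    import pandas as pd

    y = np.array(['a', 'b', 'b'])
    w = np.array([2.0, 1.0, 1.0])
    df = pd.DataFrame({'y': y, 'sample_weight': w / w.mean()}).groupby('y').sum()
    # df.index.values -> ['a', 'b']; df.sample_weight.values -> [1.5, 1.5]
    # normalizing these sums gives the weighted class priors
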
Example #34
    def _validate_y_class_weight(self, y):
        check_classification_targets(y)

        y = np.copy(y)
        expanded_class_weight = None

        if self.class_weight is not None:
            y_original = np.copy(y)

        self.classes_ = []
        self.n_classes_ = []

        y_store_unique_indices = np.zeros(y.shape, dtype=int)
        for k in range(self.n_outputs_):
            classes_k, y_store_unique_indices[:, k] = np.unique(y[:, k], return_inverse=True)
            self.classes_.append(classes_k)
            self.n_classes_.append(classes_k.shape[0])
        y = y_store_unique_indices

        if self.class_weight is not None:
            valid_presets = ('auto', 'balanced', 'subsample', 'balanced_subsample')
            if isinstance(self.class_weight, six.string_types):
                if self.class_weight not in valid_presets:
                    raise ValueError('Valid presets for class_weight include '
                                     '"balanced" and "balanced_subsample". Given "%s".'
                                     % self.class_weight)
                if self.class_weight == "subsample":
                    warn("class_weight='subsample' is deprecated in 0.17 and"
                         "will be removed in 0.19. It was replaced by "
                         "class_weight='balanced_subsample' using the balanced"
                         "strategy.", DeprecationWarning)
                if self.warm_start:
                    warn('class_weight presets "balanced" or "balanced_subsample" are '
                         'not recommended for warm_start if the fitted data '
                         'differs from the full dataset. In order to use '
                         '"balanced" weights, use compute_class_weight("balanced", '
                         'classes, y). In place of y you can use a large '
                         'enough sample of the full training set target to '
                         'properly estimate the class frequency '
                         'distributions. Pass the resulting weights as the '
                         'class_weight parameter.')

            if (self.class_weight not in ['subsample', 'balanced_subsample'] or
                    not self.bootstrap):
                if self.class_weight == 'subsample':
                    class_weight = 'auto'
                elif self.class_weight == "balanced_subsample":
                    class_weight = "balanced"
                else:
                    class_weight = self.class_weight
                with warnings.catch_warnings():
                    if class_weight == "auto":
                        warnings.simplefilter('ignore', DeprecationWarning)
                    expanded_class_weight = compute_sample_weight(class_weight,
                                                                  y_original)

        return y, expanded_class_weight
Example #35
def test_check_classification_targets():
    for y_type in EXAMPLES.keys():
        if y_type in ["unknown", "continuous", 'continuous-multioutput']:
            for example in EXAMPLES[y_type]:
                msg = 'Unknown label type: '
                assert_raises_regex(ValueError, msg,
                                    check_classification_targets, example)
        else:
            for example in EXAMPLES[y_type]:
                check_classification_targets(example)
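
For context, a minimal standalone demonstration of the behavior the test above exercises:

    import numpy as np
    from sklearn.utils.multiclass import check_classification_targets

    check_classification_targets(np.array([0, 1, 1, 0]))     # ok: binary
    check_classification_targets(np.array(['a', 'b', 'c']))  # ok: multiclass
    try:
        check_classification_targets(np.array([0.5, 1.3]))   # continuous
    except ValueError as exc:
        print(exc)  # Unknown label type: ...
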
Example #36
    def _validate_y(self, y):
        y = column_or_1d(y, warn=True)
        check_classification_targets(y)
        self.classes_, y = np.unique(y, return_inverse=True)
        n_classes = len(self.classes_)

        if n_classes > 2:
            raise ValueError("It's a binary classification algorithm. Use a dataset with only 2 classes to predict.")

        return y
Example #37
    def fit(self, X, y, sample_weight=None):
        """
        Build a classifier from the training set (X, y).

        Parameters
        ----------
        X : array-like or sparse matrix of shape = [n_samples, n_features]
            The training input samples.

        y : array-like, shape = [n_samples]
            The target values (class labels in classification).

        sample_weight : array-like, shape = [n_samples] or None
            Individual weights for each sample.

        Returns
        -------
        self : object
            Returns self.
        """
        self._validate_params(**self.get_params())

        X, y = check_X_y(X, y, accept_sparse=True)
        if sp.isspmatrix(X):
            self._is_sparse_train_X = True
        else:
            self._is_sparse_train_X = False
        self._n_samples, self._n_features = X.shape
        sample_weight = self._get_sample_weight(sample_weight)
        check_consistent_length(X, y, sample_weight)
        check_classification_targets(y)
        self._classes = sorted(np.unique(y))
        self._n_classes = len(self._classes)
        self._classes_map = {}

        self._set_params_with_dependencies()
        params = self._get_params()

        if self._n_classes == 2:
            self._classes_map[0] = self._classes[0]
            self._classes_map[1] = self._classes[1]
            self._estimators = [None]
            y = (y == self._classes[0]).astype(int)
            self._fit_binary_task(X, y, sample_weight, params)
        elif self._n_classes > 2:
            if sp.isspmatrix_dok(X):
                X = X.tocsr().tocoo()  # Fix to avoid scipy 7699 issue
            self._estimators = [None] * self._n_classes
            self._fit_multiclass_task(X, y, sample_weight, params)
        else:
            raise ValueError("Classifier can't predict when only one class is present.")

        self._fitted = True

        return self
Example #38
    def fit(self,X,y):
        '''
        Fits Bayesian Logistic Regression

        Parameters
        -----------
        X: array-like of size (n_samples, n_features)
           Training data, matrix of explanatory variables
        
        y: array-like of size (n_samples, )
           Target values
           
        Returns
        -------
        self: object
           self
        '''
        # preprocess data
        X,y = check_X_y( X, y , dtype = np.float64)
        check_classification_targets(y)
        self.classes_ = np.unique(y)
        n_classes = len(self.classes_)
        
        # prepare for ovr if required        
        n_samples, n_features = X.shape
        if self.fit_intercept:
            X = self._add_intercept(X)
            
        if n_classes < 2:
            raise ValueError("Need samples of at least 2 classes")
        if n_classes > 2:
            self.coef_, self.sigma_ = [0]*n_classes,[0]*n_classes
            self.intercept_         = [0]*n_classes
        else:
            self.coef_, self.sigma_, self.intercept_ = [0],[0],[0]

        # make classifier for each class (one-vs-the rest)
        for i in range(len(self.coef_)):
            if n_classes == 2:
                pos_class = self.classes_[1]
            else:
                pos_class = self.classes_[i]
            mask = (y == pos_class)
            y_bin = np.ones(y.shape, dtype=np.float64)
            y_bin[~mask] = self._mask_val
            coef_, sigma_ = self._fit(X,y_bin)
            if self.fit_intercept:
                self.intercept_[i],self.coef_[i] = self._get_intercept(coef_)
            else:
                self.coef_[i]      = coef_
            self.sigma_[i] = sigma_
            
        self.coef_  = np.asarray(self.coef_)
        return self
Example #39
    def fit(self,X,y):
        '''
        Fits Bayesian Logistic Regression with Laplace approximation

        Parameters
        -----------
        X: array-like of size [n_samples, n_features]
           Training data, matrix of explanatory variables
        
        y: array-like of size [n_samples, n_features] 
           Target values
           
        Returns
        -------
        self: object
           self
        '''
        # preprocess data
        X,y = check_X_y( X, y , dtype = np.float64)
        check_classification_targets(y)
        self.classes_ = np.unique(y)
        n_classes = len(self.classes_)
        
        # take into account bias term if required 
        n_samples, n_features = X.shape
        n_features = n_features + int(self.fit_intercept)
        
        if n_classes < 2:
            raise ValueError("Need samples of at least 2 classes")
        if n_classes > 2:
            self.coef_, self.sigma_ = [0]*n_classes,[0]*n_classes
            self.intercept_         = [0]*n_classes
        else:
            self.coef_, self.sigma_, self.intercept_ = [0],[0],[0]

        for i in range(len(self.coef_)):
            w0 = np.zeros(n_features)
            if n_classes == 2:
                pos_class = self.classes_[1]
            else:
                pos_class = self.classes_[i]
            mask = (y == pos_class)
            y_bin = np.ones(y.shape, dtype=np.float64)
            y_bin[~mask] = -1.
            coef, sigma_ = self._fit(X,y_bin,w0, self.alpha)
            if self.fit_intercept:
                self.intercept_[i] = coef[-1]
                coef_              = coef[:-1]
            else:
                coef_              = coef
            self.coef_[i]  = coef_
            self.sigma_[i] = sigma_
            
        self.coef_  = np.asarray(self.coef_)
        return self
Example #40
	def _prepare(self,X,Y):
		'''preprocess data before training'''
		check_classification_targets(Y)
		self.classes_ = np.unique(Y)
		if len(self.classes_) < 2:
			raise ValueError("The number of classes has to be almost 2; got ", len(self.classes_))
		self.multiclass_ = len(self.classes_) > 2

		KL = process_list(X,self.generator)				# X can be a samples matrix or Kernels List
		self.KL, self.Y = check_KL_Y(KL,Y)
		self.n_kernels = len(self.KL)
		return
Example #41
    def _encode_y(self, y):
        # encode classes into 0 ... n_classes - 1 and sets attributes classes_
        # and n_trees_per_iteration_
        check_classification_targets(y)

        label_encoder = LabelEncoder()
        encoded_y = label_encoder.fit_transform(y)
        self.classes_ = label_encoder.classes_
        n_classes = self.classes_.shape[0]
        # only 1 tree for binary classification. For multiclass classification,
        # we build 1 tree per class.
        self.n_trees_per_iteration_ = 1 if n_classes <= 2 else n_classes
        encoded_y = encoded_y.astype(Y_DTYPE, copy=False)
        return encoded_y
Example #42
    def fit(self, X, y):
        check_X_y(X, y, accept_sparse=['csc', 'csr', 'coo', 'dok',
                        'bsr', 'lil', 'dia'])
        check_array(X, accept_sparse=['csc', 'csr', 'coo', 'dok',
                        'bsr', 'lil', 'dia'])
        self.X_ = X
        check_classification_targets(y)
        # non-zero entries of y are labeled samples; classes run 1..n_classes
        classes = np.unique(y[np.nonzero(y)])

        n_samples, n_classes = len(y), len(classes)
        # create diagonal matrix of degree of nodes
        if sparse.isspmatrix(self.X_):
            B_ = self.X_.copy().astype(float)
            D = np.array(csr_matrix.sum(self.X_, axis=1), dtype=float).T[0]
        else:
            B_ = np.copy(self.X_).astype(float)
            D = np.array(np.sum(self.X_, axis=1), dtype=float)

        # if (- self.sigma) equals (self.sigma - 1), the diagonal matrices on
        # the left and right sides coincide; otherwise they differ
        if (- self.sigma) == (self.sigma - 1):
            D_left = D_right = np.power(D, - self.sigma)
        else:
            D_left = np.power(D, - self.sigma)
            D_right = np.power(D, self.sigma - 1)

        # M_ = D_left.dot(B_)
        for i, d in enumerate(D_left):
            B_[i, :] *= d
        # B_ = M_.dot(D_right)
        for i, d in enumerate(D_right):
            B_[:, i] *= d
        # create labeled data Z
        dimension = (n_samples, n_classes)
        labels = np.nonzero(y)
        ans_y = np.zeros(dimension)
        for l in labels[0]:
            ans_y[l][y[l] - 1] = 1

        Z_ = (self.sigma / (1 + self.sigma)) * ans_y
        self.initial_vector_ = np.ones(dimension) / n_classes
        self._get_method_(B_, Z_)
        return self
Example #43
    def _set_n_classes(self, y):
        """Set the number of classes if `y` is presented, which is not
        expected. It could be useful for multi-class outlier detection.

        Parameters
        ----------
        y : numpy array of shape (n_samples,)
            Ground truth.

        Returns
        -------
        self
        """

        self._classes = 2  # default as binary classification
        if y is not None:
            check_classification_targets(y)
            self._classes = len(np.unique(y))
            warnings.warn(
                "y should not be presented in unsupervised learning.")
        return self
Example #44
    def fit_resample(self, X, y):
        """Resample the dataset.

        Parameters
        ----------
        X : {array-like, sparse matrix}, shape (n_samples, n_features)
            Matrix containing the data which have to be sampled.

        y : array-like, shape (n_samples,)
            Corresponding label for each sample in X.

        Returns
        -------
        X_resampled : {array-like, sparse matrix}, shape \
(n_samples_new, n_features)
            The array containing the resampled data.

        y_resampled : array-like, shape (n_samples_new,)
            The corresponding label of `X_resampled`.

        """
        self._deprecate_ratio()

        check_classification_targets(y)
        X, y, binarize_y = self._check_X_y(X, y)

        self.sampling_strategy_ = check_sampling_strategy(
            self.sampling_strategy, y, self._sampling_type)

        output = self._fit_resample(X, y)

        if binarize_y:
            y_sampled = label_binarize(output[1], np.unique(y))
            if len(output) == 2:
                return output[0], y_sampled
            return output[0], y_sampled, output[2]
        return output
Example #45
 def fit(self, X, Y):
     
     """Fit the model according to the given training data
     
     Parameters
     ----------
     X : array-like, shape = [n_samples, n_features]
         Matrix of the examples, where
         n_samples is the number of samples and
          n_features is the number of features
     
     Y : array-like, shape = [n_samples]
         array of the labels relative to X
     
     Returns
     -------
     self : object
         Returns self
     """
     X,Y = validation.check_X_y(X, Y, dtype=np.float64, order='C', accept_sparse='csr')
     #check_consistent_length(X,Y)
     check_classification_targets(Y)
     
     self.classes_ = np.unique(Y)
     if len(self.classes_) < 2:
         raise ValueError("The number of classes has to be almost 2; got ", len(self.classes_))
     
     if len(self.classes_) == 2:
         self.multiclass_ = False
         return self._fit(X,Y)
     else :
         self.multiclass_ = True
         if self.multiclass_strategy == 'ovo':
             return self._one_vs_one(X,Y)
         else :
             return self._one_vs_rest(X,Y)
     raise ValueError('This is a very bad exception...')
Example #46
 def fit_resample(self, X, y):
     check_classification_targets(y)
     self.fit(X, y)
     return X, y
Example #47
    def _fit(self, X, y, sample_weight=None, relative_penalties=None):
        if self.lambda_path is not None:
            n_lambda = len(self.lambda_path)
            min_lambda_ratio = 1.0
        else:
            n_lambda = self.n_lambda
            min_lambda_ratio = self.min_lambda_ratio

        check_classification_targets(y)
        self.classes_ = np.unique(y)  # the output of np.unique is sorted
        n_classes = len(self.classes_)
        if n_classes < 2:
            raise ValueError("Training data need to contain at least 2 "
                             "classes.")

        # glmnet requires the labels as a one-hot-encoded array of
        # (n_samples, n_classes)
        if n_classes == 2:
            # Normally we use 1/0 for the positive and negative classes. Since
            # np.unique sorts the output, the negative class will be in the 0th
            # column. We want a model predicting the positive class, not the
            # negative class, so we flip the columns here (the != condition).
            #
            # Broadcast comparison of self.classes_ to all rows of y. See the
            # numpy rules on broadcasting for more info, essentially this
            # "reshapes" y to (n_samples, n_classes) and self.classes_ to
            # (n_samples, n_classes) and performs an element-wise comparison
            # resulting in _y with shape (n_samples, n_classes).
            _y = (y[:, None] != self.classes_).astype(np.float64, order='F')
        else:
            # multinomial case, glmnet uses the entire array so we can
            # keep the original order.
            _y = (y[:, None] == self.classes_).astype(np.float64, order='F')

        # use sample weights, making sure all weights are positive
        # this is inspired by the R wrapper for glmnet, in lognet.R
        if sample_weight is not None:
            weight_gt_0 = sample_weight > 0
            sample_weight = sample_weight[weight_gt_0]
            _y = _y[weight_gt_0, :]
            X = X[weight_gt_0, :]
            _y = _y * np.expand_dims(sample_weight, 1)

        # we need some sort of "offset" array for glmnet
        # an array of shape (n_examples, n_classes)
        offset = np.zeros((X.shape[0], n_classes), dtype=np.float64,
                          order='F')

        # You should have thought of that before you got here.
        exclude_vars = 0

        # how much each feature should be penalized relative to the others
        # this may be useful to expose to the caller if there are vars that
        # must be included in the final model or there is some prior knowledge
        # about how important some vars are relative to others, see the glmnet
        # vignette:
        # http://web.stanford.edu/~hastie/glmnet/glmnet_alpha.html
        if relative_penalties is None:
            relative_penalties = np.ones(X.shape[1], dtype=np.float64,
                                         order='F')

        coef_bounds = np.empty((2, X.shape[1]), dtype=np.float64, order='F')
        coef_bounds[0, :] = self.lower_limits
        coef_bounds[1, :] = self.upper_limits

        if n_classes == 2:
            # binomial, tell glmnet there is only one class
            # otherwise we will get a coef matrix with two dimensions
            # where each pair are equal in magnitude and opposite in sign
            # also since the magnitudes are constrained to sum to one, the
            # returned coefficients would be one half of the proper values
            n_classes = 1


        # This is a stopping criterion (nx)
        # R defaults to nx = num_features, and ne = num_features + 1
        if self.max_features is None:
            max_features = X.shape[1]
        else:
            max_features = self.max_features

        # for documentation on the glmnet function lognet, see doc.py
        if issparse(X):
            _x = csc_matrix(X, dtype=np.float64, copy=True)

            (self.n_lambda_,
             self.intercept_path_,
             ca,
             ia,
             nin,
             _,  # dev0
             _,  # dev
             self.lambda_path_,
             _,  # nlp
             jerr) = splognet(self.alpha,
                              _x.shape[0],
                              _x.shape[1],
                              n_classes,
                              _x.data,
                              _x.indptr + 1,  # Fortran uses 1-based indexing
                              _x.indices + 1,
                              _y,
                              offset,
                              exclude_vars,
                              relative_penalties,
                              coef_bounds,
                              max_features,
                              X.shape[1] + 1,
                              min_lambda_ratio,
                              self.lambda_path,
                              self.tol,
                              n_lambda,
                              self.standardize,
                              self.fit_intercept,
                              self.max_iter,
                              0)
        else:  # not sparse
            # Some notes: glmnet requires both x and y to be float64, and
            # both arrays may be overwritten during the fitting process, so
            # they need to be copied prior to calling lognet. The Fortran
            # wrapper will copy any arrays passed to a wrapped function if
            # they are not in the Fortran layout, so to avoid making extra
            # copies, ensure x and y are `F_CONTIGUOUS` prior to calling
            # lognet.
            _x = X.astype(dtype=np.float64, order='F', copy=True)
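            # np.isfortran(_x) is now True, so the wrapper can pass the
            # array through without making yet another copy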

            (self.n_lambda_,
             self.intercept_path_,
             ca,
             ia,
             nin,
             _,  # dev0
             _,  # dev
             self.lambda_path_,
             _,  # nlp
             jerr) = lognet(self.alpha,
                            n_classes,
                            _x,
                            _y,
                            offset,
                            exclude_vars,
                            relative_penalties,
                            coef_bounds,
                            X.shape[1] + 1,
                            min_lambda_ratio,
                            self.lambda_path,
                            self.tol,
                            max_features,
                            n_lambda,
                            self.standardize,
                            self.fit_intercept,
                            self.max_iter,
                            0)

        # raises RuntimeError if self.jerr_ is nonzero
        self.jerr_ = jerr
        _check_error_flag(self.jerr_)

        # glmnet may not return the requested number of lambda values, so we
        # need to trim the trailing zeros from the returned path so
        # len(lambda_path_) is equal to n_lambda_
        self.lambda_path_ = self.lambda_path_[:self.n_lambda_]
        # also fix the first value of lambda
        self.lambda_path_ = _fix_lambda_path(self.lambda_path_)
        self.intercept_path_ = self.intercept_path_[:, :self.n_lambda_]
        # also trim the compressed coefficient matrix
        ca = ca[:, :, :self.n_lambda_]
        # and trim the per-lambda counts of compressed coefficients (nin)
        nin = nin[:self.n_lambda_]
        # decompress the coefficients returned by glmnet, see doc.py
        self.coef_path_ = lsolns(X.shape[1], ca, ia, nin)
        # coef_path_ has shape (n_features, n_classes, n_lambda), we should
        # match shape for scikit-learn models:
        # (n_classes, n_features, n_lambda)
        self.coef_path_ = np.transpose(self.coef_path_, axes=(1, 0, 2))

        return self
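# --- A minimal standalone sketch (not part of the library; numpy only) of
# the label-indicator construction and lambda-path trimming performed above;
# the name `n_lambda_returned` is hypothetical.
import numpy as np

y = np.array(['spam', 'ham', 'spam', 'spam'])
classes_ = np.unique(y)  # sorted: ['ham', 'spam']
# flip the columns so column 0 corresponds to the positive class ('spam')
_y = (y[:, None] != classes_).astype(np.float64, order='F')
# _y == [[1., 0.], [0., 1.], [1., 0.], [1., 0.]]

# glmnet may return fewer lambda values than requested, so trim the
# trailing zeros from the returned path
lambda_path = np.array([1.0, 0.5, 0.25, 0.0, 0.0])
n_lambda_returned = 3  # stand-in for n_lambda_
lambda_path = lambda_path[:n_lambda_returned]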
Example #48
0
    def fit(self, X, y, sample_weight=None):
        """
        Build an RGF Classifier from the training set (X, y).

        Parameters
        ----------
        X : array-like or sparse matrix of shape = [n_samples, n_features]
            The training input samples.

        y : array-like, shape = [n_samples]
            The target values (class labels in classification).

        sample_weight : array-like, shape = [n_samples] or None
            Individual weights for each sample.

        Returns
        -------
        self : object
            Returns self.
        """
        _validate_params(**self.get_params())

        X, y = check_X_y(X, y, accept_sparse=True)
        n_samples, self._n_features = X.shape

        if self.sl2 is None:
            self._sl2 = self.l2
        else:
            self._sl2 = self.sl2

        if isinstance(self.min_samples_leaf, _FLOATS):
            self._min_samples_leaf = ceil(self.min_samples_leaf * n_samples)
        else:
            self._min_samples_leaf = self.min_samples_leaf
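        # e.g. min_samples_leaf=0.01 with n_samples=950 gives
        # ceil(0.01 * 950) = ceil(9.5) = 10 samples per leaf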

        if self.n_iter is None:
            if self.loss == "LS":
                self._n_iter = 10
            else:
                self._n_iter = 5
        else:
            self._n_iter = self.n_iter

        if sample_weight is None:
            sample_weight = np.ones(n_samples, dtype=np.float32)
        else:
            sample_weight = column_or_1d(sample_weight, warn=True)
            if (sample_weight <= 0).any():
                raise ValueError("Sample weights must be positive.")
        check_consistent_length(X, y, sample_weight)
        check_classification_targets(y)

        self._classes = sorted(np.unique(y))
        self._n_classes = len(self._classes)
        self._classes_map = {}

        params = dict(max_leaf=self.max_leaf,
                      test_interval=self.test_interval,
                      algorithm=self.algorithm,
                      loss=self.loss,
                      reg_depth=self.reg_depth,
                      l2=self.l2,
                      sl2=self._sl2,
                      normalize=self.normalize,
                      min_samples_leaf=self._min_samples_leaf,
                      n_iter=self._n_iter,
                      n_tree_search=self.n_tree_search,
                      opt_interval=self.opt_interval,
                      learning_rate=self.learning_rate,
                      memory_policy=self.memory_policy,
                      verbose=self.verbose)
        if self._n_classes == 2:
            self._classes_map[0] = self._classes[0]
            self._classes_map[1] = self._classes[1]
            self._estimators = [None]
            y = (y == self._classes[0]).astype(int)
            self._estimators[0] = _RGFBinaryClassifier(**params)
            self._estimators[0].fit(X, y, sample_weight)
        elif self._n_classes > 2:
            if sp.isspmatrix_dok(X):
                X = X.tocsr().tocoo()  # work around scipy issue #7699
            self._estimators = [None] * self._n_classes
            ovr_list = [None] * self._n_classes
            for i, cls_num in enumerate(self._classes):
                self._classes_map[i] = cls_num
                ovr_list[i] = (y == cls_num).astype(int)
                self._estimators[i] = _RGFBinaryClassifier(**params)
            self._estimators = Parallel(n_jobs=self.n_jobs)(delayed(_fit_ovr_binary)(self._estimators[i],
                                                                                     X,
                                                                                     ovr_list[i],
                                                                                     sample_weight)
                                                            for i in range(self._n_classes))
        else:
            raise ValueError("Classifier can't predict when only one class is present.")

        self._fitted = True
        return self
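# --- A quick sketch (plain numpy, no RGF dependency) of the one-vs-rest
# label binarization used in the multiclass branch above.
import numpy as np

y = np.array([0, 2, 1, 2])
classes = np.unique(y)  # [0, 1, 2]
# one binary target vector per class, as built into ovr_list above
ovr_targets = [(y == c).astype(int) for c in classes]
# ovr_targets[2] -> array([0, 1, 0, 1])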
Example #49
0
    def fit(self, X, y):
        '''
        Fits Logistic Regression with ARD
        
        Parameters
        ----------
        X: array-like of size [n_samples, n_features]
           Training data, matrix of explanatory variables
        
        y: array-like of size [n_samples] 
           Target values
           
        Returns
        -------
        self : object
            Returns self.
        '''
        X, y = check_X_y(X, y, accept_sparse=None, dtype=np.float64)
                    
        # normalize, if required
        if self.normalize:
            self._x_mean = np.mean(X,0)
            self._x_std  = np.std(X,0)
            X            = (X - self._x_mean) / self._x_std

        # add bias term if required
        if self.fit_intercept:
            X = np.concatenate((np.ones([X.shape[0],1]),X),1)

        # preprocess targets
        check_classification_targets(y)
        self.classes_ = np.unique(y)
        n_classes = len(self.classes_)
        if n_classes < 2:
            raise ValueError("Need samples of at least 2 classes"
                             " in the data, but the data contains only one"
                             " class: %r" % self.classes_[0])
        
        # if multiclass, use OVR (i.e. fit one binary classifier per class)
        if n_classes > 2:
            self.coef_, self.sigma_ = [0] * n_classes, [0] * n_classes
            self.intercept_, self.active_ = [0] * n_classes, [0] * n_classes
            self.lambda_ = [0] * n_classes
        else:
            self.coef_, self.sigma_, self.intercept_, self.active_ = [0], [0], [0], [0]
            self.lambda_ = [0]
         
        for i in range(len(self.classes_)):  # xrange is Python 2 only
            if n_classes == 2:
                pos_class = self.classes_[1]
            else:
                pos_class = self.classes_[i]
            mask = (y == pos_class)
            y_bin = np.zeros(y.shape, dtype=np.float64)
            y_bin[mask] = 1
            coef, bias, active, sigma, lambda_ = self._fit(X, y_bin)
            self.coef_[i], self.intercept_[i], self.sigma_[i] = coef, bias, sigma
            self.active_[i], self.lambda_[i] = active, lambda_
            # in case of binary classification fit only one classifier           
            if n_classes == 2:
                break  
        self.coef_      = np.asarray(self.coef_)
        self.intercept_ = np.asarray(self.intercept_)
        return self
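# --- A small sketch (numpy only) of the normalize-then-add-bias
# preprocessing at the top of the method above.
import numpy as np

X = np.array([[1., 10.], [2., 20.], [3., 30.]])
Xs = (X - np.mean(X, 0)) / np.std(X, 0)  # zero mean, unit variance
Xb = np.concatenate((np.ones([Xs.shape[0], 1]), Xs), 1)  # bias column first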
    def fit(self, X, y):
        """Fit a semi-supervised label propagation model based

        All the input data is provided matrix X (labeled and unlabeled)
        and corresponding label matrix y with a dedicated marker value for
        unlabeled samples.

        Parameters
        ----------
        X : array-like, shape = [n_samples, n_features]
            An [n_samples, n_samples] graph matrix will be created from this

        y : array-like, shape = [n_samples]
            Labels for the n_samples points; unlabeled points are marked
            with -1 and will be transductively assigned labels

        Returns
        -------
        self : returns an instance of self.
        """
        X, y = check_X_y(X, y)
        self.X_ = X
        check_classification_targets(y)

        # actual graph construction (implementations should override this)
        graph_matrix = self._build_graph()

        # label construction
        # construct a categorical distribution for classification only
        classes = np.unique(y)
        classes = (classes[classes != -1])
        self.classes_ = classes

        n_samples, n_classes = len(y), len(classes)

        y = np.asarray(y)
        unlabeled = y == -1
        clamp_weights = np.ones((n_samples, 1))
        clamp_weights[~unlabeled, 0] = 1 - self.alpha

        # initialize distributions
        self.label_distributions_ = np.zeros((n_samples, n_classes))
        for label in classes:
            self.label_distributions_[y == label, classes == label] = 1

        y_static = np.copy(self.label_distributions_)
        if self.alpha > 0.:
            y_static *= self.alpha
        y_static[unlabeled] = 0

        l_previous = np.zeros((self.X_.shape[0], n_classes))

        remaining_iter = self.max_iter
        if sparse.isspmatrix(graph_matrix):
            graph_matrix = graph_matrix.tocsr()
        while (_not_converged(self.label_distributions_, l_previous, self.tol)
                and remaining_iter > 1):
            l_previous = self.label_distributions_
            self.label_distributions_ = safe_sparse_dot(
                graph_matrix, self.label_distributions_)
            # clamp
            self.label_distributions_ = np.multiply(
                clamp_weights, self.label_distributions_) + y_static

            remaining_iter -= 1

        normalizer = np.sum(self.label_distributions_, axis=1)[:, np.newaxis]
        self.label_distributions_ /= normalizer

        if remaining_iter <= 1:
            warnings.warn('max_iter was reached without convergence.',
                          category=ConvergenceWarning)

        # set the transduction result (predicted labels for all samples)
        transduction = self.classes_[np.argmax(self.label_distributions_,
                                               axis=1)]
        self.transduction_ = transduction.ravel()
        self.n_iter_ = self.max_iter - remaining_iter
        return self
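# --- A toy sketch (numpy only; `W` stands in for a row-normalized
# graph_matrix) of one propagate-then-clamp update from the loop above,
# assuming alpha = 0.2.
import numpy as np

alpha = 0.2
W = np.array([[0.6, 0.4], [0.4, 0.6]])   # hypothetical affinity matrix
dist = np.array([[1.0, 0.0],             # labeled sample
                 [0.5, 0.5]])            # unlabeled sample
unlabeled = np.array([False, True])
clamp = np.ones((2, 1))
clamp[~unlabeled, 0] = 1 - alpha         # labeled rows keep weight 1 - alpha
y_static = alpha * np.copy(dist)
y_static[unlabeled] = 0                  # only labeled rows contribute
dist = clamp * (W @ dist) + y_static     # one iteration of the loop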