Esempio n. 1
0
    def fit(self,
            X,
            y,
            src_index,
            tgt_index,
            fit_params_src=None,
            **fit_params_tgt):
        """
        Fit RegularTransferNN.

        A source network is built with ``get_network`` and, when
        ``fit_source`` is true, fitted on the source data. The target
        network is then derived from it through ``_add_regularization``
        and fitted on the target data.

        Parameters
        ----------
        X : numpy array
            Input data.

        y : numpy array
            Output data.

        src_index : iterable
            Indexes of the source data in X, y.

        tgt_index : iterable
            Indexes of the target data in X, y. Both ``X[tgt_index]``
            and ``y[tgt_index]`` are used to fit the target network.

        fit_params_src : dict, optional (default=None)
            Arguments given to the fit method of the source
            network (epochs, batch_size...).
            If None, ``fit_params_tgt`` is used instead.

        fit_params_tgt : key, value arguments
            Arguments given to the fit method of the target
            network (epochs, batch_size...).

        Returns
        -------
        self : returns an instance of self
        """
        # Source fitting falls back on the target arguments.
        source_kwargs = (fit_params_tgt
                         if fit_params_src is None else fit_params_src)

        check_indexes(src_index, tgt_index)

        source_net = check_network(self.get_network,
                                   "get_network",
                                   input_shape=X.shape[1:],
                                   output_shape=y.shape[1:],
                                   **self.kwargs)
        self.model_src_ = source_net
        if self.fit_source:
            source_net.fit(X[src_index], y[src_index], **source_kwargs)

        # Per-layer regularization strengths and trainable flags are
        # read from the layers of the (possibly fitted) source network.
        lambdas, trainables = self._get_lambdas_and_trainables(
            source_net.layers)

        target_net = _add_regularization(source_net, lambdas, trainables)
        self.model_tgt_ = target_net
        target_net.fit(X[tgt_index], y[tgt_index], **fit_params_tgt)
        return self
Esempio n. 2
0
    def fit(self,
            X,
            y,
            src_index,
            tgt_index,
            tgt_index_labeled=None,
            fit_params_src=None,
            **fit_params_tgt):
        """
        Fit ADDA.

        The source model is fitted first on the source data (plus the
        labeled target data when given). The target model is then fitted
        on the source data encoded by the source encoder, paired with the
        raw target inputs.

        Parameters
        ----------
        X : numpy array
            Input data.

        y : numpy array
            Output data.

        src_index : iterable
            Indexes of source labeled data in X, y.

        tgt_index : iterable
            Indexes of target unlabeled data in X, y.

        tgt_index_labeled : iterable, optional (default=None)
            Indexes of target labeled data in X, y.

        fit_params_src : dict, optional (default=None)
            Arguments given to the fit of the source model
            (epochs, batch_size...).
            If None, ``fit_params_tgt`` is used instead.

        fit_params_tgt : key, value arguments
            Arguments given to the fit of the target model
            (epochs, batch_size...).

        Returns
        -------
        self : returns an instance of self
        """
        check_indexes(src_index, tgt_index, tgt_index_labeled)

        source_kwargs = (fit_params_tgt
                         if fit_params_src is None else fit_params_src)

        # Labeled target samples, when available, are treated as
        # additional source samples.
        labeled_index = src_index
        if tgt_index_labeled is not None:
            labeled_index = np.concatenate((src_index, tgt_index_labeled))

        self._create_model(X.shape[1:], y.shape[1:])

        # Repeat the shorter index set so that both inputs of the
        # target model have the same number of rows.
        n_rows = max(len(labeled_index), len(tgt_index))
        tgt_rows = np.resize(tgt_index, n_rows)
        src_rows = np.resize(labeled_index, n_rows)

        self.src_model_.fit(X[labeled_index], y[labeled_index],
                            **source_kwargs)

        encoded_src = self.src_encoder_.predict(X[src_rows])
        self.tgt_model_.fit([encoded_src, X[tgt_rows]], **fit_params_tgt)
        return self
Esempio n. 3
0
    def fit(self,
            X,
            y,
            src_index,
            tgt_index,
            tgt_index_labeled=None,
            sample_weight=None,
            **fit_params):
        """
        Fit encoder and task networks.

        Source data and unlabeled target data are used for the correlation
        alignment in the encoded space.

        Source data and labeled target data are used to learn the task.

        Parameters
        ----------
        X : numpy array
            Input data.

        y : numpy array
            Output data.

        src_index : iterable
            Indexes of source labeled data in X, y.

        tgt_index : iterable
            Indexes of target unlabeled data in X, y.

        tgt_index_labeled : iterable, optional (default=None)
            Indexes of target labeled data in X, y.

        sample_weight : numpy array, optional (default=None)
            Individual weights for each sample.
            NOTE(review): this argument is not read by this method; a
            vector of ones is always passed to the model.

        fit_params : key, value arguments
            Arguments given to the fit method of the model
            (epochs, batch_size...).

        Returns
        -------
        self : returns an instance of self
        """
        check_indexes(src_index, tgt_index, tgt_index_labeled)

        self._create_model(X.shape[1:], y.shape[1:])

        # Task samples: source data, plus labeled target data if any.
        task_index = src_index
        if tgt_index_labeled is not None:
            task_index = np.concatenate((src_index, tgt_index_labeled))

        # Repeat every index set up to a common length so that all the
        # arrays given to the model have the same number of rows.
        n_rows = max(len(src_index), len(tgt_index), len(task_index))
        src_rows = np.resize(src_index, n_rows)
        tgt_rows = np.resize(tgt_index, n_rows)
        task_rows = np.resize(task_index, n_rows)

        model_inputs = [
            X[src_rows],
            X[tgt_rows],
            X[task_rows],
            y[task_rows],
            np.ones(n_rows),
        ]
        self.model_.fit(model_inputs, **fit_params)
        return self
Esempio n. 4
0
    def fit(self,
            X,
            y,
            src_index,
            tgt_index,
            tgt_index_labeled=None,
            sample_weight=None,
            **fit_params):
        """
        Perform correlation alignment on input source data to match
        input target data (given by ``tgt_index``).
        Then fit estimator on the aligned source data and the labeled
        target ones (given by ``tgt_index_labeled``).

        Parameters
        ----------
        X : numpy array
            Input data.

        y : numpy array
            Output data.

        src_index : iterable
            Indexes of source labeled data in X, y.

        tgt_index : iterable
            Indexes of target unlabeled data in X, y. Only
            ``X[tgt_index]`` is used (to estimate the target covariance).

        tgt_index_labeled : iterable, optional (default=None)
            Indexes of target labeled data in X, y.

        sample_weight : numpy array, optional (default=None)
            Individual weights for each sample, indexed like X. Only
            the entries of the training samples are given to the
            estimator.

        fit_params : key, value arguments
            Arguments given to the fit method of the estimator
            (epochs, batch_size...).

        Returns
        -------
        self : returns an instance of self
        """
        check_indexes(src_index, tgt_index, tgt_index_labeled)

        Xs = X[src_index]
        ys = y[src_index]
        Xt = X[tgt_index]

        self.estimator_ = check_estimator(self.get_estimator, **self.kwargs)

        # Feature covariances, shifted by lambdap * identity.
        self.Cs_ = np.cov(Xs,
                          rowvar=False) + self.lambdap * np.eye(Xs.shape[1])
        self.Ct_ = np.cov(Xt,
                          rowvar=False) + self.lambdap * np.eye(Xt.shape[1])

        # Whiten the source features with Cs^(-1/2), then re-color
        # them with Ct^(1/2).
        Xs = np.matmul(Xs, linalg.inv(linalg.sqrtm(self.Cs_)))
        Xs = np.matmul(Xs, linalg.sqrtm(self.Ct_))

        # Training set: aligned source data, followed by the (unaligned)
        # labeled target data when provided.
        if tgt_index_labeled is None:
            X_train = Xs
            y_train = ys
        else:
            X_train = np.concatenate((Xs, X[tgt_index_labeled]))
            y_train = np.concatenate((ys, y[tgt_index_labeled]))

        # ``sample_weight`` is a named parameter, so it can never be
        # present in ``fit_params``: adding it to a copy is safe.
        fit_kwargs = dict(fit_params)
        if sample_weight is not None:
            if tgt_index_labeled is None:
                train_weight = sample_weight[src_index]
            else:
                train_weight = np.concatenate(
                    (sample_weight[src_index],
                     sample_weight[tgt_index_labeled]))
            fit_kwargs["sample_weight"] = train_weight

        self.estimator_.fit(X_train, y_train, **fit_kwargs)
        return self
Esempio n. 5
0
    def fit(self, X, y, src_index, tgt_index,
            tgt_index_labeled=None, **fit_params):
        """
        Fit KLIEP.

        Importance weights of the training samples are computed with
        ``self._fit`` and given to the estimator as sample weights.
        When several candidate ``sigmas`` are provided, one of them is
        first selected by cross-validation over the target indexes: the
        retained value is the one with the highest J-score averaged over
        the ``cv`` folds.

        Parameters
        ----------
        X : numpy array
            Input data.

        y : numpy array
            Output data.

        src_index : iterable
            Indexes of source labeled data in X, y.

        tgt_index : iterable
            Indexes of target unlabeled data in X, y.

        tgt_index_labeled : iterable, optional (default=None)
            Indexes of target labeled data in X, y. They are added to
            the training samples.

        fit_params : key, value arguments
            Arguments given to the fit method of the estimator
            (epochs, batch_size...).

        Returns
        -------
        self : returns an instance of self
        """
        check_indexes(src_index, tgt_index, tgt_index_labeled)

        if tgt_index_labeled is None:
            train_index = src_index
        else:
            train_index = np.concatenate((src_index, tgt_index_labeled))
        Xs = X[train_index]
        ys = y[train_index]
        Xt = X[tgt_index]

        self.j_scores_ = []

        if hasattr(self.sigmas, "__len__") and len(self.sigmas) > 1:
            # The folds do not depend on sigma: build them once.
            split = int(len(tgt_index) / self.cv)
            folds = []
            for i in range(self.cv):
                if i == self.cv - 1:
                    # The last fold takes the remaining indexes.
                    test_index = tgt_index[i * split:]
                else:
                    test_index = tgt_index[i * split:(i + 1) * split]
                cv_train_index = np.array(
                    list(set(tgt_index) - set(test_index))
                )
                folds.append((cv_train_index, test_index))

            for sigma in self.sigmas:
                fold_scores = []
                for cv_train_index, test_index in folds:
                    alphas, centers = self._fit(Xs,
                                                X[cv_train_index],
                                                sigma)

                    # J-score: mean log of the estimated importance on
                    # the held-out target samples.
                    j_score = (1 / len(test_index)) * np.sum(np.log(
                        np.dot(
                            np.transpose(alphas),
                            pairwise.rbf_kernel(centers,
                                                X[test_index],
                                                sigma)
                        )
                    ))
                    fold_scores.append(j_score)
                # Average over all the folds (the score of the last
                # fold alone was used previously).
                self.j_scores_.append(np.mean(fold_scores))
            self.sigma_ = self.sigmas[np.argmax(self.j_scores_)]
        else:
            # ``sigmas`` is either a one-element sequence or a scalar.
            try:
                self.sigma_ = self.sigmas[0]
            except (TypeError, IndexError):
                self.sigma_ = self.sigmas

        self.alphas_, self.centers_ = self._fit(Xs, Xt, self.sigma_)

        self.weights_ = np.dot(
            np.transpose(self.alphas_),
            pairwise.rbf_kernel(self.centers_, Xs, self.sigma_)
            ).ravel()

        self.estimator_ = check_estimator(self.get_estimator, **self.kwargs)

        try:
            self.estimator_.fit(Xs, ys,
                                sample_weight=self.weights_,
                                **fit_params)
        except Exception:
            # Best-effort fallback (e.g. the estimator does not accept
            # ``sample_weight``): emulate the weighting by resampling
            # the training set with probabilities proportional to the
            # weights. ``Exception`` keeps KeyboardInterrupt and
            # SystemExit propagating.
            bootstrap_index = np.random.choice(
                len(Xs), size=len(Xs), replace=True,
                p=self.weights_ / self.weights_.sum())
            self.estimator_.fit(Xs[bootstrap_index], ys[bootstrap_index],
                                **fit_params)
        return self
Esempio n. 6
0
    def fit(self,
            X,
            y,
            src_index,
            tgt_index,
            tgt_index_labeled=None,
            **fit_params):
        """
        Fit DANN.

        The model is fitted on two aligned batches: task samples with
        their outputs, and discriminator samples with a domain label
        (0 for source, 1 for target).

        When ``lambdap`` is None, it is replaced by a backend variable
        initialized to 0 and an ``_IncreaseLambda`` callback, built from
        that variable and ``gamma``, is added to the fit callbacks.

        Parameters
        ----------
        X : numpy array
            Input data.

        y : numpy array
            Output data.

        src_index : iterable
            Indexes of source labeled data in X, y.

        tgt_index : iterable
            Indexes of target unlabeled data in X, y.

        tgt_index_labeled : iterable, optional (default=None)
            Indexes of target labeled data in X, y.

        fit_params : key, value arguments
            Arguments given to the fit method of DANN model
            (epochs, batch_size...).

        Returns
        -------
        self : returns an instance of self
        """
        check_indexes(src_index, tgt_index, tgt_index_labeled)

        # Decide whether lambda has to be scheduled *before* the None
        # placeholder is replaced; testing ``self.lambdap is None``
        # afterwards can never succeed.
        # NOTE(review): after a first fit ``self.lambdap`` holds the
        # variable created here, so a later call to fit will not
        # schedule it again.
        schedule_lambda = self.lambdap is None
        if schedule_lambda:
            self.lambdap = K.variable(0.)

        self._create_model(X.shape[1:], y.shape[1:])

        if tgt_index_labeled is None:
            task_index = src_index
        else:
            task_index = np.concatenate((src_index, tgt_index_labeled))
        disc_index = np.concatenate((src_index, tgt_index))
        # Domain labels, aligned position by position with disc_index.
        labels = np.array([0] * len(src_index) + [1] * len(tgt_index))

        max_size = max(len(disc_index), len(task_index))
        resized_task_ind = np.resize(task_index, max_size)
        resized_disc_ind = np.resize(disc_index, max_size)
        # ``labels`` is positional: it must be repeated the same way as
        # ``disc_index``, not indexed with row indexes of X.
        resized_labels = np.resize(labels, max_size)

        if schedule_lambda:
            callback = _IncreaseLambda(self.lambdap, self.gamma)
            # Build a new list so the caller's list is left untouched.
            user_callbacks = fit_params.get("callbacks") or []
            fit_params["callbacks"] = list(user_callbacks) + [callback]

        self.model_.fit([X[resized_task_ind], X[resized_disc_ind]],
                        [y[resized_task_ind], resized_labels],
                        **fit_params)
        return self
Esempio n. 7
0
    def fit(self,
            X,
            y,
            src_index,
            tgt_index,
            fit_params_src=None,
            **fit_params_tgt):
        """
        Fit the regularized transfer linear classifier.

        A source classifier is obtained from ``get_estimator`` (and fitted
        on the source data when ``fit_source`` is true). The target
        parameters then minimize the logistic loss on the target data
        plus ``lambdap`` times the squared distance to the source
        parameters.

        Parameters
        ----------
        X : numpy array
            Input data.

        y : numpy array
            Output data. Binary {-1, 1}

        src_index : iterable
            Indexes of source labeled data in X, y.

        tgt_index : iterable
            Indexes of target data in X, y. ``y[tgt_index]`` is used
            in the target loss.

        fit_params_src : dict, optional (default=None)
            Arguments given to the fit method of the
            source estimator (epochs, batch_size...).
            If None, ``fit_params_src = fit_params_tgt``

        fit_params_tgt : key, value arguments
            Only used as source fit arguments when ``fit_params_src``
            is None.

        Returns
        -------
        self : returns an instance of self

        Raises
        ------
        ValueError
            If some of the selected y values are not in {-1, 1}, or if
            ``get_estimator`` does not return a LogisticRegression or
            RidgeClassifier instance.
        """
        if fit_params_src is None:
            fit_params_src = fit_params_tgt

        check_indexes(src_index, tgt_index)

        if not np.all(
                np.isin(y[np.concatenate((src_index, tgt_index))], [-1., 1.])):
            raise ValueError("y values should be in {-1, 1}")

        self.estimator_src_ = check_estimator(self.get_estimator,
                                              **self.kwargs)
        if not isinstance(self.estimator_src_,
                          (LogisticRegression, RidgeClassifier)):
            raise ValueError("'get_estimator' should return a"
                             " LogisticRegression or RidgeClassifier"
                             " instance.")
        if self.fit_source:
            self.estimator_src_.fit(X[src_index], y[src_index],
                                    **fit_params_src)

        if self.intercept:
            # The intercept is handled as the coefficient of a constant
            # feature placed in the first column.
            beta_src = np.concatenate(
                (np.array([self.estimator_src_.intercept_[0]]),
                 self.estimator_src_.coef_[0]))
            Xt = np.concatenate((np.ones((len(tgt_index), 1)), X[tgt_index]),
                                axis=1)
        else:
            beta_src = self.estimator_src_.coef_[0]
            Xt = X[tgt_index]
        # Flattened so that a column-vector y cannot broadcast the
        # margins into an (n, n) matrix.
        yt = y[tgt_index].ravel()

        def func(beta):
            margins = yt * Xt.dot(beta)
            # logaddexp(0, -m) == log(1 + exp(-m)) without overflowing
            # for large negative margins.
            return (np.sum(np.logaddexp(0., -margins))
                    + self.lambdap * np.linalg.norm(beta - beta_src)**2)

        beta_tgt = minimize(func, beta_src)['x']

        if self.intercept:
            self.intercept_ = beta_tgt[0]
            self.coef_ = beta_tgt[1:]
        else:
            self.intercept_ = 0.
            self.coef_ = beta_tgt
        return self
Esempio n. 8
0
    def fit(self,
            X,
            y,
            src_index,
            tgt_index,
            fit_params_src=None,
            **fit_params_tgt):
        """
        Fit RegularTransferLR

        A source regressor is obtained from ``get_estimator`` (and fitted
        on the source data when ``fit_source`` is true). The target
        parameters then minimize the squared error on the target data
        plus ``lambdap`` times the squared distance to the source
        parameters.

        Parameters
        ----------
        X : numpy array
            Input data.

        y : numpy array
            Output data.

        src_index : iterable
            Indexes of source labeled data in X, y.

        tgt_index : iterable
            Indexes of target data in X, y. ``y[tgt_index]`` is used
            in the target loss.

        fit_params_src : dict, optional (default=None)
            Arguments given to the fit method of the
            source estimator (epochs, batch_size...).
            If None, ``fit_params_src = fit_params_tgt``

        fit_params_tgt : key, value arguments
            Only used as source fit arguments when ``fit_params_src``
            is None.

        Returns
        -------
        self : returns an instance of self

        Raises
        ------
        ValueError
            If ``get_estimator`` does not return a LinearRegression or
            Ridge instance.
        """
        if fit_params_src is None:
            fit_params_src = fit_params_tgt

        check_indexes(src_index, tgt_index)

        self.estimator_src_ = check_estimator(self.get_estimator,
                                              **self.kwargs)
        if not isinstance(self.estimator_src_, (LinearRegression, Ridge)):
            raise ValueError("'get_estimator' should return a"
                             " LinearRegression or Ridge instance.")
        if self.fit_source:
            self.estimator_src_.fit(X[src_index], y[src_index],
                                    **fit_params_src)

        if self.intercept:
            # The intercept is handled as the coefficient of a constant
            # feature placed in the first column.
            beta_src = np.concatenate(
                (np.array([self.estimator_src_.intercept_]),
                 self.estimator_src_.coef_))
            Xt = np.concatenate((np.ones((len(tgt_index), 1)), X[tgt_index]),
                                axis=1)
        else:
            beta_src = self.estimator_src_.coef_
            Xt = X[tgt_index]
        yt = y[tgt_index].reshape(-1)

        def func(beta):
            # Predictions and targets are both 1-D. Subtracting a 1-D
            # target from an (n, 1) prediction would broadcast to an
            # (n, n) matrix of all pairwise differences.
            residuals = Xt.dot(beta) - yt
            return (np.linalg.norm(residuals)**2 +
                    self.lambdap * np.linalg.norm(beta - beta_src)**2)

        beta_tgt = minimize(func, beta_src)['x']

        if self.intercept:
            self.intercept_ = beta_tgt[0]
            self.coef_ = beta_tgt[1:]
        else:
            self.intercept_ = 0.
            self.coef_ = beta_tgt
        return self
Esempio n. 9
0
    def fit(self,
            X,
            y,
            src_index,
            tgt_index,
            tgt_index_labeled=None,
            fit_params_ae=None,
            **fit_params):
        """
        Fit mSDA.

        A denoising autoencoder (Gaussian noise, encoder, decoder) is
        fitted to reconstruct the source and target inputs. The estimator
        is then fitted on the encoded source data (plus the encoded
        labeled target data when given).

        Parameters
        ----------
        X : numpy array
            Input data.

        y : numpy array
            Output data.

        src_index : iterable
            Indexes of source labeled data in X, y.

        tgt_index : iterable
            Indexes of target unlabeled data in X, y.

        tgt_index_labeled : iterable, optional (default=None)
            Indexes of target labeled data in X, y.

        fit_params_ae : dict, optional (default=None)
            Arguments given to the fit of the autoencoder
            (epochs, batch_size...).
            If None, ``fit_params`` is used instead.

        fit_params : key, value arguments
            Arguments given to the fit method of the estimator
            (epochs, batch_size...).

        Returns
        -------
        self : returns an instance of self
        """
        check_indexes(src_index, tgt_index, tgt_index_labeled)

        ae_kwargs = fit_params if fit_params_ae is None else fit_params_ae

        # The autoencoder sees every source and target input; the task
        # only sees samples for which an output is available.
        ae_index = np.concatenate((src_index, tgt_index))
        task_index = src_index
        if tgt_index_labeled is not None:
            task_index = np.concatenate((src_index, tgt_index_labeled))

        input_shape = X.shape[1:]
        self.encoder_ = check_network(self.get_encoder,
                                      "get_encoder",
                                      input_shape=input_shape,
                                      **self.enc_params)
        self.decoder_ = check_network(
            self.get_decoder,
            "get_decoder",
            input_shape=self.encoder_.output_shape[1:],
            output_shape=X.shape[1:],
            **self.dec_params)
        self.estimator_ = check_estimator(self.get_estimator,
                                          **self.est_params)

        # inputs -> noise -> encoder -> decoder
        inputs = Input(X.shape[1:])
        decoded = self.decoder_(
            self.encoder_(GaussianNoise(self.noise_lvl)(inputs)))
        self.autoencoder_ = Model(inputs, decoded, name="AutoEncoder")

        # Work on a copy so that the defaults added below do not leak
        # into ``self.compil_params``.
        compil_params = copy.deepcopy(self.compil_params)
        defaults = (("loss", "mean_squared_error"), ("optimizer", "adam"))
        for key, value in defaults:
            if key not in compil_params:
                compil_params[key] = value

        self.autoencoder_.compile(**compil_params)

        X_ae = X[ae_index]
        self.autoencoder_.fit(X_ae, X_ae, **ae_kwargs)

        encoded_task = self.encoder_.predict(X[task_index])
        self.estimator_.fit(encoded_task, y[task_index], **fit_params)
        return self
Esempio n. 10
0
    def fit(self,
            X,
            y,
            src_index,
            tgt_index,
            tgt_index_labeled=None,
            **fit_params):
        """
        Fit KMM.

        Weights of the training samples are obtained by minimizing
        ``1/2 w' K w - kappa' w`` under box constraints ``[0, B]`` and a
        constraint on the sum of the weights, then given to the estimator
        as sample weights.

        Parameters
        ----------
        X : numpy array
            Input data.

        y : numpy array
            Output data.

        src_index : iterable
            Indexes of source labeled data in X, y.

        tgt_index : iterable
            Indexes of target unlabeled data in X, y.

        tgt_index_labeled : iterable, optional (default=None)
            Indexes of target labeled data in X, y. They are added to
            the training samples.

        fit_params : key, value arguments
            Arguments given to the fit method of the estimator
            (epochs, batch_size...).

        Returns
        -------
        self : returns an instance of self
        """
        check_indexes(src_index, tgt_index, tgt_index_labeled)

        if tgt_index_labeled is None:
            train_index = src_index
        else:
            train_index = np.concatenate((src_index, tgt_index_labeled))
        Xs = X[train_index]
        ys = y[train_index]
        Xt = X[tgt_index]

        n_s = len(Xs)
        n_t = len(Xt)

        # Resolve epsilon locally: overwriting the ``epsilon``
        # hyper-parameter would make a later fit on data of another size
        # silently reuse the value derived from the first one. The value
        # actually used is exposed as ``epsilon_``.
        if self.epsilon is None:
            epsilon = (np.sqrt(n_s) - 1) / np.sqrt(n_s)
        else:
            epsilon = self.epsilon
        self.epsilon_ = epsilon

        # Compute Kernel Matrix (symmetrized)
        K = pairwise.pairwise_kernels(Xs,
                                      Xs,
                                      metric=self.kernel,
                                      **self.kernel_params)
        K = (1 / 2) * (K + K.transpose())

        # Compute kappa as a 1-D vector: scaled row sums of the
        # train/target kernel matrix.
        kappa = pairwise.pairwise_kernels(Xs,
                                          Xt,
                                          metric=self.kernel,
                                          **self.kernel_params)
        kappa = (n_s / n_t) * np.dot(kappa, np.ones(n_t))

        # The sum of the weights must stay within n_s * (1 +/- epsilon).
        constraints = LinearConstraint(np.ones((1, n_s)),
                                       lb=n_s * (1 - epsilon),
                                       ub=n_s * (1 + epsilon))

        def func(x):
            # All operands are 1-D, so the objective is a true scalar.
            return (1 / 2) * x @ (K @ x) - kappa @ x

        weights = minimize(func,
                           x0=np.ones(n_s),
                           bounds=[(0, self.B)] * n_s,
                           constraints=constraints)['x']

        self.weights_ = np.array(weights).ravel()

        self.estimator_ = check_estimator(self.get_estimator, **self.kwargs)

        try:
            self.estimator_.fit(Xs,
                                ys,
                                sample_weight=self.weights_,
                                **fit_params)
        except Exception:
            # Best-effort fallback (e.g. the estimator does not accept
            # ``sample_weight``): emulate the weighting by resampling
            # the training set with probabilities proportional to the
            # weights. ``Exception`` keeps KeyboardInterrupt and
            # SystemExit propagating.
            bootstrap_index = np.random.choice(len(Xs),
                                               size=len(Xs),
                                               replace=True,
                                               p=self.weights_ /
                                               self.weights_.sum())
            self.estimator_.fit(Xs[bootstrap_index], ys[bootstrap_index],
                                **fit_params)
        return self
Esempio n. 11
0
    def fit(self,
            X,
            y,
            src_index,
            tgt_index,
            sample_weight=None,
            **fit_params):
        """
        Fit TwoStageTrAdaBoostR2.

        At each of the ``n_estimators`` rounds, the current source and
        target weights are recorded, a cross-validation score is computed
        with them, and ``_boost`` returns the updated weights, which are
        then normalized to sum to one over both domains.

        Parameters
        ----------
        X : numpy array
            Input data.

        y : numpy array
            Output data.

        src_index : iterable
            Indexes of source data in X, y.

        tgt_index : iterable
            Indexes of target data in X, y.

        sample_weight : numpy array, optional (default=None)
            Individual weights for each sample. If None, every source
            and target sample starts with the same weight.

        fit_params : key, value arguments
            Arguments given to the fit method of the estimator
            (epochs, batch_size...).

        Returns
        -------
        self : returns an instance of self
        """
        check_indexes(src_index, tgt_index)

        n_s = len(src_index)
        n_t = len(tgt_index)

        # Initial weights, normalized over source and target together.
        if sample_weight is None:
            raw_src = np.ones(n_s)
            raw_tgt = np.ones(n_t)
            total = n_s + n_t
        else:
            raw_src = sample_weight[src_index]
            raw_tgt = sample_weight[tgt_index]
            total = raw_src.sum() + raw_tgt.sum()
        w_src = raw_src / total
        w_tgt = raw_tgt / total

        self.sample_weights_src_ = []
        self.sample_weights_tgt_ = []
        self.estimators_ = []
        self.estimator_errors_ = []

        for boost_round in range(self.n_estimators):
            # Keep a snapshot of the weights used at this round.
            self.sample_weights_src_.append(np.copy(w_src))
            self.sample_weights_tgt_.append(np.copy(w_tgt))

            cv_score = self._cross_val_score(X, y, src_index, tgt_index,
                                             w_src, w_tgt, **fit_params)
            self.estimator_errors_.append(cv_score.mean())

            w_src, w_tgt = self._boost(boost_round, X, y, src_index,
                                       tgt_index, w_src, w_tgt,
                                       **fit_params)

            # ``_boost`` returning None for the source weights ends
            # the boosting early.
            if w_src is None:
                break

            total = w_src.sum() + w_tgt.sum()
            w_src = w_src / total
            w_tgt = w_tgt / total

        self.estimator_errors_ = np.array(self.estimator_errors_)
        return self