def fit(self, X, y, src_index, tgt_index, tgt_index_labeled=None,
        **fit_params):
    """
    Fit KLIEP.

    Importance weights are estimated for the labeled training rows
    (source rows, plus labeled target rows when given) so that they
    match the rows selected by ``tgt_index``. The estimator is then
    fitted on the labeled rows using these weights.

    When ``self.sigmas`` holds more than one candidate, the kernel
    parameter is chosen by ``self.cv``-fold cross-validation over
    ``tgt_index``: the candidate with the highest mean log-weight
    score on the held-out folds is kept in ``sigma_`` and the per
    candidate scores are stored in ``j_scores_``.

    Parameters
    ----------
    X : numpy array
        Input data.

    y : numpy array
        Output data.

    src_index : iterable
        indexes of source labeled data in X, y.

    tgt_index : iterable
        indexes of target unlabeled data in X, y.

    tgt_index_labeled : iterable, optional (default=None)
        indexes of target labeled data in X, y.

    fit_params : key, value arguments
        Arguments given to the fit method of the estimator
        (epochs, batch_size...).

    Returns
    -------
    self : returns an instance of self
    """
    check_indexes(src_index, tgt_index, tgt_index_labeled)

    if tgt_index_labeled is None:
        labeled_index = src_index
    else:
        labeled_index = np.concatenate((src_index, tgt_index_labeled))
    Xs = X[labeled_index]
    ys = y[labeled_index]
    Xt = X[tgt_index]

    self.j_scores_ = []

    if hasattr(self.sigmas, "__len__") and len(self.sigmas) > 1:
        # Fold size does not depend on sigma: compute it once.
        split = int(len(tgt_index) / self.cv)
        for sigma in self.sigmas:
            j_scores = []
            for i in range(self.cv):
                if i == self.cv - 1:
                    # The last fold takes the remainder of the indexes.
                    test_index = tgt_index[i * split:]
                else:
                    test_index = tgt_index[i * split:(i + 1) * split]
                train_index = np.setdiff1d(tgt_index, test_index)

                alphas, centers = self._fit(Xs, X[train_index], sigma)

                held_out_weights = np.dot(
                    np.transpose(alphas),
                    pairwise.rbf_kernel(centers, X[test_index], sigma)
                )
                j_scores.append(
                    (1 / len(test_index))
                    * np.sum(np.log(held_out_weights))
                )
            # Average over ALL folds (not only the last fold's score).
            self.j_scores_.append(np.mean(j_scores))
        self.sigma_ = self.sigmas[np.argmax(self.j_scores_)]
    else:
        # A single candidate may be given as a scalar or as a
        # one-element sequence.
        try:
            self.sigma_ = self.sigmas[0]
        except (TypeError, IndexError):
            self.sigma_ = self.sigmas

    self.alphas_, self.centers_ = self._fit(Xs, Xt, self.sigma_)

    self.weights_ = np.dot(
        np.transpose(self.alphas_),
        pairwise.rbf_kernel(self.centers_, Xs, self.sigma_)
    ).ravel()

    self.estimator_ = check_estimator(self.get_estimator, **self.kwargs)

    try:
        self.estimator_.fit(Xs, ys,
                            sample_weight=self.weights_,
                            **fit_params)
    except Exception:
        # Fallback used when the weighted fit fails (for instance when
        # the estimator does not accept ``sample_weight``): emulate the
        # weighting with a bootstrap sample drawn proportionally to
        # the weights.
        bootstrap_index = np.random.choice(
            len(Xs), size=len(Xs), replace=True,
            p=self.weights_ / self.weights_.sum())
        self.estimator_.fit(Xs[bootstrap_index], ys[bootstrap_index],
                            **fit_params)
    return self
def fit(self, X, y, src_index, tgt_index, tgt_index_labeled=None,
        sample_weight=None, **fit_params):
    """
    Perform correlation alignment on input source data to match
    input target data (given by ``tgt_index``). Then fit estimator
    on the aligned source data and the labeled target ones (given
    by ``tgt_index_labeled``).

    The regularized covariance matrices of the source and target
    inputs are stored in ``Cs_`` and ``Ct_``. Source inputs are
    multiplied by the inverse square root of ``Cs_`` and then by the
    square root of ``Ct_``.

    Parameters
    ----------
    X : numpy array
        Input data.

    y : numpy array
        Output data.

    src_index : iterable
        indexes of source labeled data in X, y.

    tgt_index : iterable
        indexes of target unlabeled data in X, y.

    tgt_index_labeled : iterable, optional (default=None)
        indexes of target labeled data in X, y.

    sample_weight : numpy array, optional (default=None)
        Individual weights for each sample. It is indexed with
        ``src_index`` (and ``tgt_index_labeled``), like X and y.

    fit_params : key, value arguments
        Arguments given to the fit method of the estimator
        (epochs, batch_size...).

    Returns
    -------
    self : returns an instance of self
    """
    check_indexes(src_index, tgt_index, tgt_index_labeled)

    Xs = X[src_index]
    ys = y[src_index]
    Xt = X[tgt_index]

    self.estimator_ = check_estimator(self.get_estimator, **self.kwargs)

    self.Cs_ = (np.cov(Xs, rowvar=False)
                + self.lambdap * np.eye(Xs.shape[1]))
    self.Ct_ = (np.cov(Xt, rowvar=False)
                + self.lambdap * np.eye(Xt.shape[1]))

    whitening = linalg.inv(linalg.sqrtm(self.Cs_))
    coloring = linalg.sqrtm(self.Ct_)
    # ``sqrtm`` may return a complex-typed result because of round-off;
    # keep the real part so the estimator receives real features.
    if np.iscomplexobj(whitening):
        whitening = np.real(whitening)
    if np.iscomplexobj(coloring):
        coloring = np.real(coloring)

    Xs = np.matmul(np.matmul(Xs, whitening), coloring)

    if tgt_index_labeled is None:
        X_train = Xs
        y_train = ys
    else:
        # Labeled target rows are used as they are (not aligned).
        X_train = np.concatenate((Xs, X[tgt_index_labeled]))
        y_train = np.concatenate((ys, y[tgt_index_labeled]))

    if sample_weight is None:
        self.estimator_.fit(X_train, y_train, **fit_params)
    else:
        if tgt_index_labeled is None:
            train_weight = sample_weight[src_index]
        else:
            train_weight = np.concatenate((
                sample_weight[src_index],
                sample_weight[tgt_index_labeled]
            ))
        self.estimator_.fit(X_train, y_train,
                            sample_weight=train_weight,
                            **fit_params)
    return self
def fit(self, X, y, src_index, tgt_index, fit_params_src=None,
        **fit_params_tgt):
    """
    Fit the regularized transfer linear classifier.

    A source estimator (LogisticRegression or RidgeClassifier) gives
    the source parameters. The target parameters are then obtained by
    minimizing the logistic loss on the rows given by ``tgt_index``
    plus ``self.lambdap`` times the squared distance to the source
    parameters. The result is stored in ``coef_`` and ``intercept_``.

    Parameters
    ----------
    X : numpy array
        Input data.

    y : numpy array
        Output data. Binary {-1, 1}

    src_index : iterable
        indexes of source labeled data in X, y.

    tgt_index : iterable
        indexes of target data in X, y. The labels ``y[tgt_index]``
        are used by the target loss.

    fit_params_src : dict, optional (default=None)
        Arguments given to the fit method of the source
        estimator (epochs, batch_size...).
        If None, ``fit_params_src = fit_params_tgt``

    fit_params_tgt : key, value arguments
        Arguments given to the fit method of the target
        estimator (epochs, batch_size...).

    Returns
    -------
    self : returns an instance of self

    Raises
    ------
    ValueError
        If labels on the source or target rows are not in {-1, 1},
        or if ``get_estimator`` does not return a LogisticRegression
        or RidgeClassifier instance.
    """
    if fit_params_src is None:
        fit_params_src = fit_params_tgt

    check_indexes(src_index, tgt_index)

    if not np.all(
            np.isin(y[np.concatenate((src_index, tgt_index))],
                    [-1., 1.])):
        raise ValueError("y values should be in {-1, 1}")

    self.estimator_src_ = check_estimator(self.get_estimator,
                                          **self.kwargs)

    if not isinstance(self.estimator_src_,
                      (LogisticRegression, RidgeClassifier)):
        raise ValueError("'get_estimator' should return a"
                         " LogisticRegression or RidgeClassifier"
                         " instance.")

    # When ``fit_source`` is falsy the estimator is used as returned
    # by ``check_estimator``; it must already expose ``coef_`` (and
    # ``intercept_`` when ``self.intercept`` is set).
    if self.fit_source:
        self.estimator_src_.fit(X[src_index], y[src_index],
                                **fit_params_src)

    if self.intercept:
        # The intercept is handled as the weight of a constant
        # column of ones put in front of the target inputs.
        beta_src = np.concatenate((
            np.array([self.estimator_src_.intercept_[0]]),
            self.estimator_src_.coef_[0]
        ))
        Xt = np.concatenate(
            (np.ones((len(tgt_index), 1)), X[tgt_index]),
            axis=1)
    else:
        beta_src = self.estimator_src_.coef_[0]
        Xt = X[tgt_index]
    # Flatten so that the margins below stay one-dimensional.
    yt = np.ravel(y[tgt_index])

    def func(beta):
        margins = yt * Xt.dot(beta)
        # log(1 + exp(-m)) computed with logaddexp: no overflow for
        # large negative margins.
        return (np.sum(np.logaddexp(0., -margins))
                + self.lambdap * np.linalg.norm(beta - beta_src) ** 2)

    beta_tgt = minimize(func, beta_src)['x']

    if self.intercept:
        self.intercept_ = beta_tgt[0]
        self.coef_ = beta_tgt[1:]
    else:
        self.intercept_ = 0.
        self.coef_ = beta_tgt
    return self
def fit(self, X, y, src_index, tgt_index, fit_params_src=None,
        **fit_params_tgt):
    """
    Fit RegularTransferLR

    A source estimator (LinearRegression or Ridge) gives the source
    parameters. The target parameters are then obtained by minimizing
    the squared error on the rows given by ``tgt_index`` plus
    ``self.lambdap`` times the squared distance to the source
    parameters. The result is stored in ``coef_`` and ``intercept_``.

    Parameters
    ----------
    X : numpy array
        Input data.

    y : numpy array
        Output data.

    src_index : iterable
        indexes of source labeled data in X, y.

    tgt_index : iterable
        indexes of target data in X, y. The outputs ``y[tgt_index]``
        are used by the target loss.

    fit_params_src : dict, optional (default=None)
        Arguments given to the fit method of the source
        estimator (epochs, batch_size...).
        If None, ``fit_params_src = fit_params_tgt``

    fit_params_tgt : key, value arguments
        Arguments given to the fit method of the target
        estimator (epochs, batch_size...).

    Returns
    -------
    self : returns an instance of self

    Raises
    ------
    ValueError
        If ``get_estimator`` does not return a LinearRegression or
        Ridge instance.
    """
    if fit_params_src is None:
        fit_params_src = fit_params_tgt

    check_indexes(src_index, tgt_index)

    self.estimator_src_ = check_estimator(self.get_estimator,
                                          **self.kwargs)

    if not isinstance(self.estimator_src_, (LinearRegression, Ridge)):
        raise ValueError("'get_estimator' should return a"
                         " LinearRegression or Ridge instance.")

    # When ``fit_source`` is falsy the estimator is used as returned
    # by ``check_estimator``; it must already expose ``coef_`` (and
    # ``intercept_`` when ``self.intercept`` is set).
    if self.fit_source:
        self.estimator_src_.fit(X[src_index], y[src_index],
                                **fit_params_src)

    if self.intercept:
        # The intercept is handled as the weight of a constant
        # column of ones put in front of the target inputs.
        beta_src = np.concatenate((
            np.array([self.estimator_src_.intercept_]),
            self.estimator_src_.coef_
        ))
        Xt = np.concatenate(
            (np.ones((len(tgt_index), 1)), X[tgt_index]),
            axis=1)
    else:
        beta_src = self.estimator_src_.coef_
        Xt = X[tgt_index]
    # Flatten the targets: subtracting a (n,) vector from a (n, 1)
    # prediction would broadcast to a (n, n) matrix and corrupt the
    # loss.
    yt = np.ravel(y[tgt_index])

    def func(beta):
        residuals = Xt.dot(beta) - yt
        return (np.linalg.norm(residuals) ** 2
                + self.lambdap * np.linalg.norm(beta - beta_src) ** 2)

    beta_tgt = minimize(func, beta_src)['x']

    if self.intercept:
        self.intercept_ = beta_tgt[0]
        self.coef_ = beta_tgt[1:]
    else:
        self.intercept_ = 0.
        self.coef_ = beta_tgt
    return self
def fit(self, X, y, src_index, tgt_index, tgt_index_labeled=None,
        fit_params_ae=None, **fit_params):
    """
    Fit mSDA.

    An autoencoder (Gaussian noise, then encoder, then decoder) is
    trained to reconstruct the source and target inputs. The
    estimator is then fitted on the encoder outputs of the labeled
    rows (source rows, plus labeled target rows when given).

    Parameters
    ----------
    X : numpy array
        Input data.

    y : numpy array
        Output data.

    src_index : iterable
        indexes of source labeled data in X, y.

    tgt_index : iterable
        indexes of target unlabeled data in X, y.

    tgt_index_labeled : iterable, optional (default=None)
        indexes of target labeled data in X, y.

    fit_params_ae : dict, optional (default=None)
        Arguments given to the fit process of the autoencoder
        (epochs, batch_size...).
        If None, ``fit_params_ae = fit_params``

    fit_params : key, value arguments
        Arguments given to the fit method of the estimator
        (epochs, batch_size...).

    Returns
    -------
    self : returns an instance of self
    """
    check_indexes(src_index, tgt_index, tgt_index_labeled)

    ae_fit_kwargs = fit_params if fit_params_ae is None else fit_params_ae

    # Rows seen by the autoencoder / rows seen by the estimator.
    unsupervised_index = np.concatenate((src_index, tgt_index))
    if tgt_index_labeled is not None:
        supervised_index = np.concatenate((src_index, tgt_index_labeled))
    else:
        supervised_index = src_index

    feature_shape = X.shape[1:]

    self.encoder_ = check_network(
        self.get_encoder,
        "get_encoder",
        input_shape=feature_shape,
        **self.enc_params)
    self.decoder_ = check_network(
        self.get_decoder,
        "get_decoder",
        input_shape=self.encoder_.output_shape[1:],
        output_shape=feature_shape,
        **self.dec_params)
    self.estimator_ = check_estimator(self.get_estimator,
                                      **self.est_params)

    inputs = Input(feature_shape)
    corrupted = GaussianNoise(self.noise_lvl)(inputs)
    reconstruction = self.decoder_(self.encoder_(corrupted))
    self.autoencoder_ = Model(inputs, reconstruction, name="AutoEncoder")

    # Work on a copy so the stored compile parameters stay untouched.
    compile_kwargs = copy.deepcopy(self.compil_params)
    for key, default in (("loss", "mean_squared_error"),
                         ("optimizer", "adam")):
        if key not in compile_kwargs:
            compile_kwargs[key] = default
    self.autoencoder_.compile(**compile_kwargs)

    ae_data = X[unsupervised_index]
    self.autoencoder_.fit(ae_data, ae_data, **ae_fit_kwargs)

    encoded_inputs = self.encoder_.predict(X[supervised_index])
    self.estimator_.fit(encoded_inputs, y[supervised_index], **fit_params)
    return self
def fit(self, X, y, src_index, tgt_index, tgt_index_labeled=None,
        **fit_params):
    """
    Fit KMM.

    Weights for the labeled training rows (source rows, plus labeled
    target rows when given) are obtained by solving a bounded,
    linearly constrained quadratic program built from kernel
    evaluations between the training rows and the rows given by
    ``tgt_index``. The weights are stored in ``weights_`` and used
    to fit the estimator.

    ``self.epsilon`` is only read: when it is None, a value computed
    from the number of training rows is used for this call without
    being written back on the instance, so repeated fits on data of
    different sizes do not reuse a stale value.

    Parameters
    ----------
    X : numpy array
        Input data.

    y : numpy array
        Output data.

    src_index : iterable
        indexes of source labeled data in X, y.

    tgt_index : iterable
        indexes of target unlabeled data in X, y.

    tgt_index_labeled : iterable, optional (default=None)
        indexes of target labeled data in X, y.

    fit_params : key, value arguments
        Arguments given to the fit method of the estimator
        (epochs, batch_size...).

    Returns
    -------
    self : returns an instance of self
    """
    check_indexes(src_index, tgt_index, tgt_index_labeled)

    if tgt_index_labeled is None:
        labeled_index = src_index
    else:
        labeled_index = np.concatenate((src_index, tgt_index_labeled))
    Xs = X[labeled_index]
    ys = y[labeled_index]
    Xt = X[tgt_index]

    n_s = len(Xs)
    n_t = len(Xt)

    # Get epsilon (local value: the hyper-parameter is left untouched)
    epsilon = self.epsilon
    if epsilon is None:
        epsilon = (np.sqrt(n_s) - 1) / np.sqrt(n_s)

    # Compute Kernel Matrix (symmetrized against round-off)
    K = pairwise.pairwise_kernels(Xs, Xs, metric=self.kernel,
                                  **self.kernel_params)
    K = (1 / 2) * (K + K.transpose())

    # Compute q: scaled row sums of the train/target kernel, kept
    # one-dimensional so the objective below returns a scalar.
    kappa = pairwise.pairwise_kernels(Xs, Xt, metric=self.kernel,
                                      **self.kernel_params)
    kappa = (n_s / n_t) * kappa.sum(axis=1)

    # The sum of the weights must stay within n_s * (1 +/- epsilon).
    constraints = LinearConstraint(np.ones((1, n_s)),
                                   lb=n_s * (1 - epsilon),
                                   ub=n_s * (1 + epsilon))

    def func(x):
        return (1 / 2) * x @ (K @ x) - kappa @ x

    # ``minimize`` expects a one-dimensional starting point.
    weights = minimize(func,
                       x0=np.ones(n_s),
                       bounds=[(0, self.B)] * n_s,
                       constraints=constraints)['x']

    self.weights_ = np.array(weights).ravel()

    self.estimator_ = check_estimator(self.get_estimator, **self.kwargs)

    try:
        self.estimator_.fit(Xs, ys,
                            sample_weight=self.weights_,
                            **fit_params)
    except Exception:
        # Fallback used when the weighted fit fails (for instance when
        # the estimator does not accept ``sample_weight``): emulate the
        # weighting with a bootstrap sample drawn proportionally to
        # the weights.
        bootstrap_index = np.random.choice(
            len(Xs), size=len(Xs), replace=True,
            p=self.weights_ / self.weights_.sum())
        self.estimator_.fit(Xs[bootstrap_index], ys[bootstrap_index],
                            **fit_params)
    return self
def _boost(self, iboost, X, y, src_index, tgt_index,
           sample_weight_src, sample_weight_tgt, **fit_params):
    """
    Run one boosting iteration.

    A new estimator is fitted on the source and target rows with the
    current weights, its absolute errors are computed, and the sample
    weights are updated (except on the last iteration). On success
    the estimator and its error are appended to ``estimators_`` and
    ``estimator_errors_``.

    Parameters
    ----------
    iboost : int
        Index of the current boosting iteration.

    X : numpy array
        Input data.

    y : numpy array
        Output data.

    src_index : iterable
        indexes of source data in X, y.

    tgt_index : iterable
        indexes of target data in X, y.

    sample_weight_src : numpy array
        Current weights of the source rows. Updated in place unless
        ``self`` is an ``_AdaBoostR2`` instance.

    sample_weight_tgt : numpy array
        Current weights of the target rows. Updated in place unless
        ``self`` is an ``_AdaBoostR2`` instance.

    fit_params : key, value arguments
        Arguments given to the fit method of the estimator.

    Returns
    -------
    sample_weight_src, sample_weight_tgt : numpy arrays or None
        Updated weights, or ``(None, None)`` when the estimator error
        is greater than or equal to 0.5. In that case the estimator
        is not stored.
    """
    index = np.concatenate((src_index, tgt_index))
    sample_weight = np.concatenate((sample_weight_src,
                                    sample_weight_tgt))

    estimator = check_estimator(self.get_estimator, **self.kwargs)

    try:
        estimator.fit(X[index], y[index],
                      sample_weight=sample_weight,
                      **fit_params)
    except Exception:
        # Fallback used when the weighted fit fails (for instance when
        # the estimator does not accept ``sample_weight``): emulate the
        # weighting with a bootstrap sample. The weights are used
        # directly as probabilities.
        bootstrap_index = np.random.choice(
            index, size=len(index), replace=True, p=sample_weight)
        estimator.fit(X[bootstrap_index], y[bootstrap_index],
                      **fit_params)

    error_vect_src = np.abs(
        estimator.predict(X[src_index]).ravel() - y[src_index])
    error_vect_tgt = np.abs(
        estimator.predict(X[tgt_index]).ravel() - y[tgt_index])
    error_vect = np.concatenate((error_vect_src, error_vect_tgt))

    if isinstance(self, (TrAdaBoostR2, _AdaBoostR2)):
        # Regression variants: scale the errors by the largest one.
        error_max = error_vect.max()
        if error_max != 0:
            error_vect /= error_max
            error_vect_src /= error_max
            error_vect_tgt /= error_max

    if isinstance(self, _AdaBoostR2):
        estimator_error = (sample_weight * error_vect).sum()
    else:
        estimator_error = ((sample_weight_tgt * error_vect_tgt).sum()
                           / (2 * sample_weight_tgt.sum()))

    # Too weak a learner: report it to the caller instead of raising.
    if estimator_error >= 0.5:
        return None, None

    beta_t = estimator_error / (1. - estimator_error)
    beta_s = 1. / (
        1. + np.sqrt(2. * np.log(len(src_index)) / self.n_estimators))

    # No weight update after the last iteration.
    if not iboost == self.n_estimators - 1:
        if isinstance(self, _AdaBoostR2):
            sample_weight_tgt = (
                sample_weight_tgt
                * np.power(beta_t, (1 - error_vect_tgt)))
            # Rescale so that the target weights sum to one minus the
            # sum of the source weights.
            sample_weight_tgt *= ((1. - sample_weight_src.sum())
                                  / sample_weight_tgt.sum())
        else:
            # Source updating weights
            sample_weight_src *= np.power(beta_s, error_vect_src)

            # Target updating weights
            sample_weight_tgt *= np.power(beta_t, -error_vect_tgt)

    self.estimators_.append(estimator)
    self.estimator_errors_.append(estimator_error)

    return sample_weight_src, sample_weight_tgt