def fit(self, X, y):
    """Fit the model

    Parameters
    ----------
    X: 2d array, shape (n_samples, n_features)
        Data array

    y: 2d array, shape (n_pairs, 3)
        Array containing pairs elements indices and associated label.
        The indices of pairs elements are contained in ``y[:, :2]``,
        while the labels are given by ``y[:, 2]``. Labels are in {-1, 1}.
        The elements of the i-th pair are ``X[y[i, 0]]`` and
        ``X[y[i, 1]]``.

    Returns
    -------
    self
        The estimator itself. ``coefs_`` is set to the transpose of the
        matrix returned by the optimizer, ``threshold_`` to the returned
        threshold and ``n_features`` to ``X.shape[1]``.
    """
    # pylint: disable=E1101
    X = np.asarray(X)
    y = np.asarray(y)
    pairs = y[:, :2].astype("int")
    labels = y[:, 2]

    # initialize with whiten PCA
    L0 = pca(X, self.n_components)
    # Normalize to have unit average distance
    L0 /= np.sqrt(compute_distances(X, pairs, proj_mat=L0).mean())
    b0 = 1.0

    diff = compute_differences(X, pairs)
    n_pairs = len(pairs)
    n_samples, self.n_features = X.shape

    # Read the flag once so that set-up and clean-up always agree.
    use_kernel = self.kernel
    if use_kernel:
        # Build the (n_pairs, n_samples) selector directly in sparse form:
        # +1 at the first index of each pair, -1 at the second.  Going
        # through a dense array first would need n_pairs * n_samples floats.
        # NOTE(review): the COO-style constructor sums duplicate entries, so
        # a pair whose two indices coincide now gives an all-zero row
        # (previously the -1 overwrote the +1).  It also rejects negative
        # indices instead of wrapping them -- confirm neither is relied on.
        rows = np.arange(n_pairs)
        values = np.concatenate([np.ones(n_pairs), -np.ones(n_pairs)])
        row_idx = np.concatenate([rows, rows])
        col_idx = np.concatenate([pairs[:, 0], pairs[:, 1]])
        selector = csr_matrix((values, (row_idx, col_idx)),
                              shape=(n_pairs, n_samples))

    # Temporary state read by the objective / gradient methods.
    self._diff = diff
    self._labels = labels
    if use_kernel:
        self._U = selector

    try:
        x0 = MatThreshold(L0, b0)
        if self.callback is not None:
            self.callback(x0)

        L, self.threshold_ = lgbopt.fmin_gd(
            self._compute_obj, self._compute_grad, x0,
            callback=self.callback, inner=MatThreshold.dot,
            **self.opt_args)[0]
        self.coefs_ = L.T
    finally:
        # Drop the temporary state even when the callback or the optimizer
        # raises, so a failed fit does not keep the training data alive.
        del self._diff, self._labels
        if use_kernel:
            del self._U
    return self
def fit(self, X, y):
    """Fit the model

    Parameters
    ----------
    X: 2d array, shape (n_samples, n_features)
        Data array

    y: 2d array, shape (n_pairs, 3)
        Array containing pairs elements indices and associated label.
        The indices of pairs elements are contained in ``y[:, :2]``,
        while the labels are given by ``y[:, 2]``. Labels are in {-1, 1}.
        The elements of the i-th pair are ``X[y[i, 0]]`` and
        ``X[y[i, 1]]``.

    Returns
    -------
    self
        The estimator itself. ``coefs_`` is set to the transpose of the
        matrix returned by the optimizer, ``threshold_`` to the returned
        threshold and ``n_features`` to ``X.shape[1]``.
    """
    # pylint: disable=E1101
    X = np.asarray(X)
    y = np.asarray(y)
    pairs = y[:, :2].astype("int")
    labels = y[:, 2]

    # initialize with whiten PCA
    L0 = pca(X, self.n_components)
    # Normalize to have unit average distance
    L0 /= np.sqrt(compute_distances(X, pairs, proj_mat=L0).mean())
    b0 = 1.0

    diff = compute_differences(X, pairs)
    n_pairs = len(pairs)
    n_samples, self.n_features = X.shape

    # Read the flag once so that set-up and clean-up always agree.
    use_kernel = self.kernel
    if use_kernel:
        # Build the (n_pairs, n_samples) selector directly in sparse form:
        # +1 at the first index of each pair, -1 at the second.  Going
        # through a dense array first would need n_pairs * n_samples floats.
        # NOTE(review): the COO-style constructor sums duplicate entries, so
        # a pair whose two indices coincide now gives an all-zero row
        # (previously the -1 overwrote the +1).  It also rejects negative
        # indices instead of wrapping them -- confirm neither is relied on.
        rows = np.arange(n_pairs)
        values = np.concatenate([np.ones(n_pairs), -np.ones(n_pairs)])
        row_idx = np.concatenate([rows, rows])
        col_idx = np.concatenate([pairs[:, 0], pairs[:, 1]])
        selector = csr_matrix((values, (row_idx, col_idx)),
                              shape=(n_pairs, n_samples))

    # Temporary state read by the objective / gradient methods.
    self._diff = diff
    self._labels = labels
    if use_kernel:
        self._U = selector

    try:
        x0 = MatThreshold(L0, b0)
        if self.callback is not None:
            self.callback(x0)

        L, self.threshold_ = lgbopt.fmin_gd(
            self._compute_obj, self._compute_grad, x0,
            callback=self.callback, inner=MatThreshold.dot,
            **self.opt_args)[0]
        self.coefs_ = L.T
    finally:
        # Drop the temporary state even when the callback or the optimizer
        # raises, so a failed fit does not keep the training data alive.
        del self._diff, self._labels
        if use_kernel:
            del self._U
    return self
def fit(self, X, y):
    """Fit the model on two sets of features

    Parameters
    ----------
    X: sequence of two 2d arrays
        ``X[0]`` has shape (n_samples, n_features1) and ``X[1]`` holds the
        second set of features. Both are stacked column-wise with
        ``np.hstack`` for the initialization, so they must have the same
        number of rows.

    y: 2d array, shape (n_pairs, 3)
        Array containing pairs elements indices and associated label.
        The indices of pairs elements are contained in ``y[:, :2]``,
        while the labels are given by ``y[:, 2]``. Labels are in {-1, 1}.
        Presumably ``y[i, 0]`` indexes a row of ``X[0]`` and ``y[i, 1]``
        a row of ``X[1]`` (to be confirmed against
        ``compute_cm_distances``).

    Returns
    -------
    self
        The estimator itself. ``coefs_`` is set to the list
        ``[A.T, B.T]`` of the two matrices returned by the optimizer,
        ``threshold_`` to the returned threshold and ``n_features1`` to
        ``X[0].shape[1]``.
    """
    # pylint: disable=E1101
    X1 = np.asarray(X[0])
    X2 = np.asarray(X[1])
    y = np.asarray(y)
    pairs = y[:, :2].astype("int")
    labels = y[:, 2]

    # initialize with whiten PCA on the column-wise concatenated data,
    # then split the columns back into one block per input array
    L0 = pca(np.hstack(X), self.n_components)
    n_cols_first = X1.shape[1]
    A0 = L0[:, :n_cols_first]
    B0 = L0[:, n_cols_first:]

    # Normalize to have unit average distance
    dist = np.sqrt(
        compute_cm_distances(X1, X2, pairs,
                             proj_mat1=A0, proj_mat2=B0).mean())
    b0 = 1.0
    A0 /= dist
    B0 /= dist

    n_pairs = len(pairs)
    n_samples, self.n_features1 = X1.shape

    # Read the flag once so that set-up and clean-up always agree.
    use_kernel = self.kernel
    if use_kernel:
        # Build the two (n_pairs, n_samples) selectors directly in sparse
        # form (one entry per row) instead of going through dense arrays
        # of n_pairs * n_samples floats.
        # NOTE(review): unlike dense fancy indexing, the COO-style
        # constructor rejects negative indices -- confirm none are used.
        rows = np.arange(n_pairs)
        ones = np.ones(n_pairs)
        shape = (n_pairs, n_samples)
        select_first = csr_matrix((ones, (rows, pairs[:, 0])), shape=shape)
        select_second = csr_matrix((ones, (rows, pairs[:, 1])), shape=shape)

    # Temporary state read by the objective / gradient methods.
    self._labels = labels
    self._X1 = X1
    self._X2 = X2
    self._pairs = pairs
    if use_kernel:
        self._U = select_first
        self._V = select_second

    try:
        x0 = Mat2Threshold(A0, B0, b0)
        if self.callback is not None:
            self.callback(x0)

        A, B, self.threshold_ = lgbopt.fmin_gd(
            self._compute_obj, self._compute_grad, x0,
            callback=self.callback, inner=Mat2Threshold.dot,
            **self.opt_args)[0]
        self.coefs_ = [A.T, B.T]
    finally:
        # Drop the temporary state even when the callback or the optimizer
        # raises, so a failed fit does not keep the training data alive.
        del self._labels, self._X1, self._X2, self._pairs
        if use_kernel:
            del self._U, self._V
    return self
def fit(self, X, y):
    """Fit the model on two sets of features

    Parameters
    ----------
    X: sequence of two 2d arrays
        ``X[0]`` has shape (n_samples, n_features1) and ``X[1]`` holds the
        second set of features. Both are stacked column-wise with
        ``np.hstack`` for the initialization, so they must have the same
        number of rows.

    y: 2d array, shape (n_pairs, 3)
        Array containing pairs elements indices and associated label.
        The indices of pairs elements are contained in ``y[:, :2]``,
        while the labels are given by ``y[:, 2]``. Labels are in {-1, 1}.
        Presumably ``y[i, 0]`` indexes a row of ``X[0]`` and ``y[i, 1]``
        a row of ``X[1]`` (to be confirmed against
        ``compute_cm_distances``).

    Returns
    -------
    self
        The estimator itself. ``coefs_`` is set to the list
        ``[A.T, B.T]`` of the two matrices returned by the optimizer,
        ``threshold_`` to the returned threshold and ``n_features1`` to
        ``X[0].shape[1]``.
    """
    # pylint: disable=E1101
    X1 = np.asarray(X[0])
    X2 = np.asarray(X[1])
    y = np.asarray(y)
    pairs = y[:, :2].astype("int")
    labels = y[:, 2]

    # initialize with whiten PCA on the column-wise concatenated data,
    # then split the columns back into one block per input array
    L0 = pca(np.hstack(X), self.n_components)
    n_cols_first = X1.shape[1]
    A0 = L0[:, :n_cols_first]
    B0 = L0[:, n_cols_first:]

    # Normalize to have unit average distance
    dist = np.sqrt(
        compute_cm_distances(X1, X2, pairs,
                             proj_mat1=A0, proj_mat2=B0).mean())
    b0 = 1.0
    A0 /= dist
    B0 /= dist

    n_pairs = len(pairs)
    n_samples, self.n_features1 = X1.shape

    # Read the flag once so that set-up and clean-up always agree.
    use_kernel = self.kernel
    if use_kernel:
        # Build the two (n_pairs, n_samples) selectors directly in sparse
        # form (one entry per row) instead of going through dense arrays
        # of n_pairs * n_samples floats.
        # NOTE(review): unlike dense fancy indexing, the COO-style
        # constructor rejects negative indices -- confirm none are used.
        rows = np.arange(n_pairs)
        ones = np.ones(n_pairs)
        shape = (n_pairs, n_samples)
        select_first = csr_matrix((ones, (rows, pairs[:, 0])), shape=shape)
        select_second = csr_matrix((ones, (rows, pairs[:, 1])), shape=shape)

    # Temporary state read by the objective / gradient methods.
    self._labels = labels
    self._X1 = X1
    self._X2 = X2
    self._pairs = pairs
    if use_kernel:
        self._U = select_first
        self._V = select_second

    try:
        x0 = Mat2Threshold(A0, B0, b0)
        if self.callback is not None:
            self.callback(x0)

        A, B, self.threshold_ = lgbopt.fmin_gd(
            self._compute_obj, self._compute_grad, x0,
            callback=self.callback, inner=Mat2Threshold.dot,
            **self.opt_args)[0]
        self.coefs_ = [A.T, B.T]
    finally:
        # Drop the temporary state even when the callback or the optimizer
        # raises, so a failed fit does not keep the training data alive.
        del self._labels, self._X1, self._X2, self._pairs
        if use_kernel:
            del self._U, self._V
    return self