def plot_samples_distance(dataset, sortbyattr=None):
    """Show the matrix of euclidean distances between all samples as an image.

    The distance matrix is the element-wise square root of what
    ``squared_euclidean_distance`` returns for the (optionally reordered)
    samples. It is drawn with ``pl.imshow`` using nearest-neighbor
    interpolation, and a colorbar is added.

    Parameters
    ----------
    dataset : Dataset
      Providing the samples.
    sortbyattr : None or str
      If None, the samples distances will be in the same order as their
      appearance in the dataset. Alternatively, the name of a samples
      attribute can be given, which will then be used to sort/group the
      samples, e.g. to investigate the similarity of samples by label or
      by chunks.
    """
    if sortbyattr is None:
        samples = dataset.samples
    else:
        # Collect the sample indices group by group, following the order of
        # the unique values of the chosen samples attribute.
        order = []
        for value in dataset.sa[sortbyattr].unique:
            group = get_samples_by_attr(dataset, sortbyattr, value)
            order.extend(group.tolist())
        samples = dataset.samples[order]

    distances = np.sqrt(squared_euclidean_distance(samples))
    pl.imshow(distances, interpolation='nearest')
    pl.colorbar()
def test_euclid_dist(self):
    """Euclidean distance kernel testing

    Compares ``squared_euclidean_distance`` on a small block of samples
    against a brute-force pairwise computation. All tolerance checks are
    done on absolute values, so that deviations of opposite sign cannot
    cancel each other and negative deviations cannot pass unnoticed.
    """
    # select some block of data from already generated
    data = datasets['uni4large'].samples[:5, :8]
    ed = squared_euclidean_distance(data)

    # XXX not sure if that is right: 'weight' seems to be given by
    # feature (i.e. column), but distance is between samples (i.e. rows)
    # current behavior is:
    true_size = (5, 5)
    self.assertTrue(ed.shape == true_size)

    # slow version to compute the (squared) distance matrix
    ed_manual = np.zeros(true_size, 'd')
    for i in range(true_size[0]):
        for j in range(true_size[1]):
            ed_manual[i, j] = ((data[i, :] - data[j, :]) ** 2).sum()

    # distance of every sample to itself has to vanish
    self.assertTrue(np.abs(np.diag(ed_manual)).sum() < 0.0000000001)
    self.assertTrue(np.abs(np.diag(ed)).sum() < 0.0000000001)

    # let see whether Kernel does the same; a signed sum could hide
    # errors that cancel out or are negative, hence the abs()
    self.assertTrue(np.abs(ed - ed_manual).sum() < 0.0000001)
def test_euclid_dist(self):
    """Euclidean distance kernel testing

    Compares ``squared_euclidean_distance`` on a small block of samples
    against a brute-force pairwise computation. All tolerance checks are
    done on absolute values, so that deviations of opposite sign cannot
    cancel each other and negative deviations cannot pass unnoticed.
    """
    # select some block of data from already generated
    data = datasets['uni4large'].samples[:5, :8]
    ed = squared_euclidean_distance(data)

    # XXX not sure if that is right: 'weight' seems to be given by
    # feature (i.e. column), but distance is between samples (i.e. rows)
    # current behavior is:
    true_size = (5, 5)
    self.assertTrue(ed.shape == true_size)

    # slow version to compute the (squared) distance matrix
    ed_manual = np.zeros(true_size, 'd')
    for i in range(true_size[0]):
        for j in range(true_size[1]):
            ed_manual[i, j] = ((data[i, :] - data[j, :]) ** 2).sum()

    # distance of every sample to itself has to vanish
    self.assertTrue(np.abs(np.diag(ed_manual)).sum() < 0.0000000001)
    self.assertTrue(np.abs(np.diag(ed)).sum() < 0.0000000001)

    # let see whether Kernel does the same; a signed sum could hide
    # errors that cancel out or are negative, hence the abs()
    self.assertTrue(np.abs(ed - ed_manual).sum() < 0.0000001)
def _compute(self, data1, data2):
    """Evaluate the kernel for two sets of samples and store it in ``_k``.

    With ``d2`` being the result of ``squared_euclidean_distance`` for
    ``data1`` and ``data2`` (called with ``weight=1 / length_scale**2``),
    the stored matrix is ``sigma_f**2 * (1 + d2 / (2 * alpha)) ** -alpha``.

    Parameters
    ----------
    data1 : numpy.ndarray
      lhs data
    data2 : numpy.ndarray
      rhs data
    """
    inv_sq_length = 1.0 / (self.length_scale ** 2)
    sq_dist = squared_euclidean_distance(data1, data2, weight=inv_sq_length)
    base = 1.0 + sq_dist / (2.0 * self.alpha)
    self._k = self.sigma_f**2 * base ** -self.alpha
def _compute(self, data1, data2):
    """Evaluate the kernel for two sets of samples and store it in ``_k``.

    The result of ``squared_euclidean_distance`` (called with
    ``weight=length_scale**-2``) is kept in ``self.wdm2``; the kernel
    matrix is ``sigma_f**2 * exp(-0.5 * wdm2)``.

    Parameters
    ----------
    data1 : numpy.ndarray
      lhs data
    data2 : numpy.ndarray
      rhs data
    """
    inv_sq_length = self.length_scale**-2
    # weighted squared euclidean distance matrix, stored on the instance
    self.wdm2 = squared_euclidean_distance(data1, data2,
                                           weight=inv_sq_length)
    amplitude = self.sigma_f**2
    self._k = amplitude * np.exp(-0.5 * self.wdm2)
def _compute(self, data1, data2):
    """Fill ``self._k`` with the kernel matrix between ``data1`` and ``data2``.

    ``squared_euclidean_distance`` is called with
    ``weight=1 / length_scale**2``; its result ``d2`` enters the kernel as
    ``sigma_f**2 * (1 + d2 / (2 * alpha)) ** -alpha``.

    Parameters
    ----------
    data1 : numpy.ndarray
      lhs data
    data2 : numpy.ndarray
      rhs data
    """
    weighted_sq_dist = squared_euclidean_distance(
        data1,
        data2,
        weight=1.0 / (self.length_scale ** 2),
    )
    scaled = weighted_sq_dist / (2.0 * self.alpha)
    self._k = self.sigma_f**2 * (1.0 + scaled) ** -self.alpha
def _compute(self, data1, data2):
    """Fill ``self._k`` with the kernel matrix between ``data1`` and ``data2``.

    Side effect: ``self.wdm2`` is set to the result of
    ``squared_euclidean_distance`` called with
    ``weight=length_scale**-2``. The kernel matrix is then
    ``sigma_f**2 * exp(-0.5 * wdm2)``.

    Parameters
    ----------
    data1 : numpy.ndarray
      lhs data
    data2 : numpy.ndarray
      rhs data
    """
    # weighted squared euclidean distance matrix
    self.wdm2 = squared_euclidean_distance(
        data1,
        data2,
        weight=(self.length_scale**-2),
    )
    exponent = -0.5 * self.wdm2
    self._k = self.sigma_f**2 * np.exp(exponent)
def _compute(self, data1, data2):
    """Evaluate the kernel for two sets of samples and store it in ``_k``.

    The element-wise square root of ``squared_euclidean_distance`` (called
    with ``weight=params.length_scale**-2``) is kept in ``self.wdm``; the
    kernel matrix is ``params.sigma_f**2 * exp(-wdm)``.

    Parameters
    ----------
    data1 : numpy.ndarray
      lhs data
    data2 : numpy.ndarray
      rhs data
    """
    hyper = self.params
    # XXX possibly inefficient: length_scale gets squared here only for
    # the square root to be taken of the result right afterwards.
    inv_sq_length = hyper.length_scale**-2
    sq_dist = squared_euclidean_distance(data1, data2, weight=inv_sq_length)
    # Weighted euclidean distance matrix, stored on the instance
    self.wdm = np.sqrt(sq_dist)
    self._k = hyper.sigma_f**2 * np.exp(-self.wdm)
def _compute(self, data1, data2):
    """Fill ``self._k`` with the kernel matrix between ``data1`` and ``data2``.

    Side effect: ``self.wdm`` is set to the element-wise square root of
    the ``squared_euclidean_distance`` result (called with
    ``weight=params.length_scale**-2``). The kernel matrix is then
    ``params.sigma_f**2 * exp(-wdm)``.

    Parameters
    ----------
    data1 : numpy.ndarray
      lhs data
    data2 : numpy.ndarray
      rhs data
    """
    p = self.params
    # XXX length_scale is squared and the outcome square-rooted again;
    # this could (maybe) be computed more efficiently.
    weighted_sq = squared_euclidean_distance(
        data1,
        data2,
        weight=(p.length_scale**-2),
    )
    # Weighted euclidean distance matrix:
    self.wdm = np.sqrt(weighted_sq)
    amplitude = p.sigma_f**2
    self._k = amplitude * np.exp(-self.wdm)
def _compute(self, data1, data2):
    """Evaluate the kernel for two sets of samples and store it in ``_k``.

    ``squared_euclidean_distance`` is called with
    ``weight=0.5 / length_scale**2``. With ``d2`` being its result and
    ``d = sqrt(d2)``, the stored matrix depends on ``self.numerator``:

    * 3.0: ``sigma_f**2 * (1 + sqrt(3)*d) * exp(-sqrt(3)*d)``
    * 5.0: ``sigma_f**2 * (1 + sqrt(5)*d + 5/3*d2) * exp(-sqrt(5)*d)``

    For any other value of ``numerator``, ``self._k`` is not assigned.

    Parameters
    ----------
    data1 : numpy.ndarray
      lhs data
    data2 : numpy.ndarray
      rhs data
    """
    weight = 0.5 / (self.length_scale ** 2)
    sq_dist = squared_euclidean_distance(data1, data2, weight=weight)

    if self.numerator == 3.0:
        scaled = np.sqrt(3.0) * np.sqrt(sq_dist)
        self._k = self.sigma_f**2 * (1.0 + scaled) * np.exp(-scaled)
    elif self.numerator == 5.0:
        scaled = np.sqrt(5.0) * np.sqrt(sq_dist)
        polynomial = 1.0 + scaled + 5.0 / 3.0 * sq_dist
        self._k = self.sigma_f**2 * polynomial * np.exp(-scaled)
def _compute(self, data1, data2):
    """Fill ``self._k`` with the kernel matrix between ``data1`` and ``data2``.

    The result ``d2`` of ``squared_euclidean_distance`` (called with
    ``weight=0.5 / length_scale**2``) and its square root ``d`` are
    combined according to ``self.numerator``:

    * 3.0: ``sigma_f**2 * (1 + sqrt(3)*d) * exp(-sqrt(3)*d)``
    * 5.0: ``sigma_f**2 * (1 + sqrt(5)*d + 5/3*d2) * exp(-sqrt(5)*d)``

    Nothing is assigned to ``self._k`` for any other ``numerator``.

    Parameters
    ----------
    data1 : numpy.ndarray
      lhs data
    data2 : numpy.ndarray
      rhs data
    """
    d2 = squared_euclidean_distance(
        data1,
        data2,
        weight=0.5 / (self.length_scale**2),
    )
    if self.numerator == 3.0:
        d = np.sqrt(d2)
        root3 = np.sqrt(3.0)
        self._k = self.sigma_f**2 * (1.0 + root3 * d) * np.exp(-root3 * d)
    elif self.numerator == 5.0:
        d = np.sqrt(d2)
        root5 = np.sqrt(5.0)
        self._k = (
            self.sigma_f**2
            * (1.0 + root5 * d + 5.0 / 3.0 * d2)
            * np.exp(-root5 * d)
        )
def _compute(self, d1, d2):
    """Store ``exp(-D / sigma)`` in ``self._k``.

    ``D`` is the result of ``squared_euclidean_distance(d1, d2)`` and
    ``sigma`` is read from ``self.params.sigma``.
    """
    sq_dist = squared_euclidean_distance(d1, d2)
    self._k = np.exp(-sq_dist / self.params.sigma)
def _compute(self, d1, d2):
    """Compute the kernel matrix for ``d1`` and ``d2`` into ``self._k``.

    The matrix is ``exp(-D / self.params.sigma)``, where ``D`` is whatever
    ``squared_euclidean_distance(d1, d2)`` returns.
    """
    negated = -squared_euclidean_distance(d1, d2)
    exponent = negated / self.params.sigma
    self._k = np.exp(exponent)