def testEuclidDist(self):
    """Check squared_euclidean_distance against a naive pairwise loop.

    A 5x8 block of the ``uni4large`` samples is passed to
    ``squared_euclidean_distance``.  The result must have shape (5, 5),
    a numerically zero diagonal, and entries matching squared distances
    computed explicitly for every pair of rows.
    """
    # select some block of data from already generated
    data = datasets['uni4large'].samples[:5, :8]
    ed = squared_euclidean_distance(data)

    # XXX not sure if that is right: 'weight' seems to be given by
    # feature (i.e. column), but distance is between samples (i.e. rows)
    # current behavior is:
    true_size = (5, 5)
    self.assertTrue(ed.shape == true_size)

    # slow version to compute distance matrix (squared, hence no sqrt)
    ed_manual = N.zeros(true_size, 'd')
    for i in range(true_size[0]):
        for j in range(true_size[1]):
            ed_manual[i, j] = ((data[i, :] - data[j, :]) ** 2).sum()

    # Compare magnitudes: with a plain signed sum, deviations of opposite
    # sign cancel out and any negative total would pass the '<' check.
    self.assertTrue(N.abs(N.diag(ed_manual)).sum() < 0.0000000001)
    self.assertTrue(N.abs(N.diag(ed)).sum() < 0.0000000001)

    # let see whether Kernel does the same
    self.assertTrue(N.abs(ed - ed_manual).sum() < 0.0000001)
def plot_samples_distance(dataset, sortbyattr=None):
    """Show the euclidean distances between all samples of a dataset.

    Parameters
    ----------
    dataset : Dataset
      Providing the samples.
    sortbyattr : None or str
      If None, the distances are shown in the order in which the samples
      appear in the dataset. Alternatively, the name of a samples
      attribute can be given, which will then be used to sort/group the
      samples, e.g. to investigate the similarity of samples by label or
      by chunks.
    """
    if sortbyattr is None:
        samples = dataset.samples
    else:
        # collect sample indices group by group, one group per unique
        # value of the requested attribute
        order = []
        for value in dataset.sa[sortbyattr].unique:
            order.extend(
                get_samples_by_attr(dataset, sortbyattr, value).tolist())
        samples = dataset.samples[order]

    distances = np.sqrt(squared_euclidean_distance(samples))
    pl.imshow(distances)
    pl.colorbar()
def test_euclid_dist(self):
    """Euclidean distance kernel testing

    A 5x8 block of the ``uni4large`` samples is passed to
    ``squared_euclidean_distance``.  The result must have shape (5, 5),
    a numerically zero diagonal, and entries matching squared distances
    computed explicitly for every pair of rows.
    """
    # select some block of data from already generated
    data = datasets['uni4large'].samples[:5, :8]
    ed = squared_euclidean_distance(data)

    # XXX not sure if that is right: 'weight' seems to be given by
    # feature (i.e. column), but distance is between samples (i.e. rows)
    # current behavior is:
    true_size = (5, 5)
    self.assertTrue(ed.shape == true_size)

    # slow version to compute distance matrix (squared, hence no sqrt)
    ed_manual = np.zeros(true_size, 'd')
    for i in range(true_size[0]):
        for j in range(true_size[1]):
            ed_manual[i, j] = ((data[i, :] - data[j, :]) ** 2).sum()

    # Compare magnitudes: with a plain signed sum, deviations of opposite
    # sign cancel out and any negative total would pass the '<' check.
    self.assertTrue(np.abs(np.diag(ed_manual)).sum() < 0.0000000001)
    self.assertTrue(np.abs(np.diag(ed)).sum() < 0.0000000001)

    # let see whether Kernel does the same
    self.assertTrue(np.abs(ed - ed_manual).sum() < 0.0000001)
def plotSamplesDistance(dataset, sortbyattr=None):
    """Plot the euclidean distances between all samples of a dataset.

    :Parameters:
      dataset: Dataset
        Providing the samples.
      sortbyattr: None | str
        If None, the samples distances will be in the same order as their
        appearance in the dataset. Alternatively, the name of a samples
        attribute can be given, which will then be used to sort/group the
        samples, e.g. to investigate the similarity of samples by label or
        by chunks.
    """
    if sortbyattr is not None:
        # The dataset is queried through attribute-specific accessors whose
        # names are built from the attribute name ('unique<attr>' and
        # 'idsby<attr>').  The builtin getattr() is used instead of calling
        # __getattribute__ directly, so any __getattr__ fallback still works.
        unique_values = getattr(dataset, 'unique' + sortbyattr)
        ids_by_value = getattr(dataset, 'idsby' + sortbyattr)

        slicer = []
        for attr in unique_values:
            slicer += ids_by_value(attr).tolist()
        samples = dataset.samples[slicer]
    else:
        samples = dataset.samples

    ed = N.sqrt(squared_euclidean_distance(samples))
    P.imshow(ed)
    P.colorbar()
def plot_samples_distance(dataset, sortbyattr=None):
    """Show the euclidean distances between all samples of a dataset.

    Parameters
    ----------
    dataset : Dataset
      Providing the samples.
    sortbyattr : None or str
      If None, the distances are shown in the order in which the samples
      appear in the dataset. Alternatively, the name of a samples
      attribute can be given, which will then be used to sort/group the
      samples, e.g. to investigate the similarity of samples by label or
      by chunks.
    """
    if sortbyattr is None:
        samples = dataset.samples
    else:
        # collect sample indices group by group, one group per unique
        # value of the requested attribute
        order = []
        for value in dataset.sa[sortbyattr].unique:
            order.extend(
                get_samples_by_attr(dataset, sortbyattr, value).tolist())
        samples = dataset.samples[order]

    distances = np.sqrt(squared_euclidean_distance(samples))
    pl.imshow(distances)
    pl.colorbar()
def _compute(self, data1, data2):
    """Evaluate the kernel for two sets of data and store it in ``self._k``.

    Parameters
    ----------
    data1 : numpy.ndarray
      lhs data
    data2 : numpy.ndarray
      rhs data
    """
    # distances are weighted by the inverse of the squared length scale
    inv_sq_length = 1.0 / (self.length_scale ** 2)
    wsqdist = squared_euclidean_distance(data1, data2, weight=inv_sq_length)

    # sigma_f**2 * (1 + d2 / (2 * alpha)) ** (-alpha)
    base = 1.0 + wsqdist / (2.0 * self.alpha)
    self._k = self.sigma_f**2 * base ** -self.alpha
def _compute(self, data1, data2):
    """Evaluate the kernel for two sets of data and store it in ``self._k``.

    The weighted squared distance matrix is kept in ``self.wdm2`` as well.

    Parameters
    ----------
    data1 : numpy.ndarray
      lhs data
    data2 : numpy.ndarray
      rhs data
    """
    inv_sq_length = self.length_scale**-2
    # weighted squared euclidean distance matrix:
    self.wdm2 = squared_euclidean_distance(
        data1, data2, weight=inv_sq_length)

    amplitude = self.sigma_f**2
    self._k = amplitude * np.exp(-0.5*self.wdm2)
def _compute(self, data1, data2):
    """Evaluate the kernel for two sets of data and store it in ``self._k``.

    Parameters
    ----------
    data1 : numpy.ndarray
      lhs data
    data2 : numpy.ndarray
      rhs data
    """
    # distances are weighted by the inverse of the squared length scale
    inv_sq_length = 1.0 / (self.length_scale ** 2)
    wsqdist = squared_euclidean_distance(data1, data2, weight=inv_sq_length)

    # sigma_f**2 * (1 + d2 / (2 * alpha)) ** (-alpha)
    base = 1.0 + wsqdist / (2.0 * self.alpha)
    self._k = self.sigma_f**2 * base ** -self.alpha
def _compute(self, data1, data2):
    """Evaluate the kernel for two sets of data and store it in ``self._k``.

    The weighted squared distance matrix is kept in ``self.wdm2`` as well.

    Parameters
    ----------
    data1 : numpy.ndarray
      lhs data
    data2 : numpy.ndarray
      rhs data
    """
    inv_sq_length = self.length_scale**-2
    # weighted squared euclidean distance matrix:
    self.wdm2 = squared_euclidean_distance(
        data1, data2, weight=inv_sq_length)

    amplitude = self.sigma_f**2
    self._k = amplitude * np.exp(-0.5*self.wdm2)
def compute(self, data1, data2=None):
    """Evaluate the kernel, store it in ``self.kernel_matrix`` and return it.

    :Parameters:
      data1 : numpy.ndarray
        data
      data2 : numpy.ndarray
        data (Defaults to None)
    """
    # distances are weighted by the inverse of the squared length scale
    inv_sq_length = 1.0 / (self.length_scale ** 2)
    wsqdist = squared_euclidean_distance(data1, data2, weight=inv_sq_length)

    # sigma_f**2 * (1 + d2 / (2 * alpha)) ** (-alpha)
    base = 1.0 + wsqdist / (2.0 * self.alpha)
    self.kernel_matrix = self.sigma_f**2 * base ** -self.alpha
    return self.kernel_matrix
def compute(self, data1, data2=None):
    """Evaluate the kernel, store it in ``self.kernel_matrix`` and return it.

    The weighted squared distance matrix is kept in ``self.wdm2`` as well.

    :Parameters:
      data1 : numpy.ndarray
        data
      data2 : numpy.ndarray
        data (Defaults to None)
    """
    inv_sq_length = self.length_scale**-2
    # weighted squared euclidean distance matrix:
    self.wdm2 = squared_euclidean_distance(
        data1, data2, weight=inv_sq_length)

    # XXX EO: the old implementation scaled by sigma_f (not sigma_f**2)
    # and folded the 0.5 factor into the distance weight.
    amplitude = self.sigma_f**2
    self.kernel_matrix = amplitude * N.exp(-0.5*self.wdm2)
    return self.kernel_matrix
def compute(self, data1, data2=None):
    """Evaluate the kernel, store it in ``self.kernel_matrix`` and return it.

    The weighted distance matrix is kept in ``self.wdm`` as well.

    :Parameters:
      data1 : numpy.ndarray
        data
      data2 : numpy.ndarray
        data (Defaults to None)
    """
    # XXX squaring the length scale only to take the square root of the
    # result afterwards is (maybe) wasteful and could be made more
    # efficient.
    inv_sq_length = self.length_scale**-2

    # Weighted euclidean distance matrix:
    self.wdm = N.sqrt(
        squared_euclidean_distance(data1, data2, weight=inv_sq_length))

    amplitude = self.sigma_f**2
    self.kernel_matrix = amplitude * N.exp(-self.wdm)
    return self.kernel_matrix
def _compute(self, data1, data2):
    """Evaluate the kernel for two sets of data and store it in ``self._k``.

    The weighted distance matrix is kept in ``self.wdm`` as well.

    Parameters
    ----------
    data1 : numpy.ndarray
      lhs data
    data2 : numpy.ndarray
      rhs data
    """
    hyper = self.params

    # XXX squaring the length scale only to take the square root of the
    # result afterwards is (maybe) wasteful and could be made more
    # efficient.
    inv_sq_length = hyper.length_scale**-2

    # Weighted euclidean distance matrix:
    self.wdm = np.sqrt(
        squared_euclidean_distance(data1, data2, weight=inv_sq_length))

    amplitude = hyper.sigma_f**2
    self._k = amplitude * np.exp(-self.wdm)
def _compute(self, data1, data2):
    """Evaluate the kernel for two sets of data and store it in ``self._k``.

    The weighted distance matrix is kept in ``self.wdm`` as well.

    Parameters
    ----------
    data1 : numpy.ndarray
      lhs data
    data2 : numpy.ndarray
      rhs data
    """
    hyper = self.params

    # XXX squaring the length scale only to take the square root of the
    # result afterwards is (maybe) wasteful and could be made more
    # efficient.
    inv_sq_length = hyper.length_scale**-2

    # Weighted euclidean distance matrix:
    self.wdm = np.sqrt(
        squared_euclidean_distance(data1, data2, weight=inv_sq_length))

    amplitude = hyper.sigma_f**2
    self._k = amplitude * np.exp(-self.wdm)
def _compute(self, data1, data2):
    """Compute kernel matrix and store it in ``self._k``.

    Parameters
    ----------
    data1 : numpy.ndarray
      lhs data
    data2 : numpy.ndarray
      rhs data

    Raises
    ------
    ValueError
      If ``self.numerator`` is neither 3.0 nor 5.0.
    """
    numerator = self.numerator
    # Fail loudly and early: without this check an unsupported numerator
    # would silently leave a stale (or missing) ``self._k`` behind.
    if numerator not in (3.0, 5.0):
        raise ValueError(
            "Unsupported numerator %r: only 3.0 and 5.0 are implemented"
            % (numerator,))

    # squared distances, weighted by 0.5 / length_scale**2
    tmp = squared_euclidean_distance(
        data1, data2, weight=0.5 / (self.length_scale ** 2))
    root = np.sqrt(tmp)

    if numerator == 3.0:
        scaled = np.sqrt(3.0) * root
        self._k = self.sigma_f**2 * (1.0 + scaled) * np.exp(-scaled)
    else:
        # numerator == 5.0: additionally uses the un-rooted distances
        scaled = np.sqrt(5.0) * root
        self._k = \
            self.sigma_f**2 * (1.0 + scaled + 5.0 / 3.0 * tmp) \
            * np.exp(-scaled)
def _compute(self, data1, data2):
    """Compute kernel matrix and store it in ``self._k``.

    Parameters
    ----------
    data1 : numpy.ndarray
      lhs data
    data2 : numpy.ndarray
      rhs data

    Raises
    ------
    ValueError
      If ``self.numerator`` is neither 3.0 nor 5.0.
    """
    numerator = self.numerator
    # Fail loudly and early: without this check an unsupported numerator
    # would silently leave a stale (or missing) ``self._k`` behind.
    if numerator not in (3.0, 5.0):
        raise ValueError(
            "Unsupported numerator %r: only 3.0 and 5.0 are implemented"
            % (numerator,))

    # squared distances, weighted by 0.5 / length_scale**2
    tmp = squared_euclidean_distance(
        data1, data2, weight=0.5 / (self.length_scale ** 2))
    root = np.sqrt(tmp)

    if numerator == 3.0:
        scaled = np.sqrt(3.0) * root
        self._k = self.sigma_f**2 * (1.0 + scaled) * np.exp(-scaled)
    else:
        # numerator == 5.0: additionally uses the un-rooted distances
        scaled = np.sqrt(5.0) * root
        self._k = \
            self.sigma_f**2 * (1.0 + scaled + 5.0 / 3.0 * tmp) \
            * np.exp(-scaled)
def compute(self, data1, data2=None):
    """Compute kernel matrix, store it in ``self.kernel_matrix``, return it.

    :Parameters:
      data1 : numpy.ndarray
        data
      data2 : numpy.ndarray
        data (Defaults to None)

    :Raises:
      ValueError
        If ``self.numerator`` is neither 3.0 nor 5.0.
    """
    numerator = self.numerator
    # Fail loudly and early: without this check an unsupported numerator
    # would return a stale ``self.kernel_matrix`` (or fail with an
    # unrelated AttributeError if none was computed before).
    if numerator not in (3.0, 5.0):
        raise ValueError(
            "Unsupported numerator %r: only 3.0 and 5.0 are implemented"
            % (numerator,))

    # squared distances, weighted by 0.5 / length_scale**2
    tmp = squared_euclidean_distance(
        data1, data2, weight=0.5 / (self.length_scale ** 2))
    root = N.sqrt(tmp)

    if numerator == 3.0:
        scaled = N.sqrt(3.0) * root
        self.kernel_matrix = \
            self.sigma_f**2 * (1.0 + scaled) * N.exp(-scaled)
    else:
        # numerator == 5.0: additionally uses the un-rooted distances
        scaled = N.sqrt(5.0) * root
        self.kernel_matrix = \
            self.sigma_f**2 * (1.0 + scaled + 5.0 / 3.0 * tmp) \
            * N.exp(-scaled)
    return self.kernel_matrix
def _compute(self, d1, d2):
    """Evaluate the Rbf kernel for ``d1`` and ``d2``; store it in ``self._k``.

    The result of ``squared_euclidean_distance(d1, d2)`` is negated,
    divided by ``self.params.sigma`` (sigma itself, not a multiple of its
    square) and exponentiated.
    """
    # Do the Rbf
    sq_dist = squared_euclidean_distance(d1, d2)
    self._k = np.exp(-sq_dist / self.params.sigma)
def _compute(self, d1, d2):
    """Evaluate the Rbf kernel for ``d1`` and ``d2``; store it in ``self._k``.

    The result of ``squared_euclidean_distance(d1, d2)`` is negated,
    divided by ``self.params.sigma`` (sigma itself, not a multiple of its
    square) and exponentiated.
    """
    # Do the Rbf
    sq_dist = squared_euclidean_distance(d1, d2)
    self._k = np.exp(-sq_dist / self.params.sigma)