def testPNorm_w(self):
    """Sanity-check weighted p-norm distances.

    Verifies that invalid argument combinations raise ValueError and
    that all alternative implementations agree with the default
    ``pnorm_w`` within a relative tolerance.
    """
    data0 = datasets['uni4large'].samples.T
    weight = N.abs(data0[11, :60])

    # malformed calls must be rejected
    self.failUnlessRaises(ValueError, pnorm_w_python,
                          data0[:10, :2], p=1.2, heuristic='buga')
    self.failUnlessRaises(ValueError, pnorm_w_python,
                          data0[:10, :2], weight=weight)
    self.failUnlessRaises(ValueError, pnorm_w_python,
                          data0[:10, :2], data0[:10, :3], weight=weight)
    self.failUnlessRaises(ValueError, pnorm_w,
                          data0[:10, :2], data0[:10, :3], weight=weight)
    self.failUnlessRaises(ValueError, pnorm_w,
                          data0[:10, :2], weight=weight)

    # some sanity checks
    cases = [(data0[:2, :60], None, None),
             (data0[:2, :60], data0[3:4, 1:61], None),
             (data0[:2, :60], None, weight),
             (data0[:2, :60], data0[3:4, 1:61], weight)]
    for did, (data1, data2, w) in enumerate(cases):
        # test different norms
        for p in [1, 2, 1.2]:
            kwargs = {'data1': data1, 'data2': data2,
                      'weight': w, 'p': p}
            d = pnorm_w(**kwargs)           # default one
            # distance to the all-zero matrix gives a scale for errors
            kwargs0 = kwargs.copy()
            kwargs0['data2'] = N.zeros(data1.shape)
            d0 = pnorm_w(**kwargs0)
            d0norm = N.linalg.norm(d - d0, 'fro')
            # test different implementations
            variants = [
                pnorm_w_python(**kwargs),
                pnorm_w_python(use_sq_euclidean=True, **kwargs),
                pnorm_w_python(heuristic='auto', **kwargs),
                pnorm_w_python(use_sq_euclidean=False, **kwargs),
                pnorm_w_python(heuristic='auto',
                               use_sq_euclidean=False, **kwargs),
                pnorm_w_python(heuristic='samples',
                               use_sq_euclidean=False, **kwargs),
                pnorm_w_python(heuristic='features',
                               use_sq_euclidean=False, **kwargs),
                ]
            for iid, d2 in enumerate(variants):
                dnorm = N.linalg.norm(d2 - d, 'fro')
                self.failUnless(
                    dnorm / d0norm < 1e-7,
                    msg="Failed comparison of different implementations on "
                        "data #%d, implementation #%d, p=%s. "
                        "Norm of the difference is %g"
                        % (did, iid, p, dnorm))
def test_pnorm_w(self):
    """Exercise pnorm_w/pnorm_w_python: bad arguments and cross-checks.

    Confirms ValueError on invalid parameter combinations, then checks
    that every alternative implementation matches the default pnorm_w
    output within a relative tolerance.
    """
    data0 = datasets["uni4large"].samples.T
    weight = np.abs(data0[11, :60])

    # malformed calls should raise ValueError
    self.failUnlessRaises(ValueError, pnorm_w_python,
                          data0[:10, :2], p=1.2, heuristic="buga")
    self.failUnlessRaises(ValueError, pnorm_w_python,
                          data0[:10, :2], weight=weight)
    self.failUnlessRaises(ValueError, pnorm_w_python,
                          data0[:10, :2], data0[:10, :3], weight=weight)
    self.failUnlessRaises(ValueError, pnorm_w,
                          data0[:10, :2], data0[:10, :3], weight=weight)
    self.failUnlessRaises(ValueError, pnorm_w,
                          data0[:10, :2], weight=weight)

    # some sanity checks
    for did, (data1, data2, w) in enumerate([
            (data0[:2, :60], None, None),
            (data0[:2, :60], data0[3:4, 1:61], None),
            (data0[:2, :60], None, weight),
            (data0[:2, :60], data0[3:4, 1:61], weight)]):
        # test different norms
        for p in [1, 2, 1.2]:
            kwargs = {"data1": data1, "data2": data2,
                      "weight": w, "p": p}
            d = pnorm_w(**kwargs)  # default one
            # distance against zeros gives a scale for the error
            kwargs0 = kwargs.copy()
            kwargs0["data2"] = np.zeros(data1.shape)
            d0 = pnorm_w(**kwargs0)
            d0norm = np.linalg.norm(d - d0, "fro")
            # every alternative implementation, in a fixed order
            alternatives = [
                pnorm_w_python(**kwargs),
                pnorm_w_python(use_sq_euclidean=True, **kwargs),
                pnorm_w_python(heuristic="auto", **kwargs),
                pnorm_w_python(use_sq_euclidean=False, **kwargs),
                pnorm_w_python(heuristic="auto",
                               use_sq_euclidean=False, **kwargs),
                pnorm_w_python(heuristic="samples",
                               use_sq_euclidean=False, **kwargs),
                pnorm_w_python(heuristic="features",
                               use_sq_euclidean=False, **kwargs),
            ]
            for iid, d2 in enumerate(alternatives):
                dnorm = np.linalg.norm(d2 - d, "fro")
                self.failUnless(
                    dnorm / d0norm < 1e-7,
                    msg="Failed comparison of different implementations on "
                        "data #%d, implementation #%d, p=%s. "
                        "Norm of the difference is %g" % (did, iid, p, dnorm))
def _call(self, dataset):
    """Computes featurewise I-RELIEF weights.

    Iteratively re-estimates the feature weight vector until its L1
    change between iterations drops below ``self.threshold``.

    Parameters
    ----------
    dataset : Dataset
        ``dataset.samples`` is an (NS, NF) array; ``dataset.targets``
        supplies the class labels used to compute miss/hit sets.

    Returns
    -------
    Dataset
        A 1 x NF dataset holding the estimated feature weights
        (also stored in ``self.w``).
    """
    samples = dataset.samples
    NS, NF = samples.shape[:2]
    if self.w_guess is None:
        w = np.ones(NF, 'd')
    else:
        # Fix: previously `w` was left unbound whenever a guess was
        # supplied, so the normalization below raised NameError.
        # Start from a float copy of the user-provided guess instead.
        w = np.array(self.w_guess, dtype='d')
    # do normalization in all cases to be safe :)
    w /= (w ** 2).sum()

    M, H = self.compute_M_H(dataset.targets)

    while True:
        # kernel-transformed weighted L1 distances between all samples
        d_w_k = self.k(pnorm_w(data1=samples, weight=w, p=1))
        ni = np.zeros(NF, 'd')
        for n in range(NS):
            # d_w_k[n, n] could be omitted since == 0.0
            gamma_n = 1.0 - np.nan_to_num(d_w_k[n, M[n]].sum()
                            / (d_w_k[n, :].sum() - d_w_k[n, n]))
            alpha_n = np.nan_to_num(d_w_k[n, M[n]] / (d_w_k[n, M[n]].sum()))
            beta_n = np.nan_to_num(d_w_k[n, H[n]] / (d_w_k[n, H[n]].sum()))

            m_n = (np.abs(samples[n, :] - samples[M[n], :])
                   * alpha_n[:, None]).sum(0)
            h_n = (np.abs(samples[n, :] - samples[H[n], :])
                   * beta_n[:, None]).sum(0)
            ni += gamma_n * (m_n - h_n)
        ni = ni / NS
        ni_plus = np.clip(ni, 0.0, np.inf)  # set all negative elements to zero
        w_new = np.nan_to_num(ni_plus / (np.sqrt((ni_plus ** 2).sum())))
        change = np.abs(w_new - w).sum()
        if __debug__ and 'IRELIEF' in debug.active:
            debug('IRELIEF',
                  "change=%.4f max=%f min=%.4f mean=%.4f std=%.4f #nan=%d"
                  % (change, w_new.max(), w_new.min(), w_new.mean(),
                     w_new.std(), np.isnan(w_new).sum()))
        # update weights:
        w = w_new
        if change < self.threshold:
            break

    self.w = w
    return Dataset(self.w[np.newaxis])
def _call(self, dataset):
    """Computes featurewise I-RELIEF weights.

    Iteratively re-estimates the feature weight vector until its L1
    change between iterations drops below ``self.threshold``.

    Parameters
    ----------
    dataset : Dataset
        ``dataset.samples`` is an (NS, NF) array; ``dataset.labels``
        supplies the class labels used to compute miss/hit sets.

    Returns
    -------
    ndarray
        The estimated feature weight vector of length NF
        (also stored in ``self.w``).
    """
    samples = dataset.samples
    NS, NF = samples.shape[:2]
    if self.w_guess is None:
        w = N.ones(NF, 'd')
    else:
        # Fix: previously `w` was left unbound whenever a guess was
        # supplied, so the normalization below raised NameError.
        # Start from a float copy of the user-provided guess instead.
        w = N.array(self.w_guess, dtype='d')
    # do normalization in all cases to be safe :)
    w /= (w ** 2).sum()

    M, H = self.compute_M_H(dataset.labels)

    while True:
        # kernel-transformed weighted L1 distances between all samples
        d_w_k = self.k(pnorm_w(data1=samples, weight=w, p=1))
        ni = N.zeros(NF, 'd')
        for n in range(NS):
            # d_w_k[n, n] could be omitted since == 0.0
            gamma_n = 1.0 - N.nan_to_num(d_w_k[n, M[n]].sum()
                            / (d_w_k[n, :].sum() - d_w_k[n, n]))
            alpha_n = N.nan_to_num(d_w_k[n, M[n]] / (d_w_k[n, M[n]].sum()))
            beta_n = N.nan_to_num(d_w_k[n, H[n]] / (d_w_k[n, H[n]].sum()))

            m_n = (N.abs(samples[n, :] - samples[M[n], :])
                   * alpha_n[:, None]).sum(0)
            h_n = (N.abs(samples[n, :] - samples[H[n], :])
                   * beta_n[:, None]).sum(0)
            ni += gamma_n * (m_n - h_n)
        ni = ni / NS
        ni_plus = N.clip(ni, 0.0, N.inf)  # set all negative elements to zero
        w_new = N.nan_to_num(ni_plus / (N.sqrt((ni_plus ** 2).sum())))
        change = N.abs(w_new - w).sum()
        if __debug__ and 'IRELIEF' in debug.active:
            debug('IRELIEF',
                  "change=%.4f max=%f min=%.4f mean=%.4f std=%.4f #nan=%d"
                  % (change, w_new.max(), w_new.min(), w_new.mean(),
                     w_new.std(), N.isnan(w_new).sum()))
        # update weights:
        w = w_new
        if change < self.threshold:
            break

    self.w = w
    return w
def test_pnorm_w(self):
    """Sanity tests for the weighted p-norm distance implementations.

    First checks that invalid argument combinations raise ValueError,
    then compares every alternative implementation of pnorm_w_python
    against the default pnorm_w on several data/weight combinations.
    """
    data0 = datasets['uni4large'].samples.T
    weight = np.abs(data0[11, :60])

    # invalid parameter combinations must be rejected
    for func, args, kw in (
            (pnorm_w_python, (data0[:10, :2],),
             dict(p=1.2, heuristic='buga')),
            (pnorm_w_python, (data0[:10, :2],), dict(weight=weight)),
            (pnorm_w_python, (data0[:10, :2], data0[:10, :3]),
             dict(weight=weight)),
            (pnorm_w, (data0[:10, :2], data0[:10, :3]),
             dict(weight=weight)),
            (pnorm_w, (data0[:10, :2],), dict(weight=weight))):
        self.failUnlessRaises(ValueError, func, *args, **kw)

    # some sanity checks
    combos = [(data0[:2, :60], None, None),
              (data0[:2, :60], data0[3:4, 1:61], None),
              (data0[:2, :60], None, weight),
              (data0[:2, :60], data0[3:4, 1:61], weight)]
    for did, (data1, data2, w) in enumerate(combos):
        # test different norms
        for p in [1, 2, 1.2]:
            kwargs = dict(data1=data1, data2=data2, weight=w, p=p)
            d = pnorm_w(**kwargs)  # default one
            # reference distance to an all-zero matrix scales the error
            kwargs0 = dict(kwargs, data2=np.zeros(data1.shape))
            d0 = pnorm_w(**kwargs0)
            d0norm = np.linalg.norm(d - d0, 'fro')
            # collect results of all alternative implementations
            results = [pnorm_w_python(**kwargs),
                       pnorm_w_python(use_sq_euclidean=True, **kwargs),
                       pnorm_w_python(heuristic='auto', **kwargs),
                       pnorm_w_python(use_sq_euclidean=False, **kwargs)]
            for h in ('auto', 'samples', 'features'):
                results.append(
                    pnorm_w_python(heuristic=h,
                                   use_sq_euclidean=False, **kwargs))
            for iid, d2 in enumerate(results):
                dnorm = np.linalg.norm(d2 - d, 'fro')
                self.failUnless(
                    dnorm / d0norm < 1e-7,
                    msg="Failed comparison of different implementations on "
                        "data #%d, implementation #%d, p=%s. "
                        "Norm of the difference is %g" % (did, iid, p, dnorm))