Example #1
0
    def testPNorm_w(self):
        """Exercise weighted p-norm distances: argument validation plus a
        cross-check of every python implementation against the default."""
        data0 = datasets['uni4large'].samples.T
        weight = N.abs(data0[11, :60])

        # malformed argument combinations must be rejected
        bad_calls = [
            (pnorm_w_python, (data0[:10, :2],),
             dict(p=1.2, heuristic='buga')),
            (pnorm_w_python, (data0[:10, :2],), dict(weight=weight)),
            (pnorm_w_python, (data0[:10, :2], data0[:10, :3]),
             dict(weight=weight)),
            (pnorm_w, (data0[:10, :2], data0[:10, :3]),
             dict(weight=weight)),
            (pnorm_w, (data0[:10, :2],), dict(weight=weight)),
            ]
        for func, args, kw in bad_calls:
            self.failUnlessRaises(ValueError, func, *args, **kw)

        # some sanity checks on data1/data2/weight combinations
        configurations = [
            (data0[:2, :60], None, None),
            (data0[:2, :60], data0[3:4, 1:61], None),
            (data0[:2, :60], None, weight),
            (data0[:2, :60], data0[3:4, 1:61], weight),
            ]
        for did, (data1, data2, w) in enumerate(configurations):
            # test different norms
            for p in [1, 2, 1.2]:
                kwargs = dict(data1=data1, data2=data2, weight=w, p=p)
                d = pnorm_w(**kwargs)    # reference: default implementation
                # distance against the origin provides the comparison scale
                kwargs0 = dict(kwargs, data2=N.zeros(data1.shape))
                d0 = pnorm_w(**kwargs0)
                d0norm = N.linalg.norm(d - d0, 'fro')
                # every alternative implementation must agree with d
                alternatives = [
                    pnorm_w_python(**kwargs),
                    pnorm_w_python(use_sq_euclidean=True, **kwargs),
                    pnorm_w_python(heuristic='auto', **kwargs),
                    pnorm_w_python(use_sq_euclidean=False, **kwargs),
                ] + [
                    pnorm_w_python(heuristic=h, use_sq_euclidean=False,
                                   **kwargs)
                    for h in ('auto', 'samples', 'features')
                ]
                for iid, d2 in enumerate(alternatives):
                    dnorm = N.linalg.norm(d2 - d, 'fro')
                    self.failUnless(dnorm/d0norm < 1e-7,
                        msg="Failed comparison of different implementations on "
                            "data #%d, implementation #%d, p=%s. "
                            "Norm of the difference is %g"
                            % (did, iid, p, dnorm))
Example #2
0
    def test_pnorm_w(self):
        """Validate weighted p-norm distance routines.

        Verifies that invalid argument combinations raise ValueError and
        that all python implementations agree with the default pnorm_w.
        """
        data0 = datasets["uni4large"].samples.T
        weight = np.abs(data0[11, :60])

        # argument validation
        self.failUnlessRaises(ValueError, pnorm_w_python,
                              data0[:10, :2], p=1.2, heuristic="buga")
        self.failUnlessRaises(ValueError, pnorm_w_python,
                              data0[:10, :2], weight=weight)

        self.failUnlessRaises(ValueError, pnorm_w_python,
                              data0[:10, :2], data0[:10, :3], weight=weight)
        self.failUnlessRaises(ValueError, pnorm_w,
                              data0[:10, :2], data0[:10, :3], weight=weight)

        self.failUnlessRaises(ValueError, pnorm_w,
                              data0[:10, :2], weight=weight)

        # some sanity checks
        cases = [(data0[:2, :60], None, None),
                 (data0[:2, :60], data0[3:4, 1:61], None),
                 (data0[:2, :60], None, weight),
                 (data0[:2, :60], data0[3:4, 1:61], weight)]
        for did, (data1, data2, w) in enumerate(cases):
            # test different norms
            for p in (1, 2, 1.2):
                kwargs = {"data1": data1, "data2": data2,
                          "weight": w, "p": p}
                d = pnorm_w(**kwargs)  # default one
                # to assess how far we are
                kwargs0 = kwargs.copy()
                kwargs0["data2"] = np.zeros(data1.shape)
                d0 = pnorm_w(**kwargs0)
                d0norm = np.linalg.norm(d - d0, "fro")
                # test different implementations
                impls = [
                    pnorm_w_python(**kwargs),
                    pnorm_w_python(use_sq_euclidean=True, **kwargs),
                    pnorm_w_python(heuristic="auto", **kwargs),
                    pnorm_w_python(use_sq_euclidean=False, **kwargs),
                    pnorm_w_python(heuristic="auto",
                                   use_sq_euclidean=False, **kwargs),
                    pnorm_w_python(heuristic="samples",
                                   use_sq_euclidean=False, **kwargs),
                    pnorm_w_python(heuristic="features",
                                   use_sq_euclidean=False, **kwargs),
                ]
                for iid, d2 in enumerate(impls):
                    dnorm = np.linalg.norm(d2 - d, "fro")
                    self.failUnless(
                        dnorm / d0norm < 1e-7,
                        msg="Failed comparison of different implementations on "
                        "data #%d, implementation #%d, p=%s. "
                        "Norm of the difference is %g" % (did, iid, p, dnorm))
Example #3
0
    def _call(self, dataset):
        """Computes featurewise I-RELIEF weights.

        Iterates the I-RELIEF fixed-point update until the L1 change of
        the weight vector drops below ``self.threshold``.

        Parameters
        ----------
        dataset : Dataset
            Input data; ``dataset.samples`` has shape (NS, NF).

        Returns
        -------
        Dataset
            A 1 x NF dataset with the converged feature weights (also
            stored in ``self.w``).
        """
        samples = dataset.samples
        NS, NF = samples.shape[:2]

        # Start from the user-supplied guess when given, otherwise uniform.
        # BUGFIX: the original tested ``== None`` and left ``w`` unbound
        # (NameError) whenever a non-None w_guess was provided.
        if self.w_guess is None:
            w = np.ones(NF, 'd')
        else:
            w = np.array(self.w_guess, dtype='d')

        w /= (w ** 2).sum()  # do normalization in all cases to be safe :)

        M, H = self.compute_M_H(dataset.targets)

        while True:
            # kernelized weighted L1 distances between all sample pairs
            d_w_k = self.k(pnorm_w(data1=samples, weight=w, p=1))
            ni = np.zeros(NF, 'd')
            for n in range(NS):
                # d_w_k[n, n] could be omitted since == 0.0
                gamma_n = 1.0 - np.nan_to_num(d_w_k[n, M[n]].sum() \
                                / (d_w_k[n, :].sum() - d_w_k[n, n]))
                alpha_n = np.nan_to_num(d_w_k[n, M[n]] /
                                        (d_w_k[n, M[n]].sum()))
                beta_n = np.nan_to_num(d_w_k[n, H[n]] / (d_w_k[n, H[n]].sum()))

                # per-feature contributions of misses (m_n) and hits (h_n)
                m_n = (np.abs(samples[n, :] - samples[M[n], :]) \
                       * alpha_n[:, None]).sum(0)
                h_n = (np.abs(samples[n, :] - samples[H[n], :]) \
                       * beta_n[:, None]).sum(0)
                ni += gamma_n * (m_n - h_n)

            ni = ni / NS

            # project onto the non-negative orthant, then L2-normalize
            ni_plus = np.clip(ni, 0.0,
                              np.inf)  # set all negative elements to zero
            w_new = np.nan_to_num(ni_plus / (np.sqrt((ni_plus**2).sum())))
            change = np.abs(w_new - w).sum()
            if __debug__ and 'IRELIEF' in debug.active:
                debug('IRELIEF',
                      "change=%.4f max=%f min=%.4f mean=%.4f std=%.4f #nan=%d" \
                      % (change, w_new.max(), w_new.min(), w_new.mean(),
                         w_new.std(), np.isnan(w_new).sum()))

            # update weights:
            w = w_new
            if change < self.threshold:
                break

        self.w = w
        return Dataset(self.w[np.newaxis])
Example #4
0
    def _call(self, dataset):
        """Computes featurewise I-RELIEF weights.

        Iterates the I-RELIEF fixed-point update until the L1 change of
        the weight vector drops below ``self.threshold``.

        Parameters
        ----------
        dataset : Dataset
            Input data; ``dataset.samples`` has shape (NS, NF).

        Returns
        -------
        ndarray
            The converged 1-D feature weight vector (also stored in
            ``self.w``).
        """
        samples = dataset.samples
        NS, NF = samples.shape[:2]

        # Start from the user-supplied guess when given, otherwise uniform.
        # BUGFIX: the original tested ``== None`` and left ``w`` unbound
        # (NameError) whenever a non-None w_guess was provided.
        if self.w_guess is None:
            w = N.ones(NF, 'd')
        else:
            w = N.array(self.w_guess, dtype='d')

        w /= (w ** 2).sum() # do normalization in all cases to be safe :)

        M, H = self.compute_M_H(dataset.labels)

        while True:
            # kernelized weighted L1 distances between all sample pairs
            d_w_k = self.k(pnorm_w(data1=samples, weight=w, p=1))
            ni = N.zeros(NF, 'd')
            for n in range(NS):
                # d_w_k[n, n] could be omitted since == 0.0
                gamma_n = 1.0 - N.nan_to_num(d_w_k[n, M[n]].sum() \
                                / (d_w_k[n, :].sum() - d_w_k[n, n]))
                alpha_n = N.nan_to_num(d_w_k[n, M[n]] / (d_w_k[n, M[n]].sum()))
                beta_n = N.nan_to_num(d_w_k[n, H[n]] / (d_w_k[n, H[n]].sum()))

                # per-feature contributions of misses (m_n) and hits (h_n)
                m_n = (N.abs(samples[n, :] - samples[M[n], :]) \
                       * alpha_n[:, None]).sum(0)
                h_n = (N.abs(samples[n, :] - samples[H[n], :]) \
                       * beta_n[:, None]).sum(0)
                ni += gamma_n*(m_n - h_n)

            ni = ni / NS

            # project onto the non-negative orthant, then L2-normalize
            ni_plus = N.clip(ni, 0.0, N.inf) # set all negative elements to zero
            w_new = N.nan_to_num(ni_plus / (N.sqrt((ni_plus**2).sum())))
            change = N.abs(w_new - w).sum()
            if __debug__ and 'IRELIEF' in debug.active:
                debug('IRELIEF',
                      "change=%.4f max=%f min=%.4f mean=%.4f std=%.4f #nan=%d" \
                      % (change, w_new.max(), w_new.min(), w_new.mean(),
                         w_new.std(), N.isnan(w_new).sum()))

            # update weights:
            w = w_new
            if change < self.threshold:
                break

        self.w = w
        return w
    def test_pnorm_w(self):
        """Check weighted p-norm distances: error handling and agreement
        of all alternative implementations with the default pnorm_w."""
        data0 = datasets['uni4large'].samples.T
        weight = np.abs(data0[11, :60])

        # each of these argument combinations is invalid
        invalid = (
            (pnorm_w_python, (data0[:10, :2],),
             {'p': 1.2, 'heuristic': 'buga'}),
            (pnorm_w_python, (data0[:10, :2],), {'weight': weight}),
            (pnorm_w_python, (data0[:10, :2], data0[:10, :3]),
             {'weight': weight}),
            (pnorm_w, (data0[:10, :2], data0[:10, :3]),
             {'weight': weight}),
            (pnorm_w, (data0[:10, :2],), {'weight': weight}),
        )
        for func, args, kw in invalid:
            self.failUnlessRaises(ValueError, func, *args, **kw)

        # some sanity checks
        for did, (data1, data2, w) in enumerate([
                (data0[:2, :60], None, None),
                (data0[:2, :60], data0[3:4, 1:61], None),
                (data0[:2, :60], None, weight),
                (data0[:2, :60], data0[3:4, 1:61], weight)]):
            # test different norms
            for p in [1, 2, 1.2]:
                kwargs = dict(data1=data1, data2=data2, weight=w, p=p)
                d = pnorm_w(**kwargs)  # reference implementation
                # scale reference: distance against an all-zero data2
                d0 = pnorm_w(**dict(kwargs, data2=np.zeros(data1.shape)))
                d0norm = np.linalg.norm(d - d0, 'fro')
                # alternative implementations, checked in a fixed order
                variants = [pnorm_w_python(**kwargs),
                            pnorm_w_python(use_sq_euclidean=True, **kwargs),
                            pnorm_w_python(heuristic='auto', **kwargs),
                            pnorm_w_python(use_sq_euclidean=False, **kwargs)]
                for h in ('auto', 'samples', 'features'):
                    variants.append(
                        pnorm_w_python(heuristic=h,
                                       use_sq_euclidean=False, **kwargs))
                for iid, d2 in enumerate(variants):
                    dnorm = np.linalg.norm(d2 - d, 'fro')
                    self.failUnless(
                        dnorm / d0norm < 1e-7,
                        msg="Failed comparison of different implementations on "
                        "data #%d, implementation #%d, p=%s. "
                        "Norm of the difference is %g" % (did, iid, p, dnorm))