Example #1
    def _call(self, dataset):
        """Extract weights from GPR
        """

        clf = self.clf
        kernel = clf.kernel
        train_fv = clf._train_fv
        if isinstance(kernel, LinearKernel):
            Sigma_p = 1.0
        else:
            Sigma_p = kernel.params.Sigma_p

        weights = Ndot(Sigma_p, Ndot(train_fv.T, clf._alpha))

        if self.ca.is_enabled('variances'):
            # admittedly ugly formulas that could surely be improved:
            tmp = np.linalg.inv(clf._L)
            Kyinv = Ndot(tmp.T, tmp)
            # XXX in such lengthy matrix manipulations you might be better off
            #     using np.matrix, where * is a matrix product
            self.ca.variances = Ndiag(
                Sigma_p -
                Ndot(Sigma_p,
                     Ndot(train_fv.T, Ndot(Kyinv, Ndot(train_fv, Sigma_p)))))
        return Dataset(np.atleast_2d(weights))
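
For a linear kernel the weight vector computed above reduces to familiar ridge-regression weights. A minimal plain-numpy sketch of that identity, assuming Sigma_p = I and a known noise variance sigma_n2 (both stand-ins, not PyMVPA objects):

import numpy as np

rng = np.random.RandomState(0)
X = rng.randn(20, 5)                     # training samples (like clf._train_fv)
y = rng.randn(20)
sigma_n2 = 0.5                           # assumed noise variance

K = np.dot(X, X.T)                       # linear kernel with Sigma_p = I
alpha = np.linalg.solve(K + sigma_n2 * np.eye(20), y)   # plays the role of clf._alpha
w_gpr = np.dot(X.T, alpha)               # Sigma_p * train_fv.T * alpha

w_ridge = np.linalg.solve(np.dot(X.T, X) + sigma_n2 * np.eye(5), np.dot(X.T, y))
assert np.allclose(w_gpr, w_ridge)
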
Example #2
    def _call(self, dataset):
        sensitivities = []
        for ind, analyzer in enumerate(self.__analyzers):
            if __debug__:
                debug("SA", "Computing sensitivity for SA#%d:%s" %
                      (ind, analyzer))
            sensitivity = analyzer(dataset)
            sensitivities.append(sensitivity)

        if __debug__:
            debug("SA",
                  "Returning %d sensitivities from %s" %
                  (len(sensitivities), self.__class__.__name__))

        sa_attr = self._sa_attr
        if isinstance(sensitivities[0], AttrDataset):
            smerged = None
            for i, s in enumerate(sensitivities):
                s.sa[sa_attr] = np.repeat(i, len(s))
                if smerged is None:
                    smerged = s
                else:
                    smerged.append(s)
            sensitivities = smerged
        else:
            sensitivities = \
                Dataset(sensitivities,
                        sa={sa_attr: np.arange(len(sensitivities))})

        self.ca.sensitivities = sensitivities

        return sensitivities
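
The merging step above just stacks the per-analyzer results and labels each row with the analyzer it came from; a compact numpy-only sketch of that bookkeeping (array shapes are made up for illustration):

import numpy as np

sens = [np.random.rand(1, 8) for _ in range(3)]   # one 1x8 sensitivity per analyzer
stacked = np.vstack(sens)                         # rows from all analyzers
analyzer_idx = np.concatenate([np.repeat(i, len(s)) for i, s in enumerate(sens)])
assert stacked.shape == (3, 8)
assert analyzer_idx.tolist() == [0, 1, 2]         # mirrors the sa_attr labels
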
Example #3
    def _forward_dataset_helper(self, ds):
        # local binding
        num = self.__num

        pos = None
        if self.__position_attr is not None:
            # we know something about sample position
            pos = ds.sa[self.__position_attr].value
            rsamples, pos = resample(ds.samples,
                                     self.__num,
                                     t=pos,
                                     window=self.__window_args)
        else:
            # we know nothing about sample positions
            rsamples = resample(ds.samples,
                                self.__num,
                                t=None,
                                window=self.__window_args)
        # new dataset that reuses that feature and dataset attributes of the
        # source
        mds = Dataset(rsamples, fa=ds.fa, a=ds.a)

        # the tricky part is what to do with the samples attributes, since
        # their number has changed
        if self.__attr_strategy == 'remove':
            # nothing to be done
            pass
        elif self.__attr_strategy == 'sample':
            step = int(len(ds) / num)
            sa = dict([(k, ds.sa[k].value[0::step][:num]) for k in ds.sa])
            mds.sa.update(sa)
        elif self.__attr_strategy == 'resample':
            # resample the attributes themselves
            sa = {}
            for k in ds.sa:
                v = ds.sa[k].value
                if pos is None:
                    sa[k] = resample(v,
                                     self.__num,
                                     t=None,
                                     window=self.__window_args)
                else:
                    if k == self.__position_attr:
                        # position attr will be handled separately at the end
                        continue
                    sa[k] = resample(v,
                                     self.__num,
                                     t=pos,
                                     window=self.__window_args)[0]
            # inject them all
            mds.sa.update(sa)
        else:
            raise ValueError("Unkown attribute handling strategy '%s'." %
                             self.__attr_strategy)

        if pos is not None:
            # we got the new sample positions and can store them
            mds.sa[self.__position_attr] = pos
        return mds
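
The helper relies on the two call forms of scipy.signal.resample: without t only the resampled array comes back, with t you also get the new sample positions. A short sketch, assuming the resample used above is indeed scipy.signal.resample:

import numpy as np
from scipy.signal import resample

x = np.sin(np.linspace(0, 2 * np.pi, 100))[:, None]    # 100 samples, 1 feature
t = np.linspace(0, 2 * np.pi, 100)

r_only = resample(x, 10, window=('gauss', 50))          # data only
r_both, t_new = resample(x, 10, t=t, window=('gauss', 50))
assert r_only.shape == (10, 1) and t_new.shape == (10,)
assert np.allclose(r_only, r_both)
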
Example #4
    def test_linear_kernel(self):
        """Simplistic testing of linear kernel"""
        d1 = Dataset(np.asarray([range(5)] * 10, dtype=float))
        lk = npK.LinearKernel()
        lk.compute(d1)
        self.failUnless(lk._k.shape == (10, 10),
                        "Failure computing LinearKernel (Size mismatch)")
        self.failUnless((lk._k == 30).all(), "Failure computing LinearKernel")
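
The expected value 30 in the assertions above is just the dot product of one row with itself; a quick plain-numpy check:

import numpy as np

row = np.arange(5, dtype=float)
assert np.dot(row, row) == 30.0                      # 0 + 1 + 4 + 9 + 16
K = np.dot(np.tile(row, (10, 1)), np.tile(row, (10, 1)).T)
assert K.shape == (10, 10) and (K == 30).all()
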
Example #5
    def test_cached_kernel(self):
        nchunks = 5
        n = 50 * nchunks
        d = Dataset(np.random.randn(n, 132))
        d.sa.chunks = np.random.randint(nchunks, size=n)

        # We'll compare against an Rbf just because it has a parameter to change
        rk = npK.RbfKernel(sigma=1.5)

        # Assure two kernels are independent for this test
        ck = CachedKernel(kernel=npK.RbfKernel(sigma=1.5))
        ck.compute(d)  # Initial cache of all data

        self.failUnless(ck._recomputed,
                        'CachedKernel was not initially computed')

        # Try some splitting
        for chunk in [d[d.sa.chunks == i] for i in range(nchunks)]:
            rk.compute(chunk)
            ck.compute(chunk)
            self.kernel_equiv(rk, ck)  #, accuracy=1e-12)
            self.failIf(ck._recomputed,
                        "CachedKernel incorrectly recomputed it's kernel")

        # Test what happens when a parameter changes
        ck.params.sigma = 3.5
        ck.compute(d)
        self.failUnless(ck._recomputed,
                        "CachedKernel doesn't recompute on kernel change")
        rk.params.sigma = 3.5
        rk.compute(d)
        self.failUnless(np.all(rk._k == ck._k),
                        'Cached and rbf kernels disagree after kernel change')

        # Now test handling new data
        d2 = Dataset(np.random.randn(32, 43))
        ck.compute(d2)
        self.failUnless(
            ck._recomputed,
            "CachedKernel did not automatically recompute new data")
        ck.compute(d)
        self.failUnless(ck._recomputed,
                        "CachedKernel did not recompute old data which had\n" +\
                        "previously been computed, but had the cache overriden")
Example #6
    def _call(self, dataset):
        """Computes featurewise I-RELIEF weights."""
        samples = dataset.samples
        NS, NF = samples.shape[:2]
        if self.w_guess is None:
            self.w = np.ones(NF, 'd')
        # do normalization in all cases to be safe :)
        self.w = self.w / (self.w**2).sum()

        M, H = self.compute_M_H(dataset.targets)

        while True:
            self.k = self.kernel(length_scale=self.kernel_width / self.w)
            d_w_k = self.k.computed(samples).as_raw_np()
            # set d_w_k to zero where distance=0 (i.e. kernel ==
            # 1.0), otherwise I-RELIEF could not converge.
            # XXX Note that kernel==1 for distance=0 only for
            # exponential kernels!!  IMPROVE
            d_w_k[np.abs(d_w_k - 1.0) < 1.0e-15] = 0.0
            ni = np.zeros(NF, 'd')
            for n in range(NS):
                # d_w_k[n,n] could be omitted since == 0.0
                gamma_n = 1.0 - np.nan_to_num(d_w_k[n, M[n]].sum() \
                                / (d_w_k[n, :].sum()-d_w_k[n, n]))
                alpha_n = np.nan_to_num(d_w_k[n, M[n]] /
                                        (d_w_k[n, M[n]].sum()))
                beta_n = np.nan_to_num(d_w_k[n, H[n]] / (d_w_k[n, H[n]].sum()))

                m_n = (np.abs(samples[n, :] - samples[M[n], :]) \
                        * alpha_n[:, None]).sum(0)
                h_n = (np.abs(samples[n, :] - samples[H[n], :]) \
                        * beta_n[:, None]).sum(0)
                ni += gamma_n * (m_n - h_n)
            ni = ni / NS

            ni_plus = np.clip(ni, 0.0,
                              np.inf)  # set all negative elements to zero
            w_new = np.nan_to_num(ni_plus / (np.sqrt((ni_plus**2).sum())))
            change = np.abs(w_new - self.w).sum()
            if __debug__ and 'IRELIEF' in debug.active:
                debug(
                    'IRELIEF',
                    "change=%.4f max=%f min=%.4f mean=%.4f std=%.4f #nan=%d" %
                    (change, w_new.max(), w_new.min(), w_new.mean(),
                     w_new.std(), np.isnan(w_new).sum()))

            # update weights:
            self.w = w_new
            if change < self.threshold:
                break

        return Dataset(self.w[np.newaxis])
Example #7
def test_datasetmapping():
    # 6 samples, 4 features
    data = np.arange(24).reshape(6, 4)
    ds = Dataset(data,
                 sa={
                     'timepoints': np.arange(6),
                     'multidim': data.copy()
                 },
                 fa={'fid': np.arange(4)})
    # with overlapping and non-overlapping boxcars
    startpoints = [0, 1, 4]
    boxlength = 2
    bm = BoxcarMapper(startpoints, boxlength, inspace='boxy')
    # train is critical
    bm.train(ds)
    mds = bm.forward(ds)
    assert_equal(len(mds), len(startpoints))
    assert_equal(mds.nfeatures, boxlength)
    # all samples attributes remain, but they can be rotated/compressed into
    # multidimensional attributes
    assert_equal(sorted(mds.sa.keys()),
                 ['boxy_onsetidx'] + sorted(ds.sa.keys()))
    assert_equal(mds.sa.multidim.shape,
                 (len(startpoints), boxlength, ds.nfeatures))
    assert_equal(mds.sa.timepoints.shape, (len(startpoints), boxlength))
    assert_array_equal(mds.sa.timepoints.flatten(),
                       np.array([(s, s + 1) for s in startpoints]).flatten())
    assert_array_equal(mds.sa.boxy_onsetidx, startpoints)
    # feature attributes also get rotated and broadcasted
    assert_array_equal(mds.fa.fid, [ds.fa.fid, ds.fa.fid])
    # and finally there is a new one
    assert_array_equal(mds.fa.boxy_offsetidx,
                       np.repeat(np.arange(boxlength), 4).reshape(2, -1))

    # now see how it works on reverse()
    rds = bm.reverse(mds)
    # we got at least something of all original attributes back
    assert_equal(sorted(rds.sa.keys()), sorted(ds.sa.keys()))
    assert_equal(sorted(rds.fa.keys()), sorted(ds.fa.keys()))
    # it is not possible to reconstruct the full samples array
    # some samples even might show up multiple times (when there are
    # overlapping boxcars)
    assert_array_equal(
        rds.samples,
        np.array([[0, 1, 2, 3], [4, 5, 6, 7], [4, 5, 6, 7], [8, 9, 10, 11],
                  [16, 17, 18, 19], [20, 21, 22, 23]]))
    assert_array_equal(rds.sa.timepoints, [0, 1, 1, 2, 4, 5])
    assert_array_equal(rds.sa.multidim, ds.sa.multidim[rds.sa.timepoints])
    # but feature attributes should be fully recovered
    assert_array_equal(rds.fa.fid, ds.fa.fid)
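
What the forward mapping does to the samples array can be spelled out with plain numpy indexing; a tiny sketch using the same startpoints and boxlength as the test:

import numpy as np

data = np.arange(24).reshape(6, 4)
startpoints, boxlength = [0, 1, 4], 2
boxed = np.array([data[s:s + boxlength] for s in startpoints])
assert boxed.shape == (3, 2, 4)          # (n_boxes, boxlength, n_features)
assert (boxed[1] == data[1:3]).all()     # the boxcar starting at sample 1
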
Example #8
    def build_streamline_things(self):
        # Build a dataset having samples of different lengths. This is
        # trying to mimic a possible interface for streamlines
        # datasets, i.e., an iterable container of Mx3 points, where M
        # depends on each single streamline.

        # trying to pack it into an 'object' array to prevent conversion in the
        # Dataset
        self.streamline_samples = np.array([
                                   np.random.rand(3,3),
                                   np.random.rand(5,3),
                                   np.random.rand(7,3)],
                                   dtype='object')
        self.dataset = Dataset(self.streamline_samples)
        self.similarities = [StreamlineSimilarity(distance=corouge)]
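
If np.array ever tries to be too clever about the nested arrays, a more explicit way to build such a ragged 'object' container is to pre-allocate and fill it; a short plain-numpy sketch:

import numpy as np

chunks = [np.random.rand(3, 3), np.random.rand(5, 3), np.random.rand(7, 3)]
packed = np.empty(len(chunks), dtype=object)
for i, c in enumerate(chunks):
    packed[i] = c                        # each element keeps its own Mx3 shape
assert packed.shape == (3,) and packed[1].shape == (5, 3)
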
Example #9
    def _call(self, dataset):
        """Computes featurewise I-RELIEF weights."""
        samples = dataset.samples
        NS, NF = samples.shape[:2]

        if self.w_guess is None:
            w = np.ones(NF, 'd')

        w /= (w**2).sum()  # do normalization in all cases to be safe :)

        M, H = self.compute_M_H(dataset.targets)

        while True:
            d_w_k = self.k(pnorm_w(data1=samples, weight=w, p=1))
            ni = np.zeros(NF, 'd')
            for n in range(NS):
                # d_w_k[n, n] could be omitted since == 0.0
                gamma_n = 1.0 - np.nan_to_num(d_w_k[n, M[n]].sum() \
                                / (d_w_k[n, :].sum() - d_w_k[n, n]))
                alpha_n = np.nan_to_num(d_w_k[n, M[n]] /
                                        (d_w_k[n, M[n]].sum()))
                beta_n = np.nan_to_num(d_w_k[n, H[n]] / (d_w_k[n, H[n]].sum()))

                m_n = (np.abs(samples[n, :] - samples[M[n], :]) \
                       * alpha_n[:, None]).sum(0)
                h_n = (np.abs(samples[n, :] - samples[H[n], :]) \
                       * beta_n[:, None]).sum(0)
                ni += gamma_n * (m_n - h_n)

            ni = ni / NS

            ni_plus = np.clip(ni, 0.0,
                              np.inf)  # set all negative elements to zero
            w_new = np.nan_to_num(ni_plus / (np.sqrt((ni_plus**2).sum())))
            change = np.abs(w_new - w).sum()
            if __debug__ and 'IRELIEF' in debug.active:
                debug('IRELIEF',
                      "change=%.4f max=%f min=%.4f mean=%.4f std=%.4f #nan=%d" \
                      % (change, w_new.max(), w_new.min(), w_new.mean(),
                         w_new.std(), np.isnan(w_new).sum()))

            # update weights:
            w = w_new
            if change < self.threshold:
                break

        self.w = w
        return Dataset(self.w[np.newaxis])
Example #10
def test_resample():
    time = np.linspace(0, 2 * np.pi, 100)
    ds = Dataset(np.vstack((np.sin(time), np.cos(time))).T,
                 sa={
                     'time': time,
                     'section': np.repeat(range(10), 10)
                 })
    assert_equal(ds.shape, (100, 2))

    # downsample
    num = 10
    rm = FFTResampleMapper(num,
                           window=('gauss', 50),
                           position_attr='time',
                           attr_strategy='sample')
    mds = rm(ds)
    assert_equal(mds.shape, (num, ds.nfeatures))
    # didn't change the orig
    assert_equal(len(ds), 100)

    # check position-based resampling
    ds_partial = ds[0::10]
    mds_partial = rm(ds_partial)
    # despite different input sampling should yield the same output timepoints
    assert_array_almost_equal(mds.sa.time, mds_partial.sa.time)
    # exclude the first points to prevent edge effects; the remaining data
    # should still be very similar
    assert_array_almost_equal(mds.samples[2:],
                              mds_partial.samples[2:],
                              decimal=2)
    # simple sample of sa's should give meaningful stuff
    assert_array_equal(mds.sa.section, range(10))

    # and now for a dataset with chunks
    cds = vstack([ds.copy(), ds.copy()])
    cds.sa['chunks'] = np.repeat([0, 1], len(ds))
    rm = FFTResampleMapper(num,
                           attr_strategy='sample',
                           chunks_attr='chunks',
                           window=('gauss', 50))
    mcds = rm(cds)
    assert_equal(mcds.shape, (20, 2))
    assert_array_equal(mcds.sa.section, np.tile(range(10), 2))
    # each individual chunk should be identical to the previous dataset
    assert_array_almost_equal(mds.samples, mcds.samples[:10])
    assert_array_almost_equal(mds.samples, mcds.samples[10:])
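
The 'sample' attribute strategy asserted above simply subsamples the attribute with a stride of len(ds)/num; a small numpy check that this indeed yields range(10) for the 'section' attribute:

import numpy as np

section = np.repeat(range(10), 10)       # the sa used in the test (100 values)
num = 10
step = len(section) // num               # == 10
assert (section[0::step][:num] == np.arange(10)).all()
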
Example #11
def bench_pymvpa(X, Y):
    """
    bench with pymvpa (by default uses a custom swig-generated wrapper
    around libsvm)
    """
    from mvpa.datasets import Dataset
    from mvpa.clfs import svm

    gc.collect()

    # start time
    tstart = datetime.now()
    data = Dataset(samples=X, labels=Y)
    clf = svm.RbfCSVMC(C=1.)
    clf.train(data)
    Z = clf.predict(X)
    delta = (datetime.now() - tstart)

    # store elapsed time
    mvpa_results.append(delta.seconds + delta.microseconds / mu_second)
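
For reference, a hedged sketch of how the matching scikit-learn timing could look (the function name, mu_second, and the use of an RBF SVC mirror the snippet above but are assumptions about the rest of the script):

def bench_sklearn(X, Y):
    """Same benchmark with scikit-learn's libsvm-backed SVC."""
    import gc
    from datetime import datetime
    from sklearn.svm import SVC

    mu_second = 1e6                      # microseconds per second

    gc.collect()
    tstart = datetime.now()
    clf = SVC(kernel='rbf', C=1.)
    clf.fit(X, Y)
    clf.predict(X)
    delta = datetime.now() - tstart
    return delta.seconds + delta.microseconds / mu_second
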
Example #12
    def test_1d_multispace_searchlight(self):
        ds = Dataset([np.arange(6)])
        ds.fa['coord1'] = np.repeat(np.arange(3), 2)
        # add a second space to the dataset
        ds.fa['coord2'] = np.tile(np.arange(2), 3)
        measure = lambda x: "+".join([str(x) for x in x.samples[0]])
        # simply select each feature once
        res = Searchlight(measure,
                          IndexQueryEngine(coord1=Sphere(0),
                                           coord2=Sphere(0)),
                          nproc=1)(ds)
        assert_array_equal(res.samples, [['0', '1', '2', '3', '4', '5']])
        res = Searchlight(measure,
                          IndexQueryEngine(coord1=Sphere(0),
                                           coord2=Sphere(1)),
                          nproc=1)(ds)
        assert_array_equal(res.samples,
                           [['0+1', '0+1', '2+3', '2+3', '4+5', '4+5']])
        res = Searchlight(measure,
                          IndexQueryEngine(coord1=Sphere(1),
                                           coord2=Sphere(0)),
                          nproc=1)(ds)
        assert_array_equal(res.samples,
                           [['0+2', '1+3', '0+2+4', '1+3+5', '2+4', '3+5']])
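
The expected strings above can be reproduced with plain numpy by intersecting the per-space neighborhoods; a small sketch of that selection logic (not the IndexQueryEngine API):

import numpy as np

coord1 = np.repeat(np.arange(3), 2)      # [0 0 1 1 2 2]
coord2 = np.tile(np.arange(2), 3)        # [0 1 0 1 0 1]

def neighbors(center, r1, r2):
    # features within radius r1 in coord1 AND radius r2 in coord2
    mask = (np.abs(coord1 - coord1[center]) <= r1) & \
           (np.abs(coord2 - coord2[center]) <= r2)
    return np.flatnonzero(mask).tolist()

assert neighbors(0, 1, 0) == [0, 2]          # the '0+2' entry
assert neighbors(2, 1, 0) == [0, 2, 4]       # the '0+2+4' entry
assert neighbors(1, 0, 1) == [0, 1]          # the '0+1' entry from Sphere(0)/Sphere(1)
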
Example #13
    def _call(self, dataset):
        """Computes featurewise I-RELIEF-2 weights. Online version."""
        # local bindings
        samples = dataset.samples
        NS, NF = samples.shape[:2]
        threshold = self.threshold
        a = self.a

        if self.w_guess is None:
            w = np.ones(NF, 'd')

        # do normalization in all cases to be safe :)
        w /= (w**2).sum()

        M, H = self.compute_M_H(dataset.targets)

        ni = np.zeros(NF, 'd')
        pi = np.zeros(NF, 'd')

        if self.permute:
            # indices to go through x in random order
            random_sequence = np.random.permutation(NS)
        else:
            random_sequence = np.arange(NS)

        change = threshold + 1.0
        iteration = 0
        counter = 0.0
        while change > threshold and iteration < self.max_iter:
            if __debug__:
                debug('IRELIEF', "Iteration %d" % iteration)
            for t in range(NS):
                counter += 1.0
                n = random_sequence[t]

                d_xn_x = np.abs(samples[n, :] - samples)
                d_w_k_xn_x = self.k((d_xn_x * w).sum(1))

                d_w_k_xn_Mn = d_w_k_xn_x[M[n]]
                d_w_k_xn_Mn_sum = d_w_k_xn_Mn.sum()

                gamma_n = 1.0 - d_w_k_xn_Mn_sum / d_w_k_xn_x.sum()
                alpha_n = d_w_k_xn_Mn / d_w_k_xn_Mn_sum

                d_w_k_xn_Hn = d_w_k_xn_x[H[n]]
                beta_n = d_w_k_xn_Hn / d_w_k_xn_Hn.sum()

                m_n = (d_xn_x[M[n], :] * alpha_n[:, None]).sum(0)
                h_n = (d_xn_x[H[n], :] * beta_n[:, None]).sum(0)
                pi = gamma_n * (m_n - h_n)
                learning_rate = 1.0 / (counter * a + 1.0)
                ni_new = ni + learning_rate * (pi - ni)
                ni = ni_new

                # set all negative elements to zero
                ni_plus = np.clip(ni, 0.0, np.inf)
                w_new = np.nan_to_num(ni_plus / (np.sqrt((ni_plus**2).sum())))
                change = np.abs(w_new - w).sum()
                if t % 10 == 0 and __debug__ and 'IRELIEF' in debug.active:
                    debug(
                        'IRELIEF',
                        "t=%d change=%.4f max=%f min=%.4f mean=%.4f std=%.4f"
                        " #nan=%d" %
                        (t, change, w_new.max(), w_new.min(), w_new.mean(),
                         w_new.std(), np.isnan(w_new).sum()))

                w = w_new

                if change < threshold and iteration > 0:
                    break

            iteration += 1

        self.w = w
        return Dataset(self.w[np.newaxis])
Example #14
    def _call(self, dataset):
        """Computes featurewise I-RELIEF-2 weights. Online version."""
        NS = dataset.samples.shape[0]
        NF = dataset.samples.shape[1]
        if self.w_guess is None:
            self.w = np.ones(NF, 'd')
        # do normalization in all cases to be safe :)
        self.w = self.w / (self.w**2).sum()

        M, H = self.compute_M_H(dataset.targets)

        ni = np.zeros(NF, 'd')
        pi = np.zeros(NF, 'd')

        if self.permute:
            # indices to go through samples in random order
            random_sequence = np.random.permutation(NS)
        else:
            random_sequence = np.arange(NS)

        change = self.threshold + 1.0
        iteration = 0
        counter = 0.0
        while change > self.threshold and iteration < self.max_iter:
            if __debug__:
                debug('IRELIEF', "Iteration %d" % iteration)

            for t in range(NS):
                counter += 1.0
                n = random_sequence[t]

                self.k = self.kernel(length_scale=self.kernel_width / self.w)
                d_w_k_xn_Mn = self.k.computed(
                    dataset.samples[None, n, :],
                    dataset.samples[M[n], :]).as_raw_np().squeeze()
                d_w_k_xn_Mn_sum = d_w_k_xn_Mn.sum()
                d_w_k_xn_x = self.k.computed(
                    dataset.samples[None, n, :],
                    dataset.samples).as_raw_np().squeeze()
                gamma_n = 1.0 - d_w_k_xn_Mn_sum / d_w_k_xn_x.sum()
                alpha_n = d_w_k_xn_Mn / d_w_k_xn_Mn_sum

                d_w_k_xn_Hn = self.k.computed(
                    dataset.samples[None, n, :],
                    dataset.samples[H[n], :]).as_raw_np().squeeze()
                beta_n = d_w_k_xn_Hn / d_w_k_xn_Hn.sum()

                m_n = (np.abs(dataset.samples[n, :] - dataset.samples[M[n], :]) \
                        * alpha_n[:, np.newaxis]).sum(0)
                h_n = (np.abs(dataset.samples[n, :] - dataset.samples[H[n], :]) \
                        * beta_n[:, np.newaxis]).sum(0)
                pi = gamma_n * (m_n - h_n)
                learning_rate = 1.0 / (counter * self.a + 1.0)
                ni_new = ni + learning_rate * (pi - ni)
                ni = ni_new

                # set all negative elements to zero
                ni_plus = np.clip(ni, 0.0, np.inf)
                w_new = np.nan_to_num(ni_plus / (np.sqrt((ni_plus**2).sum())))
                change = np.abs(w_new - self.w).sum()
                if t % 10 == 0 and __debug__ and 'IRELIEF' in debug.active:
                    debug(
                        'IRELIEF',
                        "t=%d change=%.4f max=%f min=%.4f mean=%.4f std=%.4f"
                        " #nan=%d" %
                        (t, change, w_new.max(), w_new.min(), w_new.mean(),
                         w_new.std(), np.isnan(w_new).sum()))

                self.w = w_new

                if change < self.threshold and iteration > 0:
                    break

            iteration += 1

        return Dataset(self.w[np.newaxis])
Example #15
import numpy as np
from numpy import genfromtxt

from mvpa.datasets import Dataset
from mvpa.clfs.knn import kNN

#pymvpa stuff
f_handle = open("classdatafile.txt", 'r')
f_handle2 = open("classidfile.txt", 'r')
f_handle3 = open("predictdata.txt", 'r')
features = genfromtxt(f_handle, dtype=float)
classes = genfromtxt(f_handle2, dtype=int)
predictdata = genfromtxt(f_handle3, dtype=float)
predictdata = np.expand_dims(predictdata, axis=0)
print predictdata
print np.shape(features), features.ndim, features.dtype
print np.shape(classes), classes.ndim, classes.dtype
print np.shape(predictdata), predictdata.ndim, predictdata.dtype
f_handle.close()
f_handle2.close()
f_handle3.close()

training = Dataset(samples=features, labels=classes)
clf = kNN(k=2)
print "clf = ", clf
clf.train(training)
#print np.mean(clf.predict(training.samples) == training.labels)
classID = clf.predict(predictdata)
print "classID = ", classID
#print clf.trained_labels
if classID[0] == 1: print "Image is of class: GRASS"
if classID[0] == 2: print "Image is of class: DIRT/GRAVEL"
if classID[0] == 3: print "Image is of class: CEMENT/ASPHALT"
Example #16
lbp = np.asarray(lbp)
i3_histo = np.asarray(i3_histo)
rgb_histo = np.asarray(rgb_histo)

id_index = 15
lbp_predictdata = lbp[[id_index]]
i3_histo_predictdata = i3_histo[[id_index]]

print
#print predictdata
print classID[id_index]
#print "len lbp:", len(lbp)
#print "shape:", lbp.shape

#mvpa
lbp_training = Dataset(samples=lbp, labels=classID)
i3_histo_training = Dataset(samples=i3_histo, labels=classID)
clf = kNN(k=1, voting='majority')
print "clf = ", clf
clf.train(lbp_training)
lbp_predicted_classID = clf.predict(lbp_predictdata)
clf.train(i3_histo_training)
i3_histo_predicted_classID = clf.predict(i3_histo_predictdata)

print "lbp_predicted_classID: ", lbp_predicted_classID
print "i3_histo_predicted_classID: ", i3_histo_predicted_classID
#if predicted_classID[0]  == 1.0: print "Image is of class: GRASS"
#if predicted_classID[0]  == 2.0: print "Image is of class: DIRT/GRAVEL"
#if predicted_classID[0]  == 3.0: print "Image is of class: CEMENT/ASPHALT"
Example #17
def test_polydetrend():
    samples_forwhole = np.array(
        [[1.0, 2, 3, 4, 5, 6], [-2.0, -4, -6, -8, -10, -12]], ndmin=2).T
    samples_forchunks = np.array(
        [[1.0, 2, 3, 3, 2, 1], [-2.0, -4, -6, -6, -4, -2]], ndmin=2).T
    chunks = [0, 0, 0, 1, 1, 1]
    chunks_bad = [0, 0, 1, 1, 1, 0]
    target_whole = np.array([[-3.0, -2, -1, 1, 2, 3], [-6, -4, -2, 2, 4, 6]],
                            ndmin=2).T
    target_chunked = np.array([[-1.0, 0, 1, 1, 0, -1], [2, 0, -2, -2, 0, 2]],
                              ndmin=2).T

    ds = Dataset(samples_forwhole)

    # this one will auto-train the mapper on first use
    dm = PolyDetrendMapper(polyord=1, inspace='police')
    mds = dm(ds)
    # features are linear trends, so detrending should remove all
    assert_array_almost_equal(mds.samples, np.zeros(mds.shape))
    # we get the information where each sample is assumed to be in the
    # space spanned by the polynomials
    assert_array_equal(mds.sa.police, np.arange(len(ds)))

    # hackish way to get the previous regressors into a dataset
    ds.sa['opt_reg_const'] = dm._regs[:, 0]
    ds.sa['opt_reg_lin'] = dm._regs[:, 1]
    # using these precomputed regressors, we should get the same result as
    # before even if we do not generate a regressor for linear
    dm_optreg = PolyDetrendMapper(polyord=0,
                                  opt_regs=['opt_reg_const', 'opt_reg_lin'])
    mds_optreg = dm_optreg(ds)
    assert_array_almost_equal(mds_optreg, np.zeros(mds.shape))

    ds = Dataset(samples_forchunks)
    # 'constant' detrending removes the mean
    mds = PolyDetrendMapper(polyord=0)(ds)
    assert_array_almost_equal(
        mds.samples, samples_forchunks - np.mean(samples_forchunks, axis=0))
    # if there is no GLOBAL linear trend it should be identical to mean removal
    # even if trying to remove linear
    mds2 = PolyDetrendMapper(polyord=1)(ds)
    assert_array_almost_equal(mds, mds2)

    # chunk-wise detrending
    ds = dataset_wizard(samples_forchunks, chunks=chunks)
    dm = PolyDetrendMapper(chunks_attr='chunks', polyord=1, inspace='police')
    mds = dm(ds)
    # features are chunkswise linear trends, so detrending should remove all
    assert_array_almost_equal(mds.samples, np.zeros(mds.shape))
    # we get the information where each sample is assumed to be in the
    # space spanned by the polynomials, which is the identical linspace in both
    # chunks
    assert_array_equal(mds.sa.police, range(3) * 2)
    # non-matching number of samples cannot be mapped
    assert_raises(ValueError, dm, ds[:-1])
    # however, if the dataset knows about the space it is possible
    ds.sa['police'] = mds.sa.police
    # XXX this should be
    #mds2 = dm(ds[1:-1])
    #assert_array_equal(mds[1:-1], mds2)
    # XXX but right now is
    assert_raises(NotImplementedError, dm, ds[1:-1])

    # Detrend must preserve the size of dataset
    assert_equal(mds.shape, ds.shape)

    # small additional test for break points
    # although they are no longer there
    ds = dataset_wizard(np.array([[1.0, 2, 3, 1, 2, 3]], ndmin=2).T,
                        targets=chunks,
                        chunks=chunks)
    mds = PolyDetrendMapper(chunks_attr='chunks', polyord=1)(ds)
    assert_array_almost_equal(mds.samples, np.zeros(mds.shape))

    # test of different polyord on each chunk
    target_mixed = np.array([[-1.0, 0, 1, 0, 0, 0], [2.0, 0, -2, 0, 0, 0]],
                            ndmin=2).T
    ds = dataset_wizard(samples_forchunks.copy(),
                        targets=chunks,
                        chunks=chunks)
    mds = PolyDetrendMapper(chunks_attr='chunks', polyord=[0, 1])(ds)
    assert_array_almost_equal(mds, target_mixed)

    # test irregular spacing of samples, but with corrective time info
    samples_forwhole = np.array(
        [[1.0, 4, 6, 8, 2, 9], [-2.0, -8, -12, -16, -4, -18]], ndmin=2).T
    ds = Dataset(samples_forwhole, sa={'time': samples_forwhole[:, 0]})
    # linear detrending that makes use of temporal info from dataset
    dm = PolyDetrendMapper(polyord=1, inspace='time')
    mds = dm(ds)
    assert_array_almost_equal(mds.samples, np.zeros(mds.shape))

    # and now the same stuff, but with chunking and ordered by time
    samples_forchunks = np.array(
        [[1.0, 3, 3, 2, 2, 1], [-2.0, -6, -6, -4, -4, -2]], ndmin=2).T
    chunks = [0, 1, 0, 1, 0, 1]
    time = [4, 4, 12, 8, 8, 12]
    ds = Dataset(samples_forchunks.copy(), sa={'chunks': chunks, 'time': time})
    mds = PolyDetrendMapper(chunks_attr='chunks', polyord=1,
                            inspace='time')(ds)

    # the whole thing must not affect the source data
    assert_array_equal(ds, samples_forchunks)
    # but if done inplace that is no longer true
    poly_detrend(ds, chunks_attr='chunks', polyord=1, inspace='time')
    assert_array_equal(ds, mds)
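
The core detrending operation tested throughout can be reproduced with numpy's polynomial fitting; a brief sketch showing that removing a first-order fit zeroes out purely linear features (plain numpy, not the PolyDetrendMapper API):

import numpy as np

x = np.arange(6, dtype=float)
samples = np.array([[1.0, 2, 3, 4, 5, 6], [-2.0, -4, -6, -8, -10, -12]]).T
detrended = np.empty_like(samples)
for col in range(samples.shape[1]):
    coef = np.polyfit(x, samples[:, col], 1)        # linear + constant term
    detrended[:, col] = samples[:, col] - np.polyval(coef, x)
assert np.allclose(detrended, 0)
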