Ejemplo n.º 1
0
 def test_slightly_different_domain(self):
     """A slightly modified test domain should, with interpolation,
     yield a classification score close to the original one."""
     model = LogisticRegressionLearner(preprocessors=[])
     for preprocessor in PREPROCESSORS:
         # this LR variant can not cope with unknown values
         train, test = separate_learn_test(self.collagen)
         baseline = AUC(TestOnTestData(preprocessor(train), test, [model]))
         test = odd_attr(destroy_atts_conversion(test))
         # train on a strict subset of the points so that every test
         # point lies inside the training range -> no unknowns appear
         train = Interpolate(points=getx(train)[1:-3])(train)
         train = preprocessor(train)
         # run the domain conversion explicitly to surface exceptions
         # that TestOnTestData would otherwise handle silently
         _ = Orange.data.Table(train.domain, test)
         auc = AUC(TestOnTestData(train, test, [model]))
         self.assertAlmostEqual(auc, baseline, delta=0.02)
         # shift the test wavenumbers as well
         test = Interpolate(points=getx(test) - 1.)(test)
         _ = Orange.data.Table(train.domain, test)  # explicit conversion again
         auc = AUC(TestOnTestData(train, test, [model]))
         # a shift should change the score only slightly
         self.assertAlmostEqual(auc, baseline, delta=0.05)
Ejemplo n.º 2
0
 def commit(self):
     """Interpolate the input data according to the selected mode and
     send the result (or None when nothing can be produced)."""
     result = None
     self.Error.dxzero.clear()
     self.Error.too_many_points.clear()
     if self.data:
         if self.input_radio == 0:
             # reuse the data's own sampling points
             result = Interpolate(getx(self.data))(self.data)
         elif self.input_radio == 1:
             xs = getx(self.data)
             if not self.dx > 0:  # negated form also rejects NaN dx
                 self.Error.dxzero()
             else:
                 lo = np.min(xs) if self.xmin is None else self.xmin
                 hi = np.max(xs) if self.xmax is None else self.xmax
                 lo, hi = min(lo, hi), max(lo, hi)
                 reslength = abs(math.ceil((hi - lo) / self.dx))
                 # guard against building an absurdly dense grid
                 if reslength < 10002:
                     result = Interpolate(np.arange(lo, hi, self.dx))(self.data)
                 else:
                     self.Error.too_many_points(reslength)
         elif self.input_radio == 2 and self.data_points is not None:
             result = Interpolate(self.data_points)(self.data)
     self.send("Interpolated data", result)
 def test_permute(self):
     """Interpolation should be driven by the attributes' numeric names,
     so permuting the input columns (and renaming them to match) must not
     change the interpolated result."""
     rs = np.random.RandomState(0)  # fixed seed for reproducibility
     data = Orange.data.Table("iris")
     oldX = data.X
     # permute data: rename attributes to the permuted positions and
     # reorder the columns of X accordingly
     p = rs.permutation(range(len(data.domain.attributes)))
     for i, a in enumerate(data.domain.attributes):
         a.name = str(p[i])
     data.X = data.X[:, p]
     interpolated = Interpolate(range(len(data.domain.attributes)))(data)
     np.testing.assert_allclose(interpolated.X, oldX)
     # also permute output: request the interpolation points in a
     # different order and expect the columns permuted the same way
     p1 = rs.permutation(range(len(data.domain.attributes)))
     interpolated = Interpolate(p1)(data)
     np.testing.assert_allclose(interpolated.X, oldX[:, p1])
     # NOTE(review): the renames above pollute Orange's global variable
     # cache; this private API call cleans up so later tests see fresh
     # variables — confirm it is still needed with current Orange
     Orange.data.domain.Variable._clear_all_caches()
Ejemplo n.º 4
0
 def test_predict_savgol_another_interpolate(self):
     """Interpolating Savitzky-Golay output onto its own points should
     leave the classification score essentially unchanged."""
     train, test = separate_learn_test(self.collagen)
     train = SavitzkyGolayFiltering(window=9, polyorder=2, deriv=2)(train)
     auc_before = AUC(
         TestOnTestData(train, test, [LogisticRegressionLearner()]))
     # re-interpolation at the existing wavenumbers is (nearly) a no-op
     train = Interpolate(points=getx(train))(train)
     auc_after = AUC(
         TestOnTestData(train, test, [LogisticRegressionLearner()]))
     self.assertAlmostEqual(auc_before, auc_after, delta=0.02)
 def test_out_of_band(self):
     """Points outside the original range interpolate to NaN."""
     data = Orange.data.Table("iris")
     n_attrs = len(data.domain.attributes)
     # ask for one point below and one above the original range
     interpolated = Interpolate(range(-1, n_attrs + 1))(data)
     # the interior columns reproduce the original data ...
     np.testing.assert_allclose(interpolated.X[:, 1:5], data.X)
     # ... while the out-of-range edge columns are all NaN
     np.testing.assert_equal(interpolated.X[:, [0, -1]], np.nan)
 def test_domain_conversion(self):
     """An interpolated domain must be usable for converting other tables."""
     data = Orange.data.Table("iris")
     interpolated = Interpolate([0.5, 1.5])(data)
     converted = Orange.data.Table.from_table(interpolated.domain, data)
     # conversion through the domain reproduces the interpolated table
     self.assertEqual(interpolated.domain, converted.domain)
     np.testing.assert_equal(interpolated.X, converted.X)
     np.testing.assert_equal(interpolated.Y, converted.Y)
Ejemplo n.º 7
0
 def test_predict_samename_domain_interpolation(self):
     """With interpolation, a test set whose direct domain conversion was
     destroyed still yields exactly the original score."""
     train, test = separate_learn_test(self.collagen)
     auc_expected = AUC(
         TestOnTestData(train, test, [LogisticRegressionLearner()]))
     test = destroy_atts_conversion(test)
     # interpolation makes the training data usable on the broken domain
     train = Interpolate(points=getx(train))(train)
     auc_actual = AUC(
         TestOnTestData(train, test, [LogisticRegressionLearner()]))
     self.assertEqual(auc_expected, auc_actual)
 def test_floatname(self):
     """Interpolating midway between two float-named attributes must give
     the average of those two columns."""
     data = Orange.data.Table("collagen.csv")
     f1, f2 = 20, 21
     # attribute names encode wavenumbers as floats
     c1, c2 = float(data.domain.attributes[f1].name), \
              float(data.domain.attributes[f2].name)
     avg = (c1 + c2) / 2
     interpolated = Interpolate([avg])(data)
     av1 = interpolated.X.ravel()
     # FIX: use f1/f2 rather than repeating the literals 20/21, so the
     # expected value always tracks the columns chosen above
     av2 = data.X[:, [f1, f2]].mean(axis=1)
     np.testing.assert_allclose(av1, av2)
def test_time():
    """Benchmark Interpolate with and without NaN handling.

    For each dataset, prints the elapsed time of the no-NaN path and of
    the two unknown-aware interpolation functions, then asserts that NaN
    handling leaves no NaNs in the result.
    """
    fns = ["collagen", dust(), spectra20nea(), "peach_juice.dpt"]
    for fn in fns:
        print(fn)
        data = Table(fn)
        print(data.X.shape)
        data[0, 2] = np.nan  # inject an unknown to exercise NaN handling
        t = time.time()
        interpolated = Interpolate(getx(data), handle_nans=False)(data)
        print("no nan", time.time() - t)
        t = time.time()
        intp = Interpolate(getx(data), handle_nans=True)
        intp.interpfn = interp1d_with_unknowns_numpy
        interpolated = intp(data)
        print("nan handling with numpy", time.time() - t)
        # BUGFIX: reset the timer so the scipy measurement does not also
        # include the numpy run above
        t = time.time()
        intp.interpfn = interp1d_with_unknowns_scipy
        interpolated = intp(data)
        print("nan handling with scipy", time.time() - t)
        assert not np.any(np.isnan(interpolated.X))
Ejemplo n.º 10
0
 def test_unknown_elsewhere(self):
     """Scattered unknown values are filled by the default interpolation."""
     data = Orange.data.Table("iris")
     # inject three unknowns at known positions
     data.X[0, 1] = np.nan
     data.X[1, 1] = np.nan
     data.X[1, 2] = np.nan
     result = Interpolate(getx(data))(data)
     # the injected unknowns are replaced with interpolated values
     self.assertAlmostEqual(result.X[0, 1], 3.25)
     self.assertAlmostEqual(result.X[1, 1], 3.333333333333334)
     self.assertAlmostEqual(result.X[1, 2], 1.766666666666667)
     # and no NaNs remain anywhere
     self.assertFalse(np.any(np.isnan(result.X)))
Ejemplo n.º 11
0
 def test_slightly_different_domain(self):
     """With interpolation, a slightly perturbed test domain should give
     a classification score similar to the unperturbed one."""
     for preprocess in PREPROCESSORS:
         train, test = separate_learn_test(self.collagen)
         baseline = AUC(TestOnTestData(preprocess(train), test,
                                       [LogisticRegressionLearner()]))
         test = odd_attr(destroy_atts_conversion(test))
         # interpolation makes the training data usable on altered domains
         train = Interpolate(points=getx(train))(train)
         train = preprocess(train)
         auc = AUC(
             TestOnTestData(train, test, [LogisticRegressionLearner()]))
         self.assertAlmostEqual(auc, baseline, delta=0.02)
         # shift the test wavenumbers by one unit as well
         test = Interpolate(points=getx(test) - 1.)(test)
         auc = AUC(
             TestOnTestData(train, test, [LogisticRegressionLearner()]))
         # even with the shift the difference should remain slight
         self.assertAlmostEqual(auc, baseline, delta=0.05)
Ejemplo n.º 12
0
 def test_predict_different_domain_interpolation(self):
     """A shifted test domain barely lowers AUC when the training data can
     interpolate; progressively cutting the range degrades AUC further."""
     train, test = separate_learn_test(self.collagen)
     auc_orig = AUC(
         TestOnTestData(train, test, [LogisticRegressionLearner()]))
     test = Interpolate(points=getx(test) - 1.)(test)  # shift the test domain
     # interpolation lets the training data adapt to the shifted points
     train = Interpolate(points=getx(train))(train)
     auc_shift = AUC(
         TestOnTestData(train, test, [LogisticRegressionLearner()]))
     # a one-unit shift may decrease AUC only slightly
     self.assertAlmostEqual(auc_orig, auc_shift, delta=0.01)
     # cut the range cumulatively tighter and record each score
     cut_aucs = []
     for lowlim, highlim in ((1000, 1700), (1100, 1600), (1200, 1500)):
         test = Cut(lowlim, highlim)(test)
         cut_aucs.append(AUC(
             TestOnTestData(train, test, [LogisticRegressionLearner()])))
     auccut1, auccut2, auccut3 = cut_aucs
     # the more we cut, the lower the precision we get
     self.assertTrue(auc_orig > auccut1 > auccut2 > auccut3)
Ejemplo n.º 13
0
 def test_unknown_elsewhere_different(self):
     """Both unknown-aware interpolation functions fill unknowns with the
     same values; the unaware variant leaves NaNs behind."""
     data = Orange.data.Table("iris")
     # inject three unknowns at known positions
     data.X[0, 1] = np.nan
     data.X[1, 1] = np.nan
     data.X[1, 2] = np.nan
     im = Interpolate(getx(data))

     def check_filled(result):
         # expected interpolated values for the injected unknowns
         self.assertAlmostEqual(result.X[0, 1], 3.25)
         self.assertAlmostEqual(result.X[1, 1], 3.333333333333334)
         self.assertAlmostEqual(result.X[1, 2], 1.766666666666667)
         self.assertFalse(np.any(np.isnan(result.X)))

     im.interpfn = interp1d_with_unknowns_numpy
     check_filled(im(data))
     im.interpfn = interp1d_with_unknowns_scipy
     interpolated = im(data)
     check_filled(interpolated)
     save_X = interpolated.X
     # the variant without unknown handling keeps the NaNs
     im.interpfn = interp1d_wo_unknowns_scipy
     interpolated = im(data)
     self.assertTrue(np.any(np.isnan(interpolated.X)))
     # rows without unknowns should be identical across variants
     np.testing.assert_almost_equal(data.X[2:], save_X[2:])
Ejemplo n.º 14
0
 def setUpClass(cls):
     """Load the iris/whitelight datasets and build degenerate variants."""
     super().setUpClass()
     cls.iris = Orange.data.Table("iris")
     cls.whitelight = Orange.data.Table("whitelight.gsf")
     cls.whitelight_unknown = cls.whitelight.copy()
     cls.whitelight_unknown[0]["value"] = NAN
     # dataset reduced to a single attribute
     single_attr = Orange.data.Table(
         Orange.data.Domain(cls.iris.domain[:1]), cls.iris)
     # dataset with no attributes at all
     no_attrs = Orange.data.Table(Orange.data.Domain([]), cls.iris)
     # interpolating onto np.arange(20) leaves large blank regions
     blanks = Interpolate(np.arange(20))(cls.iris)
     # no attributes, but the XY coordinate metas are kept
     xy_only = Orange.data.Table(
         Orange.data.Domain([], None, metas=cls.whitelight.domain.metas),
         cls.whitelight)
     cls.strange_data = [None, single_attr, no_attrs, blanks, xy_only]
Ejemplo n.º 15
0
 def setUpClass(cls):
     """Load the normal datasets and build a list of degenerate ones."""
     super().setUpClass()
     cls.iris = Orange.data.Table("iris")
     cls.collagen = Orange.data.Table("collagen")
     cls.normal_data = [cls.iris, cls.collagen]
     # dataset reduced to a single attribute
     single_attr = Orange.data.Table(
         Orange.data.Domain(cls.iris.domain[:1]), cls.iris)
     # dataset with no attributes at all
     no_attrs = Orange.data.Table(Orange.data.Domain([]), cls.iris)
     # interpolating onto np.arange(20) leaves large blank regions
     blanks = Interpolate(np.arange(20))(cls.iris)
     cls.unknown_last_instance = cls.iris.copy()
     # needs to stay unknown even after sampling and permutation
     cls.unknown_last_instance.X[73] = NAN
     cls.strange_data = [
         single_attr, no_attrs, blanks, cls.unknown_last_instance]
Ejemplo n.º 16
0
 def setUpClass(cls):
     """Load the normal datasets and build degenerate ones, including a
     table whose two features share the same name."""
     super().setUpClass()
     cls.iris = Table("iris")
     cls.collagen = Table("collagen")
     cls.normal_data = [cls.iris, cls.collagen]
     # dataset reduced to a single attribute
     single_attr = Table(Domain(cls.iris.domain[:1]), cls.iris)
     # dataset with no attributes at all
     no_attrs = Table(Domain([]), cls.iris)
     # interpolating onto np.arange(20) leaves large blank regions
     blanks = Interpolate(np.arange(20))(cls.iris)
     cls.unknown_last_instance = cls.iris.copy()
     # needs to stay unknown even after sampling and permutation
     cls.unknown_last_instance.X[73] = NAN
     # a data set with two features that share the same name
     dup_domain = Domain([ContinuousVariable("1"), ContinuousVariable("1")])
     cls.same_features = Table(dup_domain, [[0, 1]])
     cls.strange_data = [
         single_attr, no_attrs, blanks, cls.unknown_last_instance,
         cls.same_features]
Ejemplo n.º 17
0
 def test_nofloatname(self):
     """When attribute names are not float-like, interpolating at 0.5
     yields the average of the first two columns."""
     data = Orange.data.Table("iris")
     interpolated = Interpolate([0.5])(data)
     computed = interpolated.X.ravel()
     expected = data.X[:, :2].mean(axis=1)
     np.testing.assert_allclose(computed, expected)
Ejemplo n.º 18
0
import numpy as np
import random
import Orange
from Orange.widgets.utils.annotated_data import get_next_name
from orangecontrib.infrared.data import getx
from orangecontrib.infrared.preprocess import Absorbance, Transmittance, \
    Integrate, Interpolate, Cut, SavitzkyGolayFiltering, \
    GaussianSmoothing, PCADenoising, RubberbandBaseline, \
    Normalize


# Preprocessors that work per sample and should return the same
# result for a sample independent of the other samples
PREPROCESSORS_INDEPENDENT_SAMPLES = [
    Interpolate(np.linspace(1000, 1700, 100)),
    SavitzkyGolayFiltering(window=9, polyorder=2, deriv=2),
    Cut(lowlim=1000, highlim=1800),
    GaussianSmoothing(sd=3.),
    Absorbance(),
    Transmittance(),
    Integrate(limits=[[900, 100], [1100, 1200], [1200, 1300]]),
    Integrate(methods=Integrate.Simple, limits=[[1100, 1200]]),
    Integrate(methods=Integrate.Baseline, limits=[[1100, 1200]]),
    Integrate(methods=Integrate.PeakMax, limits=[[1100, 1200]]),
    Integrate(methods=Integrate.PeakBaseline, limits=[[1100, 1200]]),
    Integrate(methods=Integrate.PeakAt, limits=[[1100]]),
    Integrate(methods=Integrate.PeakX, limits=[[1100, 1200]]),
    Integrate(methods=Integrate.PeakXBaseline, limits=[[1100, 1200]]),
    RubberbandBaseline(),
    Normalize(method=Normalize.Vector),
Ejemplo n.º 19
0
 def test_same(self):
     """Interpolating at the original points reproduces the original values."""
     data = Orange.data.Table("iris")
     n_attrs = len(data.domain.attributes)
     reinterpolated = Interpolate(range(n_attrs))(data)
     np.testing.assert_allclose(reinterpolated.X, data.X)
Ejemplo n.º 20
0
 def test_predict_different_domain(self):
     """Without interpolation support, a shifted test domain destroys
     prediction: AUC falls to chance level."""
     train, test = separate_learn_test(self.collagen)
     test = Interpolate(points=getx(test) - 1)(test)  # shift the test domain
     auc = AUC(TestOnTestData(train, test, [LogisticRegressionLearner()]))
     # a score near 0.5 means the classifier is guessing randomly
     self.assertTrue(0.45 < auc < 0.55)
Ejemplo n.º 21
0
 def test_unknown_middle(self):
     """A fully-NaN interior column is filled in by interpolation."""
     data = Orange.data.Table("iris")
     data.X[:, 1] = np.nan  # blank out a whole column in the middle
     result = Interpolate(getx(data))(data)
     self.assertFalse(np.any(np.isnan(result.X)))