def commit(self):
    """Interpolate the input data according to the selected mode and
    send the result (or None) to the "Interpolated data" output."""
    out = None
    self.Error.dxzero.clear()
    self.Error.too_many_points.clear()
    if self.data:
        if self.input_radio == 0:
            # Mode 0: reuse the data's own x values.
            points = getx(self.data)
            out = Interpolate(points)(self.data)
        elif self.input_radio == 1:
            # Mode 1: regular grid [xmin, xmax) with step dx.
            xs = getx(self.data)
            if not self.dx > 0:
                self.Error.dxzero()
            else:
                # Fall back to the data's own range when a limit is unset,
                # and reorder swapped limits.
                xmin = self.xmin if self.xmin is not None else np.min(xs)
                xmax = self.xmax if self.xmax is not None else np.max(xs)
                xmin, xmax = min(xmin, xmax), max(xmin, xmax)
                reslength = abs(math.ceil((xmax - xmin) / self.dx))
                if reslength < 10002:
                    # NOTE: np.arange excludes the xmax endpoint itself.
                    points = np.arange(xmin, xmax, self.dx)
                    out = Interpolate(points)(self.data)
                else:
                    # refuse to build an excessively dense grid
                    self.Error.too_many_points(reslength)
        elif self.input_radio == 2 and self.data_points is not None:
            # Mode 2: x values taken from the reference "Points" input.
            out = Interpolate(self.data_points)(self.data)
    self.send("Interpolated data", out)
def test_cut_both(self):
    """Cutting with both limits keeps only x values inside the interval."""
    data = self.collagen
    # an interval entirely outside the data range leaves no attributes
    empty = Cut(lowlim=0, highlim=2)(data)
    self.assertFalse(getx(empty))
    # an interval inside the range bounds the remaining x values
    inside = Cut(lowlim=1000, highlim=1100)(data)
    xs = getx(inside)
    self.assertGreaterEqual(min(xs), 1000)
    self.assertLessEqual(max(xs), 1100)
def test_slightly_different_domain(self):
    """
    If test data has a slightly different domain then (with
    interpolation) we should obtain a similar classification score.
    """
    # LR that can not handle unknown values
    learner = LogisticRegressionLearner(preprocessors=[])
    for proc in PREPROCESSORS:
        train, test = separate_learn_test(self.collagen)
        train1 = proc(train)
        aucorig = AUC(TestOnTestData(train1, test, [learner]))
        test = destroy_atts_conversion(test)
        test = odd_attr(test)
        # a subset of points for training so that all test sets points
        # are within the train set points, which gives no unknowns
        train = Interpolate(points=getx(train)[1:-3])(
            train)  # make train capable of interpolation
        train = proc(train)
        # explicit domain conversion test to catch exceptions that would
        # otherwise be silently handled in TestOnTestData
        _ = Orange.data.Table(train.domain, test)
        aucnow = AUC(TestOnTestData(train, test, [learner]))
        self.assertAlmostEqual(aucnow, aucorig, delta=0.02)
        test = Interpolate(points=getx(test) - 1.)(test)  # also do a shift
        _ = Orange.data.Table(train.domain, test)  # explicit call again
        aucnow = AUC(TestOnTestData(train, test, [learner]))
        self.assertAlmostEqual(
            aucnow, aucorig, delta=0.05)  # the difference should be slight
def test_roundtrip(self):
    """Saving to .xyz and reloading preserves X, wavenumbers and metas."""
    d1 = Orange.data.Table("map_test.xyz")
    # mkstemp returns an OPEN OS-level fd; the original discarded it,
    # leaking the descriptor. Close it immediately.
    fd, fn = tempfile.mkstemp(suffix=".xyz")
    os.close(fd)
    try:
        d1.save(fn)
        d2 = Orange.data.Table(fn)
        np.testing.assert_equal(d1.X, d2.X)
        np.testing.assert_equal(getx(d1), getx(d2))
        np.testing.assert_equal(d1.metas, d2.metas)
    finally:
        # remove the temporary file even when an assertion fails
        os.remove(fn)
def test_read(self):
    """Reading map_test.xyz yields the expected coordinates and values."""
    table = Orange.data.Table("map_test.xyz")
    self.assertEqual(len(table), 16)
    # map coordinates of the second instance
    self.assertEqual(table[1]["map_x"], 1)
    self.assertEqual(table[1]["map_y"], 7)
    # spot-check two intensity values
    self.assertEqual(table[1][1], 0.1243)
    self.assertEqual(table[2][2], 0.1242)
    # wavenumber range of the file
    xs = getx(table)
    self.assertEqual(min(xs), 1634.84)
    self.assertEqual(max(xs), 1641.69)
def set_data(self, data):
    """Store the input, show the data's x range as placeholder limits, commit."""
    self.data = data
    xmin_text = xmax_text = ""
    if self.data:
        points = getx(data)
        if len(points):
            xmin_text = str(np.min(points))
            xmax_text = str(np.max(points))
    self.xmin_edit.setPlaceholderText(xmin_text)
    self.xmax_edit.setPlaceholderText(xmax_text)
    self.commit()
def test_autointerpolate(self):
    """The output domain interpolates foreign data; the input domain does not."""
    self.send_signal("Data", self.collagen)
    interpolated = self.get_output("Interpolated data")
    np.testing.assert_equal(getx(self.collagen), getx(interpolated))
    # converting through the original domain performs no interpolation
    plain = Orange.data.Table(self.collagen.domain, self.peach)
    self.assertTrue(np.isnan(plain.X).all())
    # converting through the widget's output domain auto-interpolates
    auto = Orange.data.Table(interpolated.domain, self.peach)
    self.assertFalse(np.isnan(auto.X).all())
    np.testing.assert_equal(getx(self.collagen), getx(auto))
def test_interpolate_points(self):
    """Mode 2 (reference points) warns until a "Points" input is present."""
    self.assertFalse(self.widget.Warning.reference_data_missing.is_shown())
    # switching to reference-points mode without points shows the warning
    self.widget.controls.input_radio.buttons[2].click()
    self.assertTrue(self.widget.Warning.reference_data_missing.is_shown())
    self.send_signal("Data", self.peach)
    self.assertTrue(self.widget.Warning.reference_data_missing.is_shown())
    # supplying points clears the warning and drives the output x values
    self.send_signal("Points", self.collagen)
    self.assertFalse(self.widget.Warning.reference_data_missing.is_shown())
    out = self.get_output("Interpolated data")
    np.testing.assert_equal(getx(self.collagen), getx(out))
    # removing the points brings the warning back
    self.send_signal("Points", None)
    self.assertTrue(self.widget.Warning.reference_data_missing.is_shown())
def set_data(self, data): self.clear_data() self.attrs[:] = [] if data is not None: self.attrs[:] = ["(Same color)"] + [ var for var in chain(data.domain, data.domain.metas) if isinstance(var, str) or var.is_discrete] self.color_attr = 0 if data is not None: if self.data: self.rescale_next = not data.domain == self.data.domain else: self.rescale_next = True self.data = data # reset selection if dataset sizes do not match if self.selected_indices and \ (max(self.selected_indices) >= len(self.data) or self.data_size != len(self.data)): self.selected_indices.clear() self.data_size = len(self.data) # get and sort input data x = getx(self.data) xsind = np.argsort(x) self.data_x = x[xsind] self.data_xsind = xsind self._set_subset_indices() # refresh subset indices according to the current subset
def set_data(self, data):
    """Set the plotted data; keep feature_color while the domain is unchanged."""
    old_domain = self.data.domain if self.data else None
    self.clear_data()
    domain = data.domain if data is not None else None
    self.feature_color_model.set_domain(domain)
    # do not reset feature_color (only re-pick it when the domain changed)
    if old_domain and domain != old_domain:
        self.feature_color = self.feature_color_model[
            0] if self.feature_color_model else None
    if data is not None:
        # NOTE(review): relies on self.data surviving clear_data() — confirm
        if self.data:
            # rescale the view only when the domain actually changed
            self.rescale_next = not data.domain == self.data.domain
        else:
            self.rescale_next = True
        self.data = data
        # reset selection if dataset sizes do not match
        if self.selected_indices and \
                (max(self.selected_indices) >= len(self.data) or
                 self.data_size != len(self.data)):
            self.selected_indices.clear()
        self.data_size = len(self.data)
        # get and sort input data
        x = getx(self.data)
        xsind = np.argsort(x)
        self.data_x = x[xsind]
        self.data_xsind = xsind
        self._set_subset_indices()  # refresh subset indices according to the current subset
def set_data(self, data, rescale="auto"):
    """Set the plotted data and update the view.

    rescale: "auto" rescales only when the domain changed; an explicit
    True/False forces or suppresses the autorange.
    """
    self.clear_graph()
    self.clear_data()
    self.attrs[:] = []
    if data is not None:
        # offer discrete variables (and metas) as color choices
        self.attrs[:] = ["(Same color)"] + [
            var for var in chain(data.domain, data.domain.metas)
            if isinstance(var, str) or var.is_discrete]
        self.color_attr = 0
        self.set_pen_colors()
    if data is not None:
        if rescale == "auto":
            # NOTE(review): relies on self.data surviving clear_data() — confirm
            if self.data:
                rescale = not data.domain == self.data.domain
            else:
                rescale = True
        self.data = data
        # reset selection if dataset sizes do not match
        if self.selected_indices and \
                (max(self.selected_indices) >= len(self.data) or
                 self.data_size != len(self.data)):
            self.selected_indices.clear()
        self.data_size = len(self.data)
        # get and sort input data
        x = getx(self.data)
        xsind = np.argsort(x)
        self.data_x = x[xsind]
        self.data_ys = data.X[:, xsind]
        self.update_view()
        # "== True" is deliberate: rescale may still be the string "auto"
        if rescale == True:
            self.plot.vb.autoRange()
def __call__(self, data):
    """Normalize the rows of data.X by the configured method.

    Returns the normalized X array (not a Table).
    """
    if data.domain != self.domain:
        data = data.from_table(self.domain, data)
    if data.X.shape[0] == 0:
        # nothing to normalize
        return data.X
    data = data.copy()
    if self.method == Normalize.Vector:
        nans = np.isnan(data.X)
        nan_num = nans.sum(axis=1, keepdims=True)
        ys = data.X
        if np.any(nan_num > 0):
            # interpolate nan elements for normalization
            x = getx(data)
            ys = interp1d_with_unknowns_numpy(x, ys, x)
            ys = np.nan_to_num(ys)  # edge elements can still be zero
        data.X = sknormalize(ys, norm='l2', axis=1, copy=False)
        if np.any(nan_num > 0):
            # keep nans where they were
            data.X[nans] = float("nan")
    elif self.method == Normalize.Area:
        # divide each row by its integral over [lower, upper]
        norm_data = Integrate(method=self.int_method,
                              limits=[[self.lower, self.upper]])(data)
        data.X /= norm_data.X
    elif self.method == Normalize.Attribute:
        # attr normalization applies to entire spectrum, regardless of limits
        # meta indices are -ve and start at -1
        if self.attr not in (None, "None", ""):
            attr_index = -1 - data.domain.index(self.attr)
            factors = data.metas[:, attr_index].astype(float)
            data.X /= factors[:, None]
    return data.X
def __call__(self, data):
    """Compute a convex-hull ("rubberband") baseline for every row.

    Returns the baseline-subtracted signal when self.sub == 0, otherwise
    the baseline itself.
    """
    if data.domain != self.domain:
        data = data.from_table(self.domain, data)
    x = getx(data)
    newd = np.zeros_like(data.X)
    for rowi, row in enumerate(data.X):
        # remove NaNs which ConvexHull can not handle
        source = np.column_stack((x, row))
        source = source[~np.isnan(source).any(axis=1)]
        try:
            v = ConvexHull(source).vertices
        except QhullError:
            # FIXME notify user
            baseline = np.zeros_like(row)
        else:
            # keep only the lower (peak_dir == 0) or upper (peak_dir == 1)
            # part of the hull by rotating the vertex list
            if self.peak_dir == 0:
                v = np.roll(v, -v.argmax())
                v = v[:v.argmin() + 1]
            elif self.peak_dir == 1:
                v = np.roll(v, -v.argmin())
                v = v[:v.argmax() + 1]
            # If there are NaN values at the edges of data then convex hull
            # does not include the endpoints. Because the same values are also
            # NaN in the current row, we can fill them with NaN (bounds_error
            # achieves this).
            baseline = interp1d(source[v, 0], source[v, 1],
                                bounds_error=False)(x)
        finally:
            # NOTE(review): `baseline` is bound by both branches above, but an
            # exception other than QhullError would leave it unbound — confirm.
            if self.sub == 0:
                newd[rowi] = row - baseline
            else:
                newd[rowi] = baseline
    return newd
def _transform_to_sorted_features(data):
    """Return (xs, sort index, already-increasing flag, column-sorted X)."""
    xs = getx(data)
    order = np.argsort(xs)
    already_sorted = is_increasing(order)
    if already_sorted:
        # columns already appear in increasing x order; avoid a copy
        values = data.X
    else:
        values = data.X[:, order]
    return xs, order, already_sorted, values
def set_preview_data(self, data):
    """Initialize the limit values from the data range, unless the user
    has already edited them."""
    if self.user_changed:
        return
    xs = getx(data)
    if not len(xs):
        return
    self.set_value("Low limit", min(xs))
    self.set_value("High limit", max(xs))
    self.edited.emit()
def test_predict_savgol_another_interpolate(self):
    """Re-interpolating after Savitzky-Golay filtering barely changes AUC."""
    train, test = separate_learn_test(self.collagen)
    train = SavitzkyGolayFiltering(window=9, polyorder=2, deriv=2)(train)
    auc_filtered = AUC(
        TestOnTestData(train, test, [LogisticRegressionLearner()]))
    # interpolate onto the same points so domain conversion works
    train = Interpolate(points=getx(train))(train)
    auc_interpolated = AUC(
        TestOnTestData(train, test, [LogisticRegressionLearner()]))
    self.assertAlmostEqual(auc_filtered, auc_interpolated, delta=0.02)
def test_unordered_features(self):
    """Preprocessor output must not depend on the order of input features."""
    data = self.collagen
    data_reversed = reverse_attr(data)
    data_shuffle = shuffle_attr(data)

    def _sorted_columns(table):
        # compare columns in increasing-x order regardless of input order
        return table.X[:, np.argsort(getx(table))]

    for proc in PREPROCESSORS:
        comparison = np.testing.assert_equal
        # TODO find out why there are small differences for certain preprocessors
        if isinstance(proc, (RubberbandBaseline, Normalize, PCADenoising)):
            # named function instead of an assigned lambda (PEP 8 E731)
            def comparison(x, y):
                np.testing.assert_almost_equal(x, y, decimal=5)
        X = _sorted_columns(proc(data))
        comparison(X, _sorted_columns(proc(data_reversed)))
        comparison(X, _sorted_columns(proc(data_shuffle)))
def test_predict_samename_domain_interpolation(self):
    """Interpolation restores predictions after attribute links are destroyed."""
    train, test = separate_learn_test(self.collagen)
    auc_before = AUC(
        TestOnTestData(train, test, [LogisticRegressionLearner()]))
    test = destroy_atts_conversion(test)
    # make train capable of interpolation so conversion works again
    train = Interpolate(points=getx(train))(train)
    auc_after = AUC(
        TestOnTestData(train, test, [LogisticRegressionLearner()]))
    self.assertEqual(auc_before, auc_after)
def show_data(self):
    """Render the integrated intensity of each (x, y) map pixel as an image."""
    self.img.clear()
    if self.data:
        # project the data onto the two chosen coordinate attributes
        xat = self.data.domain[self.attr_x]
        yat = self.data.domain[self.attr_y]
        ndom = Orange.data.Domain([xat, yat])
        datam = Orange.data.Table(ndom, self.data)
        coorx = datam.X[:, 0]
        coory = datam.X[:, 1]
        lsx = values_to_linspace(coorx)
        lsy = values_to_linspace(coory)
        # integration limits default to just outside the data's x range
        l1, l2 = self.parent.lowlim, self.parent.highlim
        gx = getx(self.data)
        if l1 is None:
            l1 = min(gx) - 1
        if l2 is None:
            l2 = max(gx) + 1
        l1, l2 = min(l1, l2), max(l1, l2)
        imethod = self.parent.integration_methods[
            self.parent.integration_method]
        datai = Integrate(method=imethod, limits=[[l1, l2]])(self.data)
        # draw integration markings for the first selected curve, if any
        di = {}
        if self.parent.curveplot.selected_indices:
            ind = list(self.parent.curveplot.selected_indices)[0]
            di = datai.domain.attributes[0].compute_value.draw_info(
                self.data[ind:ind + 1])
        self.refresh_markings(di)
        d = datai.X[:, 0]
        # set data: fill the image grid, leaving missing pixels as NaN
        imdata = np.ones((lsy[2], lsx[2])) * float("nan")
        xindex = index_values(coorx, lsx)
        yindex = index_values(coory, lsy)
        imdata[yindex, xindex] = d
        levels = get_levels(imdata)
        self.update_color_schema()
        self.img.setImage(imdata, levels=levels)
        # shift centres of the pixels so that the axes are useful
        shiftx = (lsx[1] - lsx[0]) / (2 * (lsx[2] - 1))
        shifty = (lsy[1] - lsy[0]) / (2 * (lsy[2] - 1))
        left = lsx[0] - shiftx
        bottom = lsy[0] - shifty
        width = (lsx[1] - lsx[0]) + 2 * shiftx
        height = (lsy[1] - lsy[0]) + 2 * shifty
        self.img.setRect(QRectF(left, bottom, width, height))
def test_line_intersection(self):
    """A horizontal segment at y=1.15 crosses known collagen spectra."""
    data = self.collagen
    xs = getx(data)
    order = np.argsort(xs)
    xs = xs[order]
    ys = data.X[:, order]
    # segment from (0, 1.15) to (3000, 1.15)
    hits = intersect_curves(xs, ys,
                            np.array([0, 1.15]), np.array([3000, 1.15]))
    np.testing.assert_equal(np.flatnonzero(hits),
                            [191, 635, 638, 650, 712, 716, 717, 726])
def test_autointerpolate(self):
    """Domain conversion interpolates only inside the source x range."""
    d1 = Orange.data.Table("peach_juice.dpt")
    d2 = Orange.data.Table("collagen.csv")
    d3 = Orange.data.Table(d1.domain, d2)
    d1x = getx(d1)
    d2x = getx(d2)
    # have the correct number of non-nan elements: only d1 x values that
    # fall inside d2's range can be interpolated
    validx = np.where(d1x >= min(d2x), d1x, np.nan)
    validx = np.where(d1x <= max(d2x), validx, np.nan)
    self.assertEqual(np.sum(~np.isnan(validx)),
                     np.sum(~np.isnan(d3.X[0])))
    # check roundtrip back onto d2's own points
    atts = features_with_interpolation(d2x)
    ndom = Orange.data.Domain(atts, None)
    dround = Orange.data.Table(ndom, d3)
    # edges are unknown, the rest roughly the same
    np.testing.assert_allclose(dround.X[:, 1:-1], d2.X[:, 1:-1],
                               rtol=0.011)
def test_time():
    """Ad-hoc benchmark comparing NaN-handling interpolation strategies.

    Prints per-file timings; asserts the final NaN-aware result is NaN-free.
    """
    fns = ["collagen", dust(), spectra20nea(), "peach_juice.dpt"]
    for fn in fns:
        print(fn)
        data = Table(fn)
        print(data.X.shape)
        # introduce one unknown so NaN handling has work to do
        data[0, 2] = np.nan
        t = time.time()
        interpolated = Interpolate(getx(data), handle_nans=False)(data)
        print("no nan", time.time() - t)
        t = time.time()
        intp = Interpolate(getx(data), handle_nans=True)
        intp.interpfn = interp1d_with_unknowns_numpy
        interpolated = intp(data)
        print("nan handling with numpy", time.time() - t)
        # BUGFIX: restart the timer; the scipy timing previously also
        # included the numpy run above.
        t = time.time()
        intp.interpfn = interp1d_with_unknowns_scipy
        interpolated = intp(data)
        print("nan handling with scipy", time.time() - t)
        assert not np.any(np.isnan(interpolated.X))
def test_unknown_elsewhere(self):
    """Unknowns inside a row are filled by interpolating neighbouring points."""
    data = Orange.data.Table("iris")
    # plant NaNs in the middle of the first two rows
    for row, col in ((0, 1), (1, 1), (1, 2)):
        data.X[row, col] = np.nan
    interpolated = Interpolate(getx(data))(data)
    self.assertAlmostEqual(interpolated.X[0, 1], 3.25)
    self.assertAlmostEqual(interpolated.X[1, 1], 3.333333333333334)
    self.assertAlmostEqual(interpolated.X[1, 2], 1.766666666666667)
    self.assertFalse(np.any(np.isnan(interpolated.X)))
def test_slightly_different_domain(self):
    """
    If test data has a slightly different domain then (with
    interpolation) we should obtain a similar classification score.
    """
    for proc in PREPROCESSORS:
        train, test = separate_learn_test(self.collagen)
        train1 = proc(train)
        aucorig = AUC(
            TestOnTestData(train1, test, [LogisticRegressionLearner()]))
        test = destroy_atts_conversion(test)
        test = odd_attr(test)
        train = Interpolate(points=getx(train))(
            train)  # make train capable of interpolation
        train = proc(train)
        aucnow = AUC(
            TestOnTestData(train, test, [LogisticRegressionLearner()]))
        self.assertAlmostEqual(aucnow, aucorig, delta=0.02)
        test = Interpolate(points=getx(test) - 1.)(test)  # also do a shift
        aucnow = AUC(
            TestOnTestData(train, test, [LogisticRegressionLearner()]))
        self.assertAlmostEqual(
            aucnow, aucorig, delta=0.05)  # the difference should be slight
def __call__(self, data):
    """Restrict *data* to attributes whose x value falls inside
    [lowlim, highlim], or outside it when ``inverse`` is set.

    A limit of None leaves that side unbounded.
    """
    lo, hi = self.lowlim, self.highlim
    if self.inverse:
        # keep attributes outside the interval
        def keep(v):
            return (lo is not None and v <= lo) or \
                   (hi is not None and hi <= v)
    else:
        # keep attributes inside the interval
        def keep(v):
            return (lo is None or lo <= v) and (hi is None or v <= hi)
    kept = [at for at, v in zip(data.domain.attributes, getx(data))
            if keep(v)]
    domain = Orange.data.Domain(kept, data.domain.class_vars,
                                metas=data.domain.metas)
    return data.from_table(domain, data)
def test_predict_different_domain_interpolation(self):
    """Interpolation bridges shifted domains; cutting away range costs AUC."""
    train, test = separate_learn_test(self.collagen)
    aucorig = AUC(
        TestOnTestData(train, test, [LogisticRegressionLearner()]))
    test = Interpolate(points=getx(test) - 1.)(test)  # other test domain
    train = Interpolate(points=getx(train))(
        train)  # make train capable of interpolation
    aucshift = AUC(
        TestOnTestData(train, test, [LogisticRegressionLearner()]))
    # shift can decrease AUC slightly
    self.assertAlmostEqual(aucorig, aucshift, delta=0.01)
    # progressively narrower cuts of the test spectra
    test = Cut(1000, 1700)(test)
    auccut1 = AUC(
        TestOnTestData(train, test, [LogisticRegressionLearner()]))
    test = Cut(1100, 1600)(test)
    auccut2 = AUC(
        TestOnTestData(train, test, [LogisticRegressionLearner()]))
    test = Cut(1200, 1500)(test)
    auccut3 = AUC(
        TestOnTestData(train, test, [LogisticRegressionLearner()]))
    # the more we cut the lower precision we get
    self.assertTrue(aucorig > auccut1 > auccut2 > auccut3)
def test_interpolate_interval(self):
    """Mode 1 (fixed interval): grid output, dx error states, swapped limits."""
    self.widget.controls.input_radio.buttons[1].click()
    self.send_signal("Data", self.peach)
    out = self.get_output("Interpolated data")
    # default grid spans the data range with step 10 (endpoint excluded)
    np.testing.assert_almost_equal(np.arange(499.53234, 4000.1161, 10),
                                   getx(out))
    # dx == 0 raises the dxzero error
    self.widget.controls.dx.setText("0")
    self.widget.commit()
    self.assertTrue(self.widget.Error.dxzero.is_shown())
    # a tiny dx would create too many points
    self.widget.controls.dx.setText("0.001")
    self.widget.commit()
    self.assertTrue(self.widget.Error.too_many_points.is_shown())
    # a sane dx clears both errors
    self.widget.controls.dx.setText("10")
    self.widget.commit()
    self.assertFalse(self.widget.Error.dxzero.is_shown())
    self.assertFalse(self.widget.Error.too_many_points.is_shown())
    # swapped limits are reordered, so the grid is unchanged
    self.widget.controls.xmin.setText("4000.1161")
    self.widget.controls.xmax.setText("499.53234")
    self.widget.commit()
    out2 = self.get_output("Interpolated data")
    np.testing.assert_almost_equal(getx(out2), getx(out))
    # removing the data clears the output
    self.send_signal("Data", None)
    self.assertTrue(self.get_output("Interpolated data") is None)
def test_cut_single_inverse(self):
    """Inverse cut with one limit keeps values on the far side of it."""
    data = self.collagen
    full_x = getx(data)
    # inverse lowlim keeps everything at or below the limit
    below = getx(Cut(lowlim=1000, inverse=True)(data))
    self.assertLessEqual(max(below), 1000)
    self.assertEqual(min(below), min(full_x))
    # inverse highlim keeps everything at or above the limit
    above = getx(Cut(highlim=1000, inverse=True)(data))
    self.assertGreaterEqual(min(above), 1000)
    self.assertEqual(max(above), max(full_x))
def __call__(self, data):
    """Interpolate every row of *data* at ``self.points``.

    The data is first converted to the stored domain when a meaningful
    conversion exists; otherwise the interpolation itself makes the
    domains compatible.
    """
    convertible = (self.domain
                   and data.domain != self.domain
                   and any(at.compute_value
                           for at in self.domain.attributes))
    if convertible:
        data = data.from_table(self.domain, data)
    xs = getx(data)
    if not len(xs):
        # no source points at all: every interpolated value is unknown
        return np.full((len(data), len(self.points)), np.nan)
    interpolator = interp1d(xs, data.X, fill_value=np.nan,
                            bounds_error=False, kind=self.kind)
    return interpolator(self.points)
def test_cut_both_inverse(self):
    """Inverse cut with both limits removes the interior of the interval."""
    data = self.collagen
    all_x = getx(data)
    # an interval outside the data range removes nothing
    untouched = Cut(lowlim=0, highlim=2, inverse=True)(data)
    np.testing.assert_equal(getx(untouched), all_x)
    # an interval in the middle keeps the edges but drops interior points
    cut_x = getx(Cut(lowlim=1000, highlim=1100, inverse=True)(data))
    self.assertEqual(min(cut_x), min(all_x))
    self.assertEqual(max(cut_x), max(all_x))
    self.assertLess(len(cut_x), len(all_x))
    # no x values remain strictly between the two limits
    np.testing.assert_equal(np.where(cut_x < 1100), np.where(cut_x < 1000))
    np.testing.assert_equal(np.where(cut_x > 1100), np.where(cut_x > 1000))