Example #1
0
            def select(data, rows, domain):
                """
                Select the data subset with specified rows and domain subsets.

                If either rows or domain is None they mean select all.
                `data` itself is returned when both are None; otherwise the
                result is produced by ``data.from_table``.
                """
                if rows is None and domain is None:
                    return data
                if domain is None:
                    # Rows only: keep the original domain. The source table
                    # must still be passed as the second argument, exactly as
                    # in the other branches.
                    return data.from_table(data.domain, data, rows)
                if rows is None:
                    return data.from_table(domain, data)
                return data.from_table(domain, data, rows)
Example #2
0
            def select(data, rows, domain):
                """
                Select the data subset with specified rows and domain subsets.

                If either rows or domain is None they mean select all.
                `data` itself is returned when both are None; otherwise the
                result is produced by ``data.from_table``.
                """
                if rows is not None and domain is not None:
                    return data.from_table(domain, data, rows)
                if rows is not None:
                    # Rows only: the original domain is kept, and the source
                    # table is passed as the second argument so that `rows`
                    # is interpreted as the row selection.
                    return data.from_table(data.domain, data, rows)
                if domain is not None:
                    return data.from_table(domain, data)
                return data
Example #3
0
    def __call__(self, data):
        """
        Discretize the continuous attributes of `data` and return the
        converted table.

        Attributes for which the method yields no variable, or a variable
        with fewer than two values, are dropped. Class variables and metas
        are carried over unchanged.

        :param data: data
        :type data: Orange.data.Table
        :return: Orange.data.Table
        """
        # Fall back to equal-frequency discretization when no method is set.
        discretizer = self.method or discretize.EqualFreq()

        def convert(var):
            if not is_continuous(var):
                return var
            candidate = discretizer(data, var)
            if candidate is None or len(candidate.values) < 2:
                return None
            return candidate

        converted = [convert(var) for var in data.domain.attributes]
        kept = [var for var in converted if var is not None]
        domain = Orange.data.Domain(
            kept, data.domain.class_vars, data.domain.metas)
        return data.from_table(domain, data)
Example #4
0
    def __call__(self, data):
        """
        Discretize the continuous attributes of `data` and return a new
        data table.

        A discretized attribute with fewer than two values is dropped when
        `self.remove_const` is set; attributes for which the method returns
        None are always dropped.

        Parameters
        ----------
        data : Orange.data.Table
            A data table to be discretized.
        """
        # Fall back to equal-frequency discretization when no method is set.
        discretizer = self.method or discretize.EqualFreq()

        def convert(var):
            if not var.is_continuous:
                return var
            candidate = discretizer(data, var)
            if candidate is None:
                return None
            if len(candidate.values) < 2 and self.remove_const:
                return None
            return candidate

        converted = [convert(var) for var in data.domain.attributes]
        kept = [var for var in converted if var is not None]
        domain = Orange.data.Domain(
            kept, data.domain.class_vars, data.domain.metas)
        return data.from_table(domain, data)
 def __call__(self, data):
     """
     Return `data` converted to a domain whose attributes are copies of the
     original ones with `GaussianFeature` as their compute_value.

     Class variables and metas are carried over unchanged.
     """
     source_domain = data.domain
     shared = _GaussianCommon(self.sd, source_domain)
     smoothed = []
     for index, attr in enumerate(source_domain.attributes):
         smoothed.append(
             attr.copy(compute_value=GaussianFeature(index, shared)))
     new_domain = Orange.data.Domain(
         smoothed, source_domain.class_vars, source_domain.metas)
     return data.from_table(new_domain, data)
 def __call__(self, data):
     """
     Compute a convex-hull ("rubber band") baseline for every row.

     `data` is first converted to `self.domain` if its domain differs.
     For each row the hull of the (x, value) points without NaNs is
     computed; depending on `self.peak_dir` only one side of the hull is
     kept, and the baseline is interpolated through the kept vertices.
     When `self.sub == 0` the baseline is subtracted from the row,
     otherwise the baseline itself is stored.

     If the hull cannot be built (QhullError) a zero baseline is used for
     that row. Any other exception propagates unchanged.
     """
     if data.domain != self.domain:
         data = data.from_table(self.domain, data)
     xs, xsind, mon, X = _transform_to_sorted_features(data)
     x = xs[xsind]
     newd = np.zeros_like(data.X)
     for rowi, row in enumerate(X):
         # remove NaNs which ConvexHull can not handle
         source = np.column_stack((x, row))
         source = source[~np.isnan(source).any(axis=1)]
         try:
             v = ConvexHull(source).vertices
         except QhullError:
             # FIXME notify user
             baseline = np.zeros_like(row)
         else:
             if self.peak_dir == RubberbandBaseline.PeakPositive:
                 v = np.roll(v, -v.argmin())
                 v = v[:v.argmax() + 1]
             elif self.peak_dir == RubberbandBaseline.PeakNegative:
                 v = np.roll(v, -v.argmax())
                 v = v[:v.argmin() + 1]
             # If there are NaN values at the edges of data then convex hull
             # does not include the endpoints. Because the same values are also
             # NaN in the current row, we can fill them with NaN (bounds_error
             # achieves this).
             baseline = interp1d(source[v, 0],
                                 source[v, 1],
                                 bounds_error=False)(x)
         # Deliberately outside the try statement: in a `finally` clause this
         # would also run while an unexpected exception is propagating, when
         # `baseline` is unbound (first row) or left over from the previous
         # row.
         if self.sub == 0:
             newd[rowi] = row - baseline
         else:
             newd[rowi] = baseline
     return _transform_back_to_features(xsind, mon, newd)
Example #7
0
    def __call__(self, data):
        """
        Return `data` with every continuous attribute replaced by a copy
        whose compute_value is a `transformation.Normalizer`.

        `data` is returned unchanged when both `self.center` and
        `self.scale` are None. Non-continuous attributes, class variables
        and metas are kept as they are.
        """
        if self.center is None and self.scale is None:
            return data

        def normalized(var):
            dist = distribution.get_distribution(data, var)

            c = 0
            if self.center != self.NoCentering:
                c = self.center(dist)
                # shift the distribution so that scaling sees centered values
                dist[0, :] -= c

            s = 1
            if self.scale != self.NoScaling:
                s = self.scale(dist)
                # guard against division by a (near-)zero scale
                if s < 1e-15:
                    s = 1

            result = var.copy(
                compute_value=transformation.Normalizer(var, c, 1 / s))
            if s != 1:
                result.number_of_decimals = 3
            return result

        newvars = [normalized(var) if var.is_continuous else var
                   for var in data.domain.attributes]
        domain = Orange.data.Domain(newvars, data.domain.class_vars,
                                    data.domain.metas)
        return data.from_table(domain, data)
    def __call__(self, data):
        """
        Normalize the rows of `data` and return the resulting X array
        (an array, not a table).

        `data` is converted to `self.domain` first if its domain differs.
        A table without rows is returned as its (empty) X. Otherwise a copy
        of the table is normalized according to `self.method`:

        - ``Normalize.Vector``: L2 normalization of each row. NaNs are
          interpolated for the computation and restored afterwards.
        - ``Normalize.Area``: division by the integral over
          ``[self.lower, self.upper]`` computed with `self.int_method`.
        - ``Normalize.Attribute``: division by the values of the continuous
          variable `self.attr`; if it is missing or not continuous the
          whole array becomes NaN.
        """
        if data.domain != self.domain:
            data = data.from_table(self.domain, data)

        if data.X.shape[0] == 0:
            return data.X
        data = data.copy()

        if self.method == Normalize.Vector:
            nans = np.isnan(data.X)
            has_nans = nans.any()
            ys = data.X
            if has_nans:
                # interpolate nan elements for normalization
                x = getx(data)
                ys = interp1d_with_unknowns_numpy(x, ys, x)
                ys = np.nan_to_num(ys)  # edge elements can still be zero
            data.X = sknormalize(ys, norm='l2', axis=1, copy=False)
            if has_nans:
                # keep nans where they were
                data.X[nans] = float("nan")
        elif self.method == Normalize.Area:
            norm_data = Integrate(methods=self.int_method,
                                  limits=[[self.lower, self.upper]])(data)
            data.X /= norm_data.X
        elif self.method == Normalize.Attribute:
            if self.attr in data.domain and isinstance(
                    data.domain[self.attr], Orange.data.ContinuousVariable):
                ndom = Orange.data.Domain([data.domain[self.attr]])
                factors = data.transform(ndom)
                data.X /= factors.X
            else:  # invalid attribute for normalization
                data.X *= float("nan")
        return data.X
 def __call__(self, data):
     """
     Return `data` extended with one integral feature per entry of
     `self.limits`.

     Each new feature is a ContinuousVariable whose compute_value is
     ``method(limits, common)``. A single (non-iterable) `self.methods`
     is applied to every limit. Feature names come from `self.names`, or
     are built from the limits when no names are given, and are made
     unique with respect to the variables and metas already in `data`.

     When `self.metas` is false the new features replace the attributes;
     otherwise the attributes are kept and the features are appended to
     the metas.
     """
     common = _IntegrateCommon(data.domain)
     atts = []
     if self.limits:
         methods = self.methods
         if not isinstance(methods, Iterable):
             methods = [methods] * len(self.limits)
         if self.names:
             # Work on a copy: the names are rewritten below and the
             # instance's own list must not be modified by a call.
             names = list(self.names)
         else:
             names = [
                 " - ".join("{0}".format(e) for e in l) for l in self.limits
             ]
         # no names in data should be repeated
         used_names = [
             var.name for var in data.domain.variables + data.domain.metas
         ]
         for i, n in enumerate(names):
             n = get_next_name(used_names, n)
             names[i] = n
             used_names.append(n)
         for limits, method, name in zip(self.limits, methods, names):
             atts.append(
                 Orange.data.ContinuousVariable(name=name,
                                                compute_value=method(
                                                    limits, common)))
     if not self.metas:
         domain = Orange.data.Domain(atts,
                                     data.domain.class_vars,
                                     metas=data.domain.metas)
     else:
         domain = Orange.data.Domain(data.domain.attributes,
                                     data.domain.class_vars,
                                     metas=data.domain.metas + tuple(atts))
     return data.from_table(domain, data)
Example #10
0
    def __call__(self, data):
        """
        Normalize the continuous attributes of `data`.

        Each continuous attribute is replaced by a copy that computes its
        value through `transformation.Normalizer`; everything else in the
        domain is left alone. When neither centering nor scaling is
        configured (both are None) the input is returned as is.
        """
        if self.center is None and self.scale is None:
            return data

        def normalized(var):
            dist = distribution.get_distribution(data, var)

            if self.center == self.NoCentering:
                c = 0
            else:
                c = self.center(dist)
                # the scale is computed on the centered distribution
                dist[0, :] -= c

            if self.scale == self.NoScaling:
                s = 1
            else:
                s = self.scale(dist)
                if s < 1e-15:
                    # avoid dividing by a (near-)zero scale
                    s = 1

            factor = 1 / s
            normalizer = transformation.Normalizer(var, c, factor)
            new_var = var.copy(compute_value=normalizer)
            if s != 1:
                new_var.number_of_decimals = 3
            return new_var

        newvars = [normalized(var) if var.is_continuous else var
                   for var in data.domain.attributes]
        domain = Orange.data.Domain(newvars, data.domain.class_vars,
                                    data.domain.metas)
        return data.from_table(domain, data)
Example #11
0
    def __call__(self, data):
        """
        Compute and apply discretization of the given data. Returns a new
        data table.

        Continuous attributes are replaced by what the discretization
        method returns; a result of None is dropped, and so is a result
        with fewer than two values when `self.remove_const` is set.

        Parameters
        ----------
        data : Orange.data.Table
            A data table to be discretized.
        """
        # EqualFreq is the default when no method has been configured.
        method = self.method or discretize.EqualFreq()

        attributes = []
        for var in data.domain.attributes:
            if var.is_continuous:
                new_var = method(data, var)
                if new_var is None:
                    continue
                if len(new_var.values) < 2 and self.remove_const:
                    continue
                attributes.append(new_var)
            else:
                attributes.append(var)

        domain = Orange.data.Domain(
            attributes, data.domain.class_vars, data.domain.metas)
        return data.from_table(domain, data)
Example #12
0
    def __call__(self, data):
        """
        Return `data` with continuous attributes replaced by copies that
        are computed through `preprocess.transformation.Normalizer`.

        The input is returned unchanged when both `self.center` and
        `self.scale` are None. A falsy center means no offset (0) and a
        falsy scale means no scaling (1).
        """
        if self.center is None and self.scale is None:
            return data

        def normalized(var):
            dist = distribution.get_distribution(data, var)

            offset = 0
            if self.center:
                offset = self.center(dist)
                # the scale is computed on the centered distribution
                dist[0, :] -= offset

            spread = 1
            if self.scale:
                spread = self.scale(dist)
                if spread < 1e-15:
                    # avoid dividing by a (near-)zero scale
                    spread = 1

            factor = 1 / spread
            normalizer = preprocess.transformation.Normalizer(
                var, offset, factor)
            return var.copy(compute_value=normalizer)

        newvars = [normalized(var) if var.is_continuous else var
                   for var in data.domain.attributes]
        domain = Orange.data.Domain(newvars, data.domain.class_vars,
                                    data.domain.metas)
        return data.from_table(domain, data)
Example #13
0
 def __call__(self, data):
     """
     Convert `data` to a domain in which every attribute is a copy of the
     original computed by a `GaussianFeature`; class variables and metas
     stay the same.
     """
     shared = _GaussianCommon(self.sd, data.domain)

     def as_feature(index, attr):
         return attr.copy(compute_value=GaussianFeature(index, shared))

     atts = [as_feature(i, a) for i, a in enumerate(data.domain.attributes)]
     new_domain = Orange.data.Domain(
         atts, data.domain.class_vars, data.domain.metas)
     return data.from_table(new_domain, data)
Example #14
0
 def __call__(self, data):
     """
     Build a continuized domain for `data` with a `DomainContinuizer`
     and return `data` converted to that domain.
     """
     # NOTE(review): the attribute is spelled `multinimial_treatment`
     # here - confirm it matches the name assigned in __init__.
     make_domain = continuizer.DomainContinuizer(
         zero_based=self.zero_based,
         multinomial_treatment=self.multinimial_treatment,
     )
     continuized = make_domain(data)
     return data.from_table(continuized, data)
Example #15
0
    def __call__(self, data):
        """
        Normalize the rows of `data` and return the resulting X array
        (an array, not a table).

        `data` is converted to `self.domain` if needed; a table with no
        rows is returned as its X right away. The work is done on a copy
        of the table. `self.method` selects vector (L2) normalization,
        normalization by the integral between `self.lower` and
        `self.upper`, or division by a meta attribute (`self.attr`).
        """
        if data.domain != self.domain:
            data = data.from_table(self.domain, data)

        if data.X.shape[0] == 0:
            return data.X
        data = data.copy()

        if self.method == Normalize.Vector:
            nans = np.isnan(data.X)
            rows_with_nans = nans.sum(axis=1, keepdims=True)
            any_nans = np.any(rows_with_nans > 0)
            ys = data.X
            if any_nans:
                # interpolate nan elements for normalization
                x = getx(data)
                ys = interp1d_with_unknowns_numpy(x, ys, x)
                ys = np.nan_to_num(ys)  # edge elements can still be zero
            data.X = sknormalize(ys, norm='l2', axis=1, copy=False)
            if any_nans:
                # keep nans where they were
                data.X[nans] = float("nan")
        elif self.method == Normalize.Area:
            integrate = Integrate(method=self.int_method,
                                  limits=[[self.lower, self.upper]])
            norm_data = integrate(data)
            data.X /= norm_data.X
        elif self.method == Normalize.Attribute:
            # attr normalization applies to entire spectrum, regardless of limits
            # meta indices are -ve and start at -1
            if self.attr not in (None, "None", ""):
                meta_column = -1 - data.domain.index(self.attr)
                factors = data.metas[:, meta_column].astype(float)
                data.X /= factors[:, None]
        return data.X
Example #16
0
 def __call__(self, data):
     """
     Return `data` converted to a domain whose attributes are copies of
     the original ones computed by `CurveShiftFeature`.

     Class variables and metas are carried over unchanged.
     """
     source_domain = data.domain
     shared = _CurveShiftCommon(self.amount, source_domain)
     shifted = []
     for index, attr in enumerate(source_domain.attributes):
         shifted.append(
             attr.copy(compute_value=CurveShiftFeature(index, shared)))
     new_domain = Orange.data.Domain(
         shifted, source_domain.class_vars, source_domain.metas)
     return data.from_table(new_domain, data)
 def __call__(self, data):
     """
     Convert `data` to a domain in which each attribute is a copy of the
     original with `CurveShiftFeature` as its compute_value; class
     variables and metas stay the same.
     """
     shared = _CurveShiftCommon(self.amount, data.domain)

     def as_feature(index, attr):
         return attr.copy(compute_value=CurveShiftFeature(index, shared))

     atts = [as_feature(i, a) for i, a in enumerate(data.domain.attributes)]
     new_domain = Orange.data.Domain(
         atts, data.domain.class_vars, data.domain.metas)
     return data.from_table(new_domain, data)
Example #18
0
 def __call__(self, data):
     """
     Compute a continuized domain for `data` with
     `continuize.DomainContinuizer` and return `data` in that domain.
     """
     # NOTE(review): the attribute is spelled `multinimial_treatment`
     # here - confirm it matches the name assigned in __init__.
     make_domain = continuize.DomainContinuizer(
         zero_based=self.zero_based,
         multinomial_treatment=self.multinimial_treatment,
     )
     continuized = make_domain(data)
     return data.from_table(continuized, data)
Example #19
0
 def __call__(self, data):
     """
     Return `data` converted to a domain whose attributes are copies of
     the original ones computed by `LinearBaselineFeature`.

     Class variables and metas are carried over unchanged.
     """
     source_domain = data.domain
     shared = _LinearBaselineCommon(self.peak_dir, self.sub, source_domain)
     corrected = []
     for index, attr in enumerate(source_domain.attributes):
         feature = LinearBaselineFeature(index, shared)
         corrected.append(attr.copy(compute_value=feature))
     new_domain = Orange.data.Domain(
         corrected, source_domain.class_vars, source_domain.metas)
     return data.from_table(new_domain, data)
 def __call__(self, data):
     """
     Return `data` converted to a domain whose attributes are copies of
     the original ones computed by `SavitzkyGolayFeature`.

     Class variables and metas are carried over unchanged.
     """
     source_domain = data.domain
     shared = _SavitzkyGolayCommon(
         self.window, self.polyorder, self.deriv, source_domain)
     filtered = []
     for index, attr in enumerate(source_domain.attributes):
         feature = SavitzkyGolayFeature(index, shared)
         filtered.append(attr.copy(compute_value=feature))
     new_domain = Orange.data.Domain(
         filtered, source_domain.class_vars, source_domain.metas)
     return data.from_table(new_domain, data)
 def __call__(self, data):
     """
     Return `data` converted to a domain whose attributes are copies of
     the original ones computed by `NormalizeFeature`.

     Class variables and metas are carried over unchanged.
     """
     source_domain = data.domain
     shared = _NormalizeCommon(
         self.method, self.lower, self.upper,
         self.int_method, self.attr, source_domain)
     normalized = []
     for index, attr in enumerate(source_domain.attributes):
         feature = NormalizeFeature(index, shared)
         normalized.append(attr.copy(compute_value=feature))
     new_domain = Orange.data.Domain(
         normalized, source_domain.class_vars, source_domain.metas)
     return data.from_table(new_domain, data)
 def __call__(self, data):
     """
     Convert `data` to a domain in which each attribute is a copy of the
     original with `LinearBaselineFeature` as its compute_value; class
     variables and metas stay the same.
     """
     shared = _LinearBaselineCommon(
         self.peak_dir, self.sub, self.zero_points, data.domain)

     def as_feature(index, attr):
         return attr.copy(compute_value=LinearBaselineFeature(index, shared))

     atts = [as_feature(i, a) for i, a in enumerate(data.domain.attributes)]
     new_domain = Orange.data.Domain(
         atts, data.domain.class_vars, data.domain.metas)
     return data.from_table(new_domain, data)
Example #23
0
 def __call__(self, data):
     """
     Convert `data` to a domain in which each attribute is a copy of the
     original with `NormalizeFeature` as its compute_value; class
     variables and metas stay the same.
     """
     shared = _NormalizeCommon(
         self.method, self.lower, self.upper,
         self.int_method, self.attr, data.domain)

     def as_feature(index, attr):
         return attr.copy(compute_value=NormalizeFeature(index, shared))

     atts = [as_feature(i, a) for i, a in enumerate(data.domain.attributes)]
     new_domain = Orange.data.Domain(
         atts, data.domain.class_vars, data.domain.metas)
     return data.from_table(new_domain, data)
Example #24
0
    def __call__(self, data):
        """
        Build a continuized domain for `data` with a `DomainContinuizer`
        configured from this instance, and return `data` in that domain.
        """
        # imported inside the method, as in the original code
        from . import continuize

        options = {
            "zero_based": self.zero_based,
            "multinomial_treatment": self.multinomial_treatment,
        }
        make_domain = continuize.DomainContinuizer(**options)
        continuized = make_domain(data)
        return data.from_table(continuized, data)
Example #25
0
    def __call__(self, data):
        """
        Return `data` converted to the domain produced by a
        `DomainContinuizer` set up with this instance's `zero_based` and
        `multinomial_treatment` settings.
        """
        # imported inside the method, as in the original code
        from . import continuize

        make_domain = continuize.DomainContinuizer(
            zero_based=self.zero_based,
            multinomial_treatment=self.multinomial_treatment,
        )
        new_domain = make_domain(data)
        return data.from_table(new_domain, data)
Example #26
0
 def __call__(self, data):
     """
     Convert `data` to a domain in which each attribute is a copy of the
     original with `SavitzkyGolayFeature` as its compute_value; class
     variables and metas stay the same.
     """
     shared = _SavitzkyGolayCommon(
         self.window, self.polyorder, self.deriv, data.domain)

     def as_feature(index, attr):
         return attr.copy(compute_value=SavitzkyGolayFeature(index, shared))

     atts = [as_feature(i, a) for i, a in enumerate(data.domain.attributes)]
     new_domain = Orange.data.Domain(
         atts, data.domain.class_vars, data.domain.metas)
     return data.from_table(new_domain, data)
Example #27
0
 def __call__(self, data, variable):
     """
     Return a copy of `variable` whose compute_value is a
     `ReplaceUnknownsModel` built from a model that `self.learner`
     fits with `variable` as the class variable.
     """
     target = data.domain[variable]
     class_domain = domain_with_class_var(data.domain, target)
     training = data.from_table(class_domain, data)
     model = self.learner(training)
     assert model.domain.class_var == target
     replacement = ReplaceUnknownsModel(target, model)
     return target.copy(compute_value=replacement)
Example #28
0
 def __call__(self, data):
     """
     Apply `savgol_filter` to `data.X` and return the filtered array.

     `data` is converted to `self.domain` first if its domain differs.
     """
     if data.domain != self.domain:
         data = data.from_table(self.domain, data)
     signal = data.X
     options = {
         "window_length": self.window,
         "polyorder": self.polyorder,
         "deriv": self.deriv,
         "mode": "nearest",
     }
     return savgol_filter(signal, **options)
Example #29
0
 def __call__(self, data):
     """
     Return `data` converted to a domain of new continuous variables.

     Each new variable keeps the name of the attribute it replaces and is
     computed by `self._cl_feature`, sharing one `self._cl_common`
     instance. Class variables and metas are carried over unchanged.
     """
     source_domain = data.domain
     shared = self._cl_common(self.reference, source_domain)
     newattrs = []
     for index, var in enumerate(source_domain.attributes):
         feature = self._cl_feature(index, shared)
         newattrs.append(Orange.data.ContinuousVariable(
             name=var.name, compute_value=feature))
     new_domain = Orange.data.Domain(
         newattrs, source_domain.class_vars, source_domain.metas)
     return data.from_table(new_domain, data)
Example #30
0
    def commit(self):
        """
        Send the normalized table and its filtered subset to the outputs.

        When there is no input data, None is sent on both outputs.
        Otherwise a copy of the input is scaled in place, optional meta
        columns are filled in, and the part selected by the z-score cutoff
        is sent on the "Filtered expression array" output.
        """
        if not self.data:
            self.send("Normalized expression array", None)
            self.send("Filtered expression array", None)
            return

        G, R = self.merged_splits
        Gc, Rc = self.centered
        ind1, ind2 = self.split_ind

        # Factor that maps the G values onto their centered counterpart Gc.
        gfactor = Gc / G
        domain = self.data.domain
        newmetas = []
        M = []

        _, _, axis = self.getSelectedGroup()
        # Extra meta columns are only appended when axis == 1.
        if self.appendZScore and axis == 1:
            attr = Orange.data.ContinuousVariable("Z-Score")
            newmetas.append(attr)
            M.append(self.z_scores.filled(numpy.nan))

        if self.appendRIValues and axis == 1:
            r_attr = Orange.data.ContinuousVariable("Log Ratio")
            i_attr = Orange.data.ContinuousVariable("Intensity")
            ratio, intensity = expression.ratio_intensity(Gc, Rc)
            newmetas.extend([r_attr, i_attr])
            M.extend([ratio.filled(numpy.nan),
                      intensity.filled(numpy.nan)])

        if newmetas:
            domain = Orange.data.Domain(
                self.data.domain.attributes, self.data.domain.class_vars,
                self.data.domain.metas + tuple(newmetas))

        data = Orange.data.Table.from_table(domain, self.data)
        # The table is modified in place below.
        data.ensure_copy()

        # Scale the rows (axis == 0) or columns (otherwise) indexed by ind1.
        if axis == 0:
            data.X[ind1, :] *= gfactor.reshape((1, -1))
        else:
            data.X[:, ind1] *= gfactor.reshape((-1, 1))

        # The new metas were appended at the end of the domain, so fill the
        # trailing meta columns from the last one backwards.
        for i, mcol in enumerate(reversed(M)):
            data.metas[:, -i - 1] = mcol

        # Indices whose absolute z-score reaches the cutoff; masked z-scores
        # are treated as 0.
        selected_indices = numpy.flatnonzero(
            numpy.abs(self.z_scores.filled(0)) >= self.zCutoff)

        if axis == 0:
            # selection applies to columns: build a reduced domain
            attrs = [data.domain[i] for i in selected_indices]
            domain = Orange.data.Domain(attrs, data.domain.class_vars,
                                        data.domain.metas)
            filtered_data = data.from_table(domain, data)
        else:
            # selection applies to rows
            filtered_data = data[selected_indices]

        self.send("Normalized expression array", data)
        self.send("Filtered expression array", filtered_data)
Example #31
0
 def test_meta_object_dtype(self):
     """Send a table with mixed discrete/string metas to the widget."""
     # gh-1875: Test on mixed string/discrete metas
     subset = self.data[::5]
     attributes = subset.domain.attributes
     metas = [subset.domain["iris"], Orange.data.StringVariable("S")]
     mixed_domain = Orange.data.Domain(attributes, [], metas)
     converted = subset.from_table(mixed_domain, subset)
     self.send_signal(self.widget.Inputs.data, converted)
Example #32
0
 def __call__(self, data):
     """
     Return `data` converted to a domain whose attributes come from
     `features_with_interpolation`; class variables and metas are kept.
     """
     source_domain = data.domain
     interpolated = features_with_interpolation(
         self.points, self.kind, source_domain, self.handle_nans,
         interpfn=self.interpfn)
     new_domain = Orange.data.Domain(
         interpolated, source_domain.class_vars, source_domain.metas)
     return data.from_table(new_domain, data)
Example #33
0
 def __call__(self, data):
     """
     Smooth the rows of `data` with `gaussian_filter1d` (sigma `self.sd`)
     and return the result mapped back to the original feature order.

     `data` is converted to `self.domain` first if its domain differs.
     Positions reported as NaN by the interpolation helper are set back
     to NaN after filtering.
     """
     if data.domain != self.domain:
         data = data.from_table(self.domain, data)
     xs, order, mon, values = _transform_to_sorted_features(data)
     values, nan_mask = _nan_extend_edges_and_interpolate(xs[order], values)
     smoothed = gaussian_filter1d(values, sigma=self.sd, mode="nearest")
     if nan_mask is not None:
         smoothed[nan_mask] = np.nan
     return _transform_back_to_features(order, mon, smoothed)
 def __call__(self, data):
     """
     Return `data` converted to a domain whose attributes are copies of
     the original ones computed by `ARPLSFeature`.

     Class variables and metas are carried over unchanged.
     """
     source_domain = data.domain
     shared = ARPLSCommon(self.lam, self.itermax, self.ratio, source_domain)
     corrected = []
     for index, attr in enumerate(source_domain.attributes):
         corrected.append(
             attr.copy(compute_value=ARPLSFeature(index, shared)))
     new_domain = Orange.data.Domain(
         corrected, source_domain.class_vars, source_domain.metas)
     return data.from_table(new_domain, data)
Example #35
0
 def partial_fit(self, data):
     """
     Feed one batch to the wrapped projection and return `self`.

     An `Orange.data.Storage` is converted to `self.pre_domain` when its
     domain differs and its X is passed on; anything else is passed to
     the projection as is. Afterwards the projection's attributes are
     copied into this object's `__dict__`.
     """
     batch = data
     if isinstance(batch, Orange.data.Storage):
         if batch.domain != self.pre_domain:
             batch = batch.from_table(self.pre_domain, batch)
         batch = batch.X
     self.proj.partial_fit(batch)
     self.__dict__.update(self.proj.__dict__)
     return self
Example #36
0
    def commit(self):
        """
        Send the normalized table and its filtered subset to the outputs.

        When there is no input data, None is sent on both outputs.
        Otherwise a copy of the input is scaled in place, optional meta
        columns are filled in, and the part selected by the z-score cutoff
        is sent on the "Filtered expression array" output.
        """
        if not self.data:
            self.send("Normalized expression array", None)
            self.send("Filtered expression array", None)
            return

        G, R = self.merged_splits
        Gc, Rc = self.centered
        ind1, ind2 = self.split_ind

        # Factor that maps the G values onto their centered counterpart Gc.
        gfactor = Gc / G
        domain = self.data.domain
        newmetas = []
        M = []

        _, _, axis = self.getSelectedGroup()
        # Extra meta columns are only appended when axis == 1.
        if self.appendZScore and axis == 1:
            attr = Orange.data.ContinuousVariable("Z-Score")
            newmetas.append(attr)
            M.append(self.z_scores.filled(numpy.nan))

        if self.appendRIValues and axis == 1:
            r_attr = Orange.data.ContinuousVariable("Log Ratio")
            i_attr = Orange.data.ContinuousVariable("Intensity")
            ratio, intensity = expression.ratio_intensity(Gc, Rc)
            newmetas.extend([r_attr, i_attr])
            M.extend([ratio.filled(numpy.nan), intensity.filled(numpy.nan)])

        if newmetas:
            domain = Orange.data.Domain(
                self.data.domain.attributes, self.data.domain.class_vars,
                self.data.domain.metas + tuple(newmetas))

        data = Orange.data.Table.from_table(domain, self.data)
        # The table is modified in place below.
        data.ensure_copy()

        # Scale the rows (axis == 0) or columns (otherwise) indexed by ind1.
        if axis == 0:
            data.X[ind1, :] *= gfactor.reshape((1, -1))
        else:
            data.X[:, ind1] *= gfactor.reshape((-1, 1))

        # The new metas were appended at the end of the domain, so fill the
        # trailing meta columns from the last one backwards.
        for i, mcol in enumerate(reversed(M)):
            data.metas[:, -i - 1] = mcol

        # Indices whose absolute z-score reaches the cutoff; masked z-scores
        # are treated as 0.
        selected_indices = numpy.flatnonzero(
            numpy.abs(self.z_scores.filled(0)) >= self.zCutoff)

        if axis == 0:
            # selection applies to columns: build a reduced domain
            attrs = [data.domain[i] for i in selected_indices]
            domain = Orange.data.Domain(attrs, data.domain.class_vars,
                                        data.domain.metas)
            filtered_data = data.from_table(domain, data)
        else:
            # selection applies to rows
            filtered_data = data[selected_indices]

        self.send("Normalized expression array", data)
        self.send("Filtered expression array", filtered_data)
Example #37
0
 def partial_fit(self, data):
     """
     Incrementally fit the wrapped projection on `data`; returns `self`.

     For an `Orange.data.Storage` the X array is used, after converting
     the table to `self.pre_domain` when the domains differ. Other inputs
     are handed to the projection directly. The projection's attributes
     are then mirrored into this object's `__dict__`.
     """
     if not isinstance(data, Orange.data.Storage):
         self.proj.partial_fit(data)
     else:
         if data.domain != self.pre_domain:
             data = data.from_table(self.pre_domain, data)
         self.proj.partial_fit(data.X)
     fitted_state = self.proj.__dict__
     self.__dict__.update(fitted_state)
     return self
Example #38
0
 def test_meta_object_dtype(self):
     """Feed the widget a table whose metas mix a discrete and a string variable."""
     # gh-1875: Test on mixed string/discrete metas
     sample = self.data[::5]
     attributes = sample.domain.attributes
     discrete_meta = sample.domain["iris"]
     string_meta = Orange.data.StringVariable("S")
     mixed_domain = Orange.data.Domain(
         attributes, [], [discrete_meta, string_meta])
     sample = sample.from_table(mixed_domain, sample)
     self.send_signal(self.widget.Inputs.data, sample)
 def __call__(self, data):
     """
     Return `data` converted to a domain whose attributes are copies of
     the original ones computed by `DespikeFeature`.

     Class variables and metas are carried over unchanged.
     """
     source_domain = data.domain
     shared = _DespikeCommon(
         self.threshold, self.cutoff, self.dis, source_domain)
     despiked = []
     for index, attr in enumerate(source_domain.attributes):
         despiked.append(
             attr.copy(compute_value=DespikeFeature(index, shared)))
     new_domain = Orange.data.Domain(
         despiked, source_domain.class_vars, source_domain.metas)
     return data.from_table(new_domain, data)
Example #40
0
 def __call__(self, data):
     """
     Project `data` with `self.pca`, optionally zero the components not
     listed in `self.components`, and return the inverse transform.

     `data` is converted to the PCA's `pre_domain` first if its domain
     differs.
     """
     if data.domain != self.pca.pre_domain:
         data = data.from_table(self.pca.pre_domain, data)
     scores = self.pca.transform(data.X)
     if self.components is not None:
         # set unused components to zero
         n_components = scores.shape[1]
         unused = np.ones(n_components)
         unused[self.components] = 0
         unused_idx = np.extract(unused, np.arange(n_components))
         scores[:, unused_idx] = 0
     return self.pca.proj.inverse_transform(scores)
Example #41
0
    def __call__(self, data, variable):
        """
        Return a copy of `variable` whose compute_value is a
        `ReplaceUnknownsModel` built from a model fitted by `self.learner`
        with `variable` as the class variable.

        Raises ValueError when the learner reports that it cannot handle
        the resulting domain.
        """
        target = data.domain[variable]
        domain = domain_with_class_var(data.domain, target)

        if not self.learner.check_learner_adequacy(domain):
            raise ValueError("`{}` doesn't support domain type".format(self.learner.name))

        training = data.from_table(domain, data)
        model = self.learner(training)
        assert model.domain.class_var == target
        return target.copy(compute_value=ReplaceUnknownsModel(target, model))
Example #42
0
 def __call__(self, data):
     """
     Return `data` restricted to the attributes selected by the limits.

     With `self.inverse` false an attribute is kept when its x value lies
     within [lowlim, highlim], a limit of None being unbounded. With
     `self.inverse` true an attribute is kept when its x value is at or
     below `lowlim` or at or above `highlim`; a limit of None selects
     nothing on its side.
     """
     x = getx(data)

     def inside(v):
         return ((self.lowlim is None or self.lowlim <= v) and
                 (self.highlim is None or v <= self.highlim))

     def outside(v):
         return ((self.lowlim is not None and v <= self.lowlim) or
                 (self.highlim is not None and self.highlim <= v))

     keep = outside if self.inverse else inside
     okattrs = []
     for at, v in zip(data.domain.attributes, x):
         if keep(v):
             okattrs.append(at)
     domain = Orange.data.Domain(okattrs, data.domain.class_vars, metas=data.domain.metas)
     return data.from_table(domain, data)
Example #43
0
 def __call__(self, data):
     """
     Return log10(ref / data) when a reference is set, otherwise
     -log10(data), computed on the X arrays.

     `data` is converted to `self.domain` first if its domain differs.
     """
     if data.domain != self.domain:
         data = data.from_table(self.domain, data)
     if not self.ref:
         # Calculate from transmittance data
         result = np.log10(data.X)
         result *= -1
         return result
     # Calculate from single-channel data
     ratio = self.ref.X / data.X
     np.log10(ratio, out=ratio)
     return ratio
Example #44
0
 def __call__(self, data):
     """
     Apply `savgol_filter` to the rows of `data` and return the result
     mapped back to the original feature order.

     `data` is converted to `self.domain` first if its domain differs.
     Positions reported as NaN by the interpolation helper are set back
     to NaN after filtering.
     """
     if data.domain != self.domain:
         data = data.from_table(self.domain, data)
     xs, order, mon, values = _transform_to_sorted_features(data)
     values, nan_mask = _nan_extend_edges_and_interpolate(xs[order], values)
     filtered = savgol_filter(
         values,
         window_length=self.window,
         polyorder=self.polyorder,
         deriv=self.deriv,
         mode="nearest",
     )
     # set NaNs where there were NaNs in the original array
     if nan_mask is not None:
         filtered[nan_mask] = np.nan
     return _transform_back_to_features(order, mon, filtered)
Example #45
0
 def __call__(self, data):
     """
     Return data / ref when a reference is set, otherwise 10 ** (-data),
     computed on the X arrays.

     `data` is converted to `self.domain` first if its domain differs.
     """
     if data.domain != self.domain:
         data = data.from_table(self.domain, data)
     if self.ref:
         # Calculate from single-channel data
         return data.X / self.ref.X
     # Calculate from absorbance data
     result = data.X.copy()
     result *= -1
     np.power(10, result, out=result)
     return result
 def __call__(self, data):
     """
     Return `data` restricted to the attributes selected by
     `self.lowlim` / `self.highlim`.

     Normally the attributes whose x value lies within the limits are
     kept (a limit of None is unbounded). With `self.inverse` set, the
     attributes at or outside a non-None limit are kept instead.
     """
     x = getx(data)
     pairs = zip(data.domain.attributes, x)
     if self.inverse:
         okattrs = [
             at for at, v in pairs
             if (self.lowlim is not None and v <= self.lowlim)
             or (self.highlim is not None and self.highlim <= v)
         ]
     else:
         okattrs = [
             at for at, v in pairs
             if (self.lowlim is None or self.lowlim <= v)
             and (self.highlim is None or v <= self.highlim)
         ]
     domain = Orange.data.Domain(okattrs, data.domain.class_vars, metas=data.domain.metas)
     return data.from_table(domain, data)
Example #47
0
    def __call__(self, data):
        """
        Return `data` converted to a domain of XAS-normalized features.

        Every attribute is replaced by a ContinuousVariable of the same
        name computed by `XASnormalizationFeature`, and one extra meta
        named 'edge_jump' is appended; its feature index is the number of
        attributes. Class variables are carried over unchanged.
        """
        source_domain = data.domain
        shared = _XASnormalizationCommon(self.edge, self.preedge_params,
                                         self.postedge_params, source_domain)

        newattrs = []
        for index, var in enumerate(source_domain.attributes):
            feature = XASnormalizationFeature(index, shared)
            newattrs.append(
                ContinuousVariable(name=var.name, compute_value=feature))

        edge_jump = ContinuousVariable(
            name='edge_jump',
            compute_value=XASnormalizationFeature(len(newattrs), shared))
        newmetas = source_domain.metas + (edge_jump,)

        domain = Orange.data.Domain(
            newattrs, source_domain.class_vars, newmetas)

        return data.from_table(domain, data)
Example #48
0
    def __call__(self, data):
        """
        Apply an imputation method to the given data set. Returns a new
        data table with missing values replaced by their imputations.

        The method is applied to every attribute; class variables and
        metas are carried over unchanged.

        Parameters
        ----------
        data : Orange.data.Table
            An input data table.
        """
        # impute.Average is the default when no method has been configured.
        imputer = self.method or impute.Average()
        source_domain = data.domain
        newattrs = []
        for var in source_domain.attributes:
            newattrs.append(imputer(data, var))
        domain = Orange.data.Domain(
            newattrs, source_domain.class_vars, source_domain.metas)
        return data.from_table(domain, data)
Example #49
0
    def __call__(self, data):
        """
        Return `data` in a domain whose attributes are PCA-denoised.

        A PCA is fitted on `data` with at most ``self.components``
        components (never more than the number of attributes or rows), and
        each attribute is copied with a PCADenoisingFeature as its
        compute_value.  When `data` is empty or has no attributes, the
        attributes are copied without any compute_value.
        """
        attributes = data.domain.attributes

        if not (data and len(attributes)):
            # FIXME we should have a warning here
            denoised = [attr.copy() for attr in attributes]  # unknown values
        else:
            n_components = min(len(attributes), len(data), self.components)
            pca = Orange.projection.PCA(n_components=n_components)(data)
            shared = _PCAReconstructCommon(pca)
            denoised = []
            for position, attr in enumerate(attributes):
                denoised.append(attr.copy(
                    compute_value=PCADenoisingFeature(position, shared)))

        target = Orange.data.Domain(denoised, data.domain.class_vars,
                                    data.domain.metas)
        return data.from_table(target, data)
Example #50
0
    def __call__(self, data):
        """
        Discretize the continuous attributes of `data` with ``self.method``.

        Non-continuous attributes are kept as they are.  A continuous
        attribute is dropped when the method returns None or a variable
        with fewer than two values.  Class variables and metas are
        unchanged.
        """
        def discretized_or_none(var):
            if not is_continuous(var):
                return var
            candidate = self.method(data, var)
            usable = candidate is not None and len(candidate.values) >= 2
            return candidate if usable else None

        candidates = map(discretized_or_none, data.domain.attributes)
        kept = [var for var in candidates if var is not None]

        target = Orange.data.Domain(
            kept, data.domain.class_vars, data.domain.metas)
        return data.from_table(target, data)
Example #51
0
 def __call__(self, data):
     """
     Interpolate the rows of `data` at ``self.points``.

     Returns the result of the chosen interpolation function, or an
     all-NaN array of shape (len(data), len(self.points)) when no x
     values remain.
     """
     # Convert to the stored domain only if some conversion is possible;
     # otherwise the interpolator itself makes the domains compatible.
     target = self.domain
     needs_conversion = (
         target is not None
         and data.domain != target
         and any(at.compute_value for at in target.attributes)
     )
     if needs_conversion:
         data = data.from_table(target, data)

     x = getx(data)
     ys = data.X
     if self.handle_nans:
         # Dropping columns that are entirely NaN makes interior NaNs get
         # replaced by interpolated values (relatively fast).
         x, ys = remove_whole_nan_ys(x, ys)

     if len(x) == 0:
         return np.full((len(data), len(self.points)), np.nan)

     interpolate = self.interpfn
     if interpolate is None:
         # No explicit interpolator: choose one by NaN presence and kind.
         if not (self.handle_nans and np.isnan(ys).any()):
             interpolate = interp1d_wo_unknowns_scipy
         elif self.kind == "linear":
             interpolate = interp1d_with_unknowns_numpy
         else:
             interpolate = interp1d_with_unknowns_scipy

     return interpolate(x, ys, self.points, kind=self.kind)
Example #52
0
    def __call__(self, data, ret=Value):
        """
        Predict for `data` and return values, probabilities, or both.

        Parameters
        ----------
        data : numpy.ndarray, scipy.sparse csr_matrix, Orange.data.Instance
               or Orange.data.Table
            Input to predict on.  Instances and tables whose domain differs
            from the model's domain are converted to it first.
        ret : int
            One of Model.Value, Model.Probs or Model.ValueProbs (must lie
            in the range 0..2); selects what is returned.

        Returns
        -------
        The predicted value(s), the probabilities, or a ``(value, probs)``
        tuple, depending on `ret`.

        Raises
        ------
        ValueError
            If `ret` is out of range, or if `ret` is greater than 0 while
            any class variable is continuous.
        TypeError
            If `data` is of an unsupported type, or the predictor returns
            an array of unexpected dimensionality.
        """
        if not 0 <= ret <= 2:
            raise ValueError("invalid value of argument 'ret'")
        if (ret > 0
            and any(isinstance(v, Orange.data.ContinuousVariable)
                    for v in self.domain.class_vars)):
            raise ValueError("cannot predict continuous distributions")

        # Call the predictor
        if isinstance(data, np.ndarray):
            prediction = self.predict(np.atleast_2d(data))
        elif isinstance(data, scipy.sparse.csr.csr_matrix):
            prediction = self.predict(data)
        elif isinstance(data, Orange.data.Instance):
            if data.domain != self.domain:
                data = Orange.data.Instance(self.domain, data)
            prediction = self.predict_storage(data)
        elif isinstance(data, Orange.data.Table):
            if data.domain != self.domain:
                data = data.from_table(self.domain, data)
            prediction = self.predict_storage(data)
        else:
            raise TypeError("Unrecognized argument (instance of '{}')".format(
                            type(data).__name__))

        # Parse the result into value and probs; with several class
        # variables both values and probabilities have one more dimension.
        multitarget = len(self.domain.class_vars) > 1
        if isinstance(prediction, tuple):
            value, probs = prediction
        elif prediction.ndim == 1 + multitarget:
            value, probs = prediction, None
        elif prediction.ndim == 2 + multitarget:
            value, probs = None, prediction
        else:
            # Format the message explicitly: passing the dimension as a
            # second argument would leave the "%i" placeholder unfilled.
            raise TypeError("model returned a %i-dimensional array"
                            % prediction.ndim)

        # Ensure that we have what we need to return
        if ret != Model.Probs and value is None:
            value = np.argmax(probs, axis=-1)
        if ret != Model.Value and probs is None:
            # Only values were predicted: derive probabilities from counts.
            if multitarget:
                max_card = max(len(c.values)
                               for c in self.domain.class_vars)
                probs = np.zeros(value.shape + (max_card,), float)
                for i, cvar in enumerate(self.domain.class_vars):
                    probs[:, i, :], _ = bn.bincount(np.atleast_2d(value[:, i]),
                                                    max_card - 1)
            else:
                probs, _ = bn.bincount(np.atleast_2d(value),
                                       len(self.domain.class_var.values) - 1)
            if ret == Model.ValueProbs:
                return value, probs
            else:
                return probs

        # Return what we need to
        if ret == Model.Probs:
            return probs
        if isinstance(data, Orange.data.Instance) and not multitarget:
            # A single instance with one target yields a single Value.
            value = Orange.data.Value(self.domain.class_var, value[0])
        if ret == Model.Value:
            return value
        else:  # ret == Model.ValueProbs
            return value, probs
Example #53
0
 def __call__(self, data):
     """Return `data` converted into the domain stored on this object."""
     convert = data.from_table
     return convert(self.domain, data)
Example #54
0
 def __call__(self, data):
     """
     Return `data` with every attribute replaced by ``self.method(data, var)``.

     Class variables and metas are carried over unchanged.
     """
     replaced = []
     for var in data.domain.attributes:
         replaced.append(self.method(data, var))

     target = Orange.data.Domain(
         replaced, data.domain.class_vars, data.domain.metas)
     return data.from_table(target, data)
Example #55
0
 def __call__(self, data):
     """
     Return column ``self.feature`` of the projection of `data`.

     `data` is first converted to the projection's ``pre_domain`` when its
     own domain differs from it.
     """
     expected = self.projection.pre_domain
     if data.domain != expected:
         data = data.from_table(self.projection.pre_domain, data)

     transformed = self.projection.transform(data.X)
     return transformed[:, self.feature]
Example #56
0
 def __call__(self, data):
     """
     Return `data` in a domain whose attributes come from
     ``features_with_interpolation``.

     Class variables and metas are carried over unchanged.
     """
     source = data.domain
     interpolated = features_with_interpolation(
         self.points, self.kind, source, self.handle_nans,
         interpfn=self.interpfn)

     target = Orange.data.Domain(interpolated, source.class_vars,
                                 source.metas)
     return data.from_table(target, data)
Example #57
0
 def __call__(self, data):
     """
     Return the PCA transform of ``data.X``.

     `data` is first converted to the PCA's ``pre_domain`` when its own
     domain differs from it.
     """
     expected = self.pca.pre_domain
     if data.domain != expected:
         data = data.from_table(self.pca.pre_domain, data)

     transformed = self.pca.transform(data.X)
     return transformed
Example #58
0
def append_columns(data, attributes=(), class_vars=(), metas=()):
    # type: (Orange.data.Table, ColSpec, ColSpec, ColSpec) -> Orange.data.Table
    """
    Return a copy of `data` extended with additional columns.

    Parameters
    ----------
    data : Orange.data.Table
        Primary table.
    attributes : Sequence[Tuple[Orange.data.Variable, Sequence[float]]]
        (variable, column values) pairs appended after the existing
        attributes of `data`.
    class_vars : Sequence[Tuple[Orange.data.Variable, Sequence[float]]]
        (variable, column values) pairs appended after the existing
        class variables of `data`.
    metas : Sequence[Tuple[Orange.data.Variable, Sequence[float]]]
        (variable, column values) pairs appended after the existing
        metas of `data`.

    Returns
    -------
    data : Orange.data.Table
        A copy of the original `data` input extended with all columns from
        the `attributes`, `class_vars` and `metas` parameters.

    Note
    ----
    All variables in the original and new columns should be distinct.
    """
    def as_column(values, n_rows):
        # type: (Sequence[float], int) -> numpy.ndarray
        # Arrays with fewer than two dimensions become (n_rows, 1) columns;
        # anything already 2-D or higher is passed through unchanged.
        column = numpy.asarray(values)
        return column.reshape((n_rows, 1)) if column.ndim < 2 else column

    old_domain = data.domain
    extra_attributes = tuple(pair[0] for pair in attributes)
    extra_class_vars = tuple(pair[0] for pair in class_vars)
    extra_metas = tuple(pair[0] for pair in metas)

    extended_domain = Orange.data.Domain(
        old_domain.attributes + extra_attributes,
        old_domain.class_vars + extra_class_vars,
        old_domain.metas + extra_metas
    )

    n_rows = len(data)
    attribute_columns = [as_column(values, n_rows) for _, values in attributes]
    class_columns = [as_column(values, n_rows) for _, values in class_vars]
    meta_columns = [as_column(values, n_rows) for _, values in metas]

    result = data.from_table(extended_domain, data)

    # New columns sit right after the original ones in each part of the
    # domain, so each fill starts at the original column count.
    offset = len(old_domain.attributes)
    for step, (var, column) in enumerate(
            zip(extra_attributes, attribute_columns)):
        assert result.domain.attributes[offset + step] is var
        result.X[:, offset + step] = column.ravel()

    offset = len(old_domain.class_vars)
    for step, (var, column) in enumerate(
            zip(extra_class_vars, class_columns)):
        assert result.domain.class_vars[offset + step] is var
        result._Y[:, offset + step] = column.ravel()

    offset = len(old_domain.metas)
    for step, (var, column) in enumerate(zip(extra_metas, meta_columns)):
        assert result.domain.metas[offset + step] is var
        result.metas[:, offset + step] = column.ravel()

    return result