Esempio n. 1
0
    def transformed(self, data):
        """
        Normalize a copy of `data` and return its ``X`` array.

        The behaviour is selected by ``self.method``:

        - ``Normalize.Vector``: rows are L2-normalized with ``sknormalize``.
          NaNs are interpolated (and any that remain are set to zero) only
          to compute the norm; they are put back in the result.
        - ``Normalize.Area``: ``X`` is divided by the ``X`` of
          ``Integrate`` applied with ``self.int_method`` over the limits
          ``[self.lower, self.upper]``.
        - ``Normalize.Attribute``: ``X`` is divided by the values of
          ``self.attr`` when it is a continuous variable of the domain;
          otherwise the whole output is NaN.

        After either division the result is passed through ``replace_infs``.
        Any other method leaves the copied values unchanged.

        Parameters
        ----------
        data : Orange.data.Table
            Table to normalize. It is copied before modification.

        Returns
        -------
        X : array
            The normalized ``X`` of the copy. If `data` has no rows its own
            ``X`` is returned as is.
        """
        if data.X.shape[0] == 0:
            return data.X
        data = data.copy()

        if self.method == Normalize.Vector:
            nans = np.isnan(data.X)
            # Computed once; decides both the interpolation and the restore.
            has_nans = nans.any()
            ys = data.X
            if has_nans:
                # interpolate nan elements for normalization
                x = getx(data)
                ys = interp1d_with_unknowns_numpy(x, ys, x)
                # NaNs that interpolation could not fill (presumably at the
                # edges) are zeroed so they do not affect the norm
                ys = np.nan_to_num(ys)
            data.X = sknormalize(ys, norm='l2', axis=1, copy=False)
            if has_nans:
                # keep nans where they were
                data.X[nans] = float("nan")
        elif self.method == Normalize.Area:
            norm_data = Integrate(methods=self.int_method,
                                  limits=[[self.lower, self.upper]])(data)
            data.X /= norm_data.X
            replace_infs(data.X)
        elif self.method == Normalize.Attribute:
            if self.attr in data.domain and isinstance(
                    data.domain[self.attr], Orange.data.ContinuousVariable):
                # A one-variable domain extracts the factors as a column.
                ndom = Orange.data.Domain([data.domain[self.attr]])
                factors = data.transform(ndom)
                data.X /= factors.X
                replace_infs(data.X)
            else:  # invalid attribute for normalization
                data.X *= float("nan")
        return data.X
Esempio n. 2
0
    def __call__(self, data):
        """
        Randomize the selected parts of a copy of the given data.

        Parameters
        ----------
        data : Orange.data.Table
            A data table to be randomized. It is copied, not modified.

        Returns
        -------
        data : Orange.data.Table
            The copy, with ``Y``, ``X`` and/or ``metas`` replaced by the
            result of ``self.randomize`` according to the flags set in
            ``self.rand_type``.
        """
        randomized = data.copy()
        seed_source = np.random.RandomState(self.rand_seed)
        # One derived seed per part, so that X, Y and metas are never
        # shuffled with the same seed.
        class_seed, attr_seed, meta_seed = seed_source.randint(
            0, 2 ** 32 - 1, size=3, dtype=np.int64)
        plan = (
            (Randomize.RandomizeClasses, "Y", class_seed),
            (Randomize.RandomizeAttributes, "X", attr_seed),
            (Randomize.RandomizeMetas, "metas", meta_seed),
        )
        for flag, part, seed in plan:
            if self.rand_type & flag:
                current = getattr(randomized, part)
                setattr(randomized, part, self.randomize(current, seed))
        return randomized
Esempio n. 3
0
    def __call__(self, data):
        """
        Return a randomized copy of the given data table.

        Parameters
        ----------
        data : Orange.data.Table
            A data table to be randomized. The input itself is left as is.

        Returns
        -------
        data : Orange.data.Table
            A copy of `data` in which every part selected by
            ``self.rand_type`` has been passed through ``self.randomize``.
        """
        result = data.copy()
        rng = np.random.RandomState(self.rand_seed)
        # Distinct seeds keep X and Y from being shuffled identically.
        seeds = rng.randint(0, 2**32 - 1, size=3, dtype=np.int64)
        targets = (
            ("Y", Randomize.RandomizeClasses),
            ("X", Randomize.RandomizeAttributes),
            ("metas", Randomize.RandomizeMetas),
        )
        for (field, flag), seed in zip(targets, seeds):
            if not self.rand_type & flag:
                continue
            setattr(result, field, self.randomize(getattr(result, field), seed))
        return result
Esempio n. 4
0
    def transformed(self, data):
        """
        Normalize a copy of `data` and return its ``X`` array.

        The behaviour is selected by ``self.method``:

        - ``Normalize.Vector``: rows are L2-normalized with ``sknormalize``.
          NaNs are interpolated (and any that remain are set to zero) only
          to compute the norm; they are put back in the result.
        - ``Normalize.Area``: ``X`` is divided by the ``X`` of
          ``Integrate`` applied with ``self.int_method`` over the limits
          ``[self.lower, self.upper]``.
        - ``Normalize.Attribute``: ``X`` is divided by the values of
          ``self.attr`` when it is a continuous variable of the domain;
          otherwise the whole output is NaN.

        Any other method leaves the copied values unchanged.

        Parameters
        ----------
        data : Orange.data.Table
            Table to normalize. It is copied before modification.

        Returns
        -------
        X : array
            The normalized ``X`` of the copy. If `data` has no rows its own
            ``X`` is returned as is.
        """
        if data.X.shape[0] == 0:
            return data.X
        data = data.copy()

        if self.method == Normalize.Vector:
            nans = np.isnan(data.X)
            # Computed once; decides both the interpolation and the restore.
            has_nans = nans.any()
            ys = data.X
            if has_nans:
                # interpolate nan elements for normalization
                x = getx(data)
                ys = interp1d_with_unknowns_numpy(x, ys, x)
                # NaNs that interpolation could not fill (presumably at the
                # edges) are zeroed so they do not affect the norm
                ys = np.nan_to_num(ys)
            data.X = sknormalize(ys, norm='l2', axis=1, copy=False)
            if has_nans:
                # keep nans where they were
                data.X[nans] = float("nan")
        elif self.method == Normalize.Area:
            norm_data = Integrate(methods=self.int_method,
                                  limits=[[self.lower, self.upper]])(data)
            data.X /= norm_data.X
        elif self.method == Normalize.Attribute:
            if self.attr in data.domain and isinstance(
                    data.domain[self.attr], Orange.data.ContinuousVariable):
                # A one-variable domain extracts the factors as a column.
                ndom = Orange.data.Domain([data.domain[self.attr]])
                factors = data.transform(ndom)
                data.X /= factors.X
            else:  # invalid attribute for normalization
                data.X *= float("nan")
        return data.X
Esempio n. 5
0
    def __call__(self, data):
        """
        Normalize `data` and return the resulting ``X`` array.

        `data` is first converted to ``self.domain`` if its domain differs,
        then copied, so the input table is not modified.

        Depending on ``self.method``:

        - ``Normalize.Vector``: rows are L2-normalized with ``sknormalize``;
          NaNs are interpolated only for computing the norm and are put
          back afterwards.
        - ``Normalize.Area``: ``X`` is divided by the ``X`` of ``Integrate``
          over the limits ``[self.lower, self.upper]``.
        - ``Normalize.Attribute``: every row is divided by its value in the
          meta column of ``self.attr``; nothing is done when ``self.attr``
          is ``None``, ``"None"`` or ``""``.

        Parameters
        ----------
        data : Orange.data.Table
            Table to normalize.

        Returns
        -------
        X : array
            Normalized values. For a table without rows its ``X`` is
            returned without further processing.
        """
        if data.domain != self.domain:
            data = data.from_table(self.domain, data)

        if data.X.shape[0] == 0:
            return data.X

        data = data.copy()

        if self.method == Normalize.Vector:
            missing = np.isnan(data.X)
            any_missing = np.any(missing)
            spectra = data.X
            if any_missing:
                # Fill the gaps by interpolation, for the norm only.
                wavenumbers = getx(data)
                spectra = interp1d_with_unknowns_numpy(
                    wavenumbers, spectra, wavenumbers)
                # Whatever could not be interpolated becomes zero.
                spectra = np.nan_to_num(spectra)
            data.X = sknormalize(spectra, norm='l2', axis=1, copy=False)
            if any_missing:
                # Missing values stay missing in the output.
                data.X[missing] = float("nan")
            return data.X

        if self.method == Normalize.Area:
            integrator = Integrate(method=self.int_method,
                                   limits=[[self.lower, self.upper]])
            areas = integrator(data)
            data.X /= areas.X
            return data.X

        # attr normalization applies to entire spectrum, regardless of limits
        if self.method == Normalize.Attribute \
                and self.attr not in (None, "None", ""):
            # meta indices are -ve and start at -1
            meta_column = -1 - data.domain.index(self.attr)
            divisors = data.metas[:, meta_column].astype(float)
            data.X /= divisors[:, np.newaxis]
        return data.X
    def commit(self):
        """
        Apply the current filter to ``self.data`` and send the result.

        The commit timer is stopped first. If there is no data or the
        filter is not enabled, ``self.data`` is sent unchanged.

        - For the ``Cells`` / ``Genes`` filter types, rows (cells) or
          attribute columns (genes) are kept when their value in
          ``self._state.x`` lies within the enabled lower/upper limits
          (inclusive). ``None`` is sent when no rows remain or the
          resulting domain has no variables.
        - For the ``Data`` filter type, a copy of the data is made and every
          ``X`` entry below the enabled lower limit or above the enabled
          upper limit is set to 0.
        """
        self._committimer.stop()
        data = self.data

        if data is not None and self._is_filter_enabled():
            if self.filter_type() in [Cells, Genes]:
                state = self._state
                assert state is not None
                counts = state.x
                cmax = self.limit_upper
                cmin = self.limit_lower
                mask = np.ones(counts.shape, dtype=bool)
                if self.limit_lower_enabled:
                    mask &= cmin <= counts
                if self.limit_upper_enabled:
                    mask &= counts <= cmax

                if self.filter_type() == Cells:
                    assert counts.size == len(data)
                    data = data[mask]
                else:
                    assert counts.size == len(data.domain.attributes)
                    atts = [v for v, m in zip(data.domain.attributes, mask)
                            if m]
                    data = data.from_table(
                        Orange.data.Domain(
                            atts, data.domain.class_vars, data.domain.metas
                        ),
                        data
                    )
                if len(data) == 0 or \
                        len(data.domain) + len(data.domain.metas) == 0:
                    data = None
            elif self.filter_type() == Data:
                dmin, dmax = self.limit_lower, self.limit_upper
                data = data.copy()
                assert data.X.base is None
                # Entries outside the enabled limits are zeroed.
                mask = None
                if self.limit_lower_enabled:
                    mask = data.X < dmin
                if self.limit_upper_enabled:
                    # Values *above* the upper limit are out of range,
                    # whether or not a lower limit is also enabled.
                    above = data.X > dmax
                    mask = above if mask is None else mask | above
                # Indexing with None would address (and zero) the whole
                # array, so only assign when a limit produced a mask.
                if mask is not None:
                    data.X[mask] = 0.0
            else:
                assert False

        self.Outputs.data.send(data)
Esempio n. 7
0
    def __call__(self, data):
        """
        Normalize `data` and return the resulting ``X`` array.

        `data` is converted to ``self.domain`` if its domain differs and is
        then copied, so the input table is not modified.

        When ``self.limits == 1`` the statistics used for normalization are
        computed only from the columns whose x values (as given by ``getx``)
        fall between ``self.lower`` and ``self.upper``; otherwise all
        columns are used. The normalization itself is always applied to all
        columns.

        Depending on ``self.method``:

        - ``Normalize.MinMax``: rows are divided by their maximum absolute
          value.
        - ``Normalize.Vector``: the mean of the whole row is subtracted and
          the row is divided by the root of the sum of squares of the
          (offset-corrected) selected columns.
        - ``Normalize.Offset``: the row minimum is subtracted.
        - ``Normalize.Attribute``: rows are divided by their value in the
          meta column of ``self.attr``; nothing is done when ``self.attr``
          is ``None``, ``"None"`` or ``""``.

        Parameters
        ----------
        data : Orange.data.Table
            Table to normalize.

        Returns
        -------
        X : array
            Normalized values of the copy.
        """
        if data.domain != self.domain:
            data = data.from_table(self.domain, data)

        x = getx(data)

        data = data.copy()

        if self.limits == 1:
            x_sorter = np.argsort(x)
            lim_min = np.searchsorted(x,
                                      self.lower,
                                      sorter=x_sorter,
                                      side="left")
            lim_max = np.searchsorted(x,
                                      self.upper,
                                      sorter=x_sorter,
                                      side="right")
            # Indexing with an index array yields a copy, independent of
            # data.X.
            y_s = data.X[:, x_sorter][:, lim_min:lim_max]
        else:
            # No copy here: y_s is the very same array as data.X.
            y_s = data.X

        if self.method == Normalize.MinMax:
            data.X /= nanmax(np.abs(y_s), axis=1).reshape((-1, 1))
        elif self.method == Normalize.Vector:
            # zero offset correction applies to entire spectrum, regardless of limits
            y_offsets = nanmean(data.X, axis=1).reshape((-1, 1))
            # Subtract out of place and before touching data.X: when y_s is
            # data.X itself, two in-place subtractions would remove the
            # offset twice.
            y_s = y_s - y_offsets
            data.X -= y_offsets
            rssq = np.sqrt(nansum(y_s**2, axis=1).reshape((-1, 1)))
            data.X /= rssq
        elif self.method == Normalize.Offset:
            data.X -= nanmin(y_s, axis=1).reshape((-1, 1))
        elif self.method == Normalize.Attribute:
            # attr normalization applies to entire spectrum, regardless of limits
            # meta indices are -ve and start at -1
            if self.attr not in (None, "None", ""):
                attr_index = -1 - data.domain.index(self.attr)
                factors = data.metas[:, attr_index].astype(float)
                data.X /= factors[:, None]
        return data.X
Esempio n. 8
0
    def __call__(self, data):
        """
        Randomize the selected parts of a copy of the given data.

        Parameters
        ----------
        data : Orange.data.Table
            A data table to be randomized. It is copied, not modified.

        Returns
        -------
        data : Orange.data.Table
            The copy, with ``Y``, ``X`` and/or ``metas`` replaced by the
            result of ``self.randomize`` according to the flags set in
            ``self.rand_type``.
        """
        randomized = data.copy()
        plan = (
            (Randomize.RandomizeClasses, "Y"),
            (Randomize.RandomizeAttributes, "X"),
            (Randomize.RandomizeMetas, "metas"),
        )
        for flag, part in plan:
            if self.rand_type & flag:
                setattr(randomized, part,
                        self.randomize(getattr(randomized, part)))
        return randomized