Python nanmin Exemples, Orange.statistics.util.nanmin Python Exemples

Exemple #1

0

Afficher le fichier

Fichier : owfeaturestatistics.py Projet : biolab/orange3

    def __compute_statistics(self):
        # Since data matrices can of mixed sparsity, we need to compute
        # attributes separately for each of them.
        matrices = [self.__attributes, self.__class_vars, self.__metas]
        # Filter out any matrices with size 0
        matrices = list(filter(lambda tup: tup[1].size, matrices))

        self._variable_types = np.array([type(var) for var in self.variables])
        self._variable_names = np.array([var.name.lower() for var in self.variables])
        self._min = self.__compute_stat(
            matrices,
            discrete_f=lambda x: ut.nanmin(x, axis=0),
            continuous_f=lambda x: ut.nanmin(x, axis=0),
            time_f=lambda x: ut.nanmin(x, axis=0),
        )
        self._dispersion = self.__compute_stat(
            matrices,
            discrete_f=_categorical_entropy,
            continuous_f=lambda x: np.sqrt(ut.nanvar(x, axis=0)) / ut.nanmean(x, axis=0),
        )
        self._missing = self.__compute_stat(
            matrices,
            discrete_f=lambda x: ut.countnans(x, axis=0),
            continuous_f=lambda x: ut.countnans(x, axis=0),
            string_f=lambda x: (x == StringVariable.Unknown).sum(axis=0),
            time_f=lambda x: ut.countnans(x, axis=0),
        )
        self._max = self.__compute_stat(
            matrices,
            discrete_f=lambda x: ut.nanmax(x, axis=0),
            continuous_f=lambda x: ut.nanmax(x, axis=0),
            time_f=lambda x: ut.nanmax(x, axis=0),
        )

        # Since scipy apparently can't do mode on sparse matrices, cast it to
        # dense. This can be very inefficient for large matrices, and should
        # be changed
        def __mode(x, *args, **kwargs):
            if sp.issparse(x):
                x = x.todense(order="C")
            # return ss.mode(x, *args, **kwargs)[0]
            return ut.nanmode(x, *args, **kwargs)[0]  # Temporary replacement for scipy

        self._center = self.__compute_stat(
            matrices,
            discrete_f=lambda x: __mode(x, axis=0),
            continuous_f=lambda x: ut.nanmean(x, axis=0),
            time_f=lambda x: ut.nanmean(x, axis=0),
        )

Exemple #2

0

Afficher le fichier

Fichier : test_statistics.py Projet : qeryq/SFECOMLA

    def test_nanmin_nanmax(self):
        warnings.filterwarnings("ignore", r".*All-NaN slice encountered.*")
        for X in self.data:
            X_sparse = csr_matrix(X)
            for axis in [None, 0, 1]:
                np.testing.assert_array_equal(nanmin(X, axis=axis),
                                              np.nanmin(X, axis=axis))

                np.testing.assert_array_equal(nanmin(X_sparse, axis=axis),
                                              np.nanmin(X, axis=axis))

                np.testing.assert_array_equal(nanmax(X, axis=axis),
                                              np.nanmax(X, axis=axis))

                np.testing.assert_array_equal(nanmax(X_sparse, axis=axis),
                                              np.nanmax(X, axis=axis))

Exemple #3

0

Afficher le fichier

Fichier : owlineplot.py Projet : biolab/orange3

 def update_sel_range(self, y_data):
     if y_data is None:
         curve1 = curve2 = pg.PlotDataItem(x=self.x_data, y=self.__mean)
     else:
         curve1 = pg.PlotDataItem(x=self.x_data, y=nanmin(y_data, axis=0))
         curve2 = pg.PlotDataItem(x=self.x_data, y=nanmax(y_data, axis=0))
     self.sel_range.setCurves(curve1, curve2)

Exemple #4

0

Afficher le fichier

Fichier : histogram.py Projet : PrimozGodec/orange3

    def _get_histogram_edges(self):
        """Get the edges in the histogram based on the attribute type.

        In case of a continuous variable, we split the variable range into
        n bins. In case of a discrete variable, bins don't make sense, so we
        just return the attribute values.

        This will return the staring and ending edge, not just the edges in
        between (in the case of a continuous variable).

        Returns
        -------
        np.ndarray

        """
        if self.attribute.is_discrete:
            return np.array([self.attribute.to_val(v) for v in self.attribute.values])
        else:
            edges = np.linspace(ut.nanmin(self.x), ut.nanmax(self.x), self.n_bins)
            edge_diff = edges[1] - edges[0]
            edges = np.hstack((edges, [edges[-1] + edge_diff]))

            # If the variable takes on a single value, we still need to spit
            # out some reasonable bin edges
            if np.all(edges == edges[0]):
                edges = np.array([edges[0] - 1, edges[0], edges[0] + 1])

            return edges

Exemple #5

0

Afficher le fichier

 def update_sel_range(self, y_data):
     if y_data is None:
         curve1 = curve2 = pg.PlotDataItem(x=self.x_data, y=self.__mean)
     else:
         curve1 = pg.PlotDataItem(x=self.x_data, y=nanmin(y_data, axis=0))
         curve2 = pg.PlotDataItem(x=self.x_data, y=nanmax(y_data, axis=0))
     self.sel_range.setCurves(curve1, curve2)

Exemple #6

0

Afficher le fichier

    def _get_histogram_edges(self):
        """Get the edges in the histogram based on the attribute type.

        In case of a continuous variable, we split the variable range into
        n bins. In case of a discrete variable, bins don't make sense, so we
        just return the attribute values.

        This will return the staring and ending edge, not just the edges in
        between (in the case of a continuous variable).

        Returns
        -------
        np.ndarray

        """
        if self.attribute.is_discrete:
            return np.array(
                [self.attribute.to_val(v) for v in self.attribute.values])
        else:
            edges = np.linspace(ut.nanmin(self.x), ut.nanmax(self.x),
                                self.n_bins)
            edge_diff = edges[1] - edges[0]
            edges = np.hstack((edges, [edges[-1] + edge_diff]))

            # If the variable takes on a single value, we still need to spit
            # out some reasonable bin edges
            if np.all(edges == edges[0]):
                edges = np.array([edges[0] - 1, edges[0], edges[0] + 1])

            return edges

Exemple #7

0

Afficher le fichier

 def _get_range_curve(self):
     color = QColor(self.color)
     color.setAlpha(LinePlotStyle.RANGE_ALPHA)
     bottom, top = nanmin(self.y_data, axis=0), nanmax(self.y_data, axis=0)
     return pg.FillBetweenItem(
         pg.PlotDataItem(x=self.x_data, y=bottom),
         pg.PlotDataItem(x=self.x_data, y=top), brush=color
     )

Exemple #8

0

Afficher le fichier

Fichier : owlineplot.py Projet : biolab/orange3

 def _get_range_curve(self):
     color = QColor(self.color)
     color.setAlpha(LinePlotStyle.RANGE_ALPHA)
     bottom, top = nanmin(self.y_data, axis=0), nanmax(self.y_data, axis=0)
     return pg.FillBetweenItem(
         pg.PlotDataItem(x=self.x_data, y=bottom),
         pg.PlotDataItem(x=self.x_data, y=top), brush=color
     )

Exemple #9

0

Afficher le fichier

Fichier : owlineplot.py Projet : suppu-github/orange3

 def _get_range_curve(self):
     color = QColor(self.color)
     color.setAlpha(self.graph.range_settings[Updater.ALPHA_LABEL])
     bottom, top = nanmin(self.y_data, axis=0), nanmax(self.y_data, axis=0)
     return pg.FillBetweenItem(
         pg.PlotDataItem(x=self.x_data, y=bottom),
         pg.PlotDataItem(x=self.x_data, y=top), brush=color
     )

Exemple #10

0

Afficher le fichier

Fichier : test_statistics.py Projet : rowhit/orange3

    def test_nanmin_nanmax(self):
        for X in self.data:
            X_sparse = csr_matrix(X)
            for axis in [None, 0, 1]:
                np.testing.assert_array_equal(
                    nanmin(X, axis=axis),
                    np.nanmin(X, axis=axis))

                np.testing.assert_array_equal(
                    nanmin(X_sparse, axis=axis),
                    np.nanmin(X, axis=axis))

                np.testing.assert_array_equal(
                    nanmax(X, axis=axis),
                    np.nanmax(X, axis=axis))

                np.testing.assert_array_equal(
                    nanmax(X_sparse, axis=axis),
                    np.nanmax(X, axis=axis))

Exemple #11

0

Afficher le fichier

Fichier : test_statistics.py Projet : kernc/orange3

    def test_nanmin_nanmax(self):
        for X in self.data:
            X_sparse = csr_matrix(X)
            for axis in [None, 0, 1]:
                np.testing.assert_array_equal(
                    nanmin(X, axis=axis),
                    np.nanmin(X, axis=axis))

                np.testing.assert_array_equal(
                    nanmin(X_sparse, axis=axis),
                    np.nanmin(X, axis=axis))

                np.testing.assert_array_equal(
                    nanmax(X, axis=axis),
                    np.nanmax(X, axis=axis))

                np.testing.assert_array_equal(
                    nanmax(X_sparse, axis=axis),
                    np.nanmax(X, axis=axis))

Exemple #12

0

Afficher le fichier

Fichier : test_statistics.py Projet : biolab/orange3

    def test_nanmin_nanmax(self):
        warnings.filterwarnings("ignore", r".*All-NaN slice encountered.*")
        for X in self.data:
            X_sparse = csr_matrix(X)
            for axis in [None, 0, 1]:
                np.testing.assert_array_equal(
                    nanmin(X, axis=axis),
                    np.nanmin(X, axis=axis))

                np.testing.assert_array_equal(
                    nanmin(X_sparse, axis=axis),
                    np.nanmin(X, axis=axis))

                np.testing.assert_array_equal(
                    nanmax(X, axis=axis),
                    np.nanmax(X, axis=axis))

                np.testing.assert_array_equal(
                    nanmax(X_sparse, axis=axis),
                    np.nanmax(X, axis=axis))

Exemple #13

0

Afficher le fichier

    def __compute_statistics(self):
        # Since data matrices can of mixed sparsity, we need to compute
        # attributes separately for each of them.
        matrices = [self.__attributes, self.__class_vars, self.__metas]
        # Filter out any matrices with size 0
        matrices = list(filter(lambda tup: tup[1].size, matrices))

        self._variable_types = np.array([type(var) for var in self.variables])
        self._variable_names = np.array(
            [var.name.lower() for var in self.variables])
        self._min = self.__compute_stat(
            matrices,
            discrete_f=lambda x: ut.nanmin(x, axis=0),
            continuous_f=lambda x: ut.nanmin(x, axis=0),
            time_f=lambda x: ut.nanmin(x, axis=0),
        )
        self._dispersion = self.__compute_stat(
            matrices,
            discrete_f=_categorical_entropy,
            continuous_f=lambda x: np.sqrt(ut.nanvar(x, axis=0)) / ut.nanmean(
                x, axis=0),
        )
        self._missing = self.__compute_stat(
            matrices,
            discrete_f=lambda x: ut.countnans(x, axis=0),
            continuous_f=lambda x: ut.countnans(x, axis=0),
            string_f=lambda x: (x == StringVariable.Unknown).sum(axis=0),
            time_f=lambda x: ut.countnans(x, axis=0),
        )
        self._max = self.__compute_stat(
            matrices,
            discrete_f=lambda x: ut.nanmax(x, axis=0),
            continuous_f=lambda x: ut.nanmax(x, axis=0),
            time_f=lambda x: ut.nanmax(x, axis=0),
        )
        self._center = self.__compute_stat(
            matrices,
            discrete_f=lambda x: ss.mode(x)[0],
            continuous_f=lambda x: ut.nanmean(x, axis=0),
            time_f=lambda x: ut.nanmean(x, axis=0),
        )

Exemple #14

0

Afficher le fichier

Fichier : owfeaturestatistics.py Projet : lanzagar/orange3

    def __compute_statistics(self):
        # Since data matrices can of mixed sparsity, we need to compute
        # attributes separately for each of them.
        matrices = [self.__attributes, self.__class_vars, self.__metas]
        # Filter out any matrices with size 0
        matrices = list(filter(lambda tup: tup[1].size, matrices))

        self._variable_types = np.array([type(var) for var in self.variables])
        self._variable_names = np.array([var.name.lower() for var in self.variables])
        self._min = self.__compute_stat(
            matrices,
            discrete_f=lambda x: ut.nanmin(x, axis=0),
            continuous_f=lambda x: ut.nanmin(x, axis=0),
            time_f=lambda x: ut.nanmin(x, axis=0),
        )
        self._dispersion = self.__compute_stat(
            matrices,
            discrete_f=_categorical_entropy,
            continuous_f=lambda x: np.sqrt(ut.nanvar(x, axis=0)) / ut.nanmean(x, axis=0),
        )
        self._missing = self.__compute_stat(
            matrices,
            discrete_f=lambda x: ut.countnans(x, axis=0),
            continuous_f=lambda x: ut.countnans(x, axis=0),
            string_f=lambda x: (x == StringVariable.Unknown).sum(axis=0),
            time_f=lambda x: ut.countnans(x, axis=0),
        )
        self._max = self.__compute_stat(
            matrices,
            discrete_f=lambda x: ut.nanmax(x, axis=0),
            continuous_f=lambda x: ut.nanmax(x, axis=0),
            time_f=lambda x: ut.nanmax(x, axis=0),
        )
        self._center = self.__compute_stat(
            matrices,
            discrete_f=lambda x: ss.mode(x)[0],
            continuous_f=lambda x: ut.nanmean(x, axis=0),
            time_f=lambda x: ut.nanmean(x, axis=0),
        )

Exemple #15

0

Afficher le fichier

Fichier : discretize.py Projet : markotoplak/orange3

 def __call__(self, data: Table, attribute):
     values, _ = data.get_column_view(attribute)
     points = []
     if values.size:
         mn, mx = ut.nanmin(values), ut.nanmax(values)
         if not np.isnan(mn):
             minf = int(1 + np.floor(mn / self.width))
             maxf = int(1 + np.floor(mx / self.width))
             if maxf - minf - 1 >= 100:
                 raise TooManyIntervals
             points = [i * self.width for i in range(minf, maxf)]
     return Discretizer.create_discretized_var(data.domain[attribute],
                                               points,
                                               ndigits=self.digits)

Exemple #16

0

Afficher le fichier

Fichier : discretize.py Projet : yuta2/orange3

 def __call__(self, data, attribute, fixed=None):
     if fixed:
         min, max = fixed[attribute.name]
         points = self._split_eq_width(min, max)
     else:
         if type(data) == SqlTable:
             stats = BasicStats(data, attribute)
             points = self._split_eq_width(stats.min, stats.max)
         else:
             values = data[:, attribute]
             values = values.X if values.X.size else values.Y
             min, max = ut.nanmin(values), ut.nanmax(values)
             points = self._split_eq_width(min, max)
     return Discretizer.create_discretized_var(
         data.domain[attribute], points)

Exemple #17

0

Afficher le fichier

Fichier : discretize.py Projet : markotoplak/orange3

 def __call__(self, data: Table, attribute, fixed=None):
     if fixed:
         mn, mx = fixed[attribute.name]
         points = self._split_eq_width(mn, mx)
     else:
         if type(data) == SqlTable:
             stats = BasicStats(data, attribute)
             points = self._split_eq_width(stats.min, stats.max)
         else:
             values, _ = data.get_column_view(attribute)
             if values.size:
                 mn, mx = ut.nanmin(values), ut.nanmax(values)
                 points = self._split_eq_width(mn, mx)
             else:
                 points = []
     return Discretizer.create_discretized_var(data.domain[attribute],
                                               points)

Exemple #18

0

Afficher le fichier

    def calculate_log_reg_coefficients(self):
        self.log_reg_coeffs = []
        self.log_reg_cont_data_extremes = []
        self.b0 = None
        if self.classifier is None or self.domain is None:
            return
        if not isinstance(self.classifier, LogisticRegressionClassifier):
            return

        self.domain = self.reconstruct_domain(self.classifier.original_domain,
                                              self.domain)
        self.data = Table.from_table(self.domain,
                                     self.classifier.original_data)
        attrs, ranges, start = self.domain.attributes, [], 0
        for attr in attrs:
            stop = start + len(attr.values) if attr.is_discrete else start + 1
            ranges.append(slice(start, stop))
            start = stop

        self.b0 = self.classifier.intercept
        coeffs = self.classifier.coefficients
        if len(self.domain.class_var.values) == 2:
            self.b0 = np.hstack((self.b0 * (-1), self.b0))
            coeffs = np.vstack((coeffs * (-1), coeffs))
        self.log_reg_coeffs = [coeffs[:, ranges[i]] for i in range(len(attrs))]
        self.log_reg_coeffs_orig = self.log_reg_coeffs.copy()

        min_values = nanmin(self.data.X, axis=0)
        max_values = nanmax(self.data.X, axis=0)

        for i, min_t, max_t in zip(range(len(self.log_reg_coeffs)), min_values,
                                   max_values):
            if self.log_reg_coeffs[i].shape[1] == 1:
                coef = self.log_reg_coeffs[i]
                self.log_reg_coeffs[i] = np.hstack(
                    (coef * min_t, coef * max_t))
                self.log_reg_cont_data_extremes.append(
                    [sorted([min_t, max_t], reverse=(c < 0)) for c in coef])
            else:
                self.log_reg_cont_data_extremes.append([None])

Exemple #19

0

Afficher le fichier

Fichier : ownomogram.py Projet : astaric/orange3

    def calculate_log_reg_coefficients(self):
        self.log_reg_coeffs = []
        self.log_reg_cont_data_extremes = []
        self.b0 = None
        if self.classifier is None or self.domain is None:
            return
        if not isinstance(self.classifier, LogisticRegressionClassifier):
            return

        self.domain = self.reconstruct_domain(self.classifier.original_domain,
                                              self.domain)
        self.data = self.classifier.original_data.transform(self.domain)
        attrs, ranges, start = self.domain.attributes, [], 0
        for attr in attrs:
            stop = start + len(attr.values) if attr.is_discrete else start + 1
            ranges.append(slice(start, stop))
            start = stop

        self.b0 = self.classifier.intercept
        coeffs = self.classifier.coefficients
        if len(self.domain.class_var.values) == 2:
            self.b0 = np.hstack((self.b0 * (-1), self.b0))
            coeffs = np.vstack((coeffs * (-1), coeffs))
        self.log_reg_coeffs = [coeffs[:, ranges[i]] for i in range(len(attrs))]
        self.log_reg_coeffs_orig = self.log_reg_coeffs.copy()

        min_values = nanmin(self.data.X, axis=0)
        max_values = nanmax(self.data.X, axis=0)

        for i, min_t, max_t in zip(range(len(self.log_reg_coeffs)),
                                   min_values, max_values):
            if self.log_reg_coeffs[i].shape[1] == 1:
                coef = self.log_reg_coeffs[i]
                self.log_reg_coeffs[i] = np.hstack((coef * min_t, coef * max_t))
                self.log_reg_cont_data_extremes.append(
                    [sorted([min_t, max_t], reverse=(c < 0)) for c in coef])
            else:
                self.log_reg_cont_data_extremes.append([None])

Exemple #20

0

Afficher le fichier

Fichier : discretize.py Projet : markotoplak/orange3

 def __call__(self, data: Table, attribute):
     fmt = [
         "%Y", "%y %b", "%y %b %d", "%y %b %d %H:%M", "%y %b %d %H:%M",
         "%H:%M:%S"
     ][self.unit]
     values, _ = data.get_column_view(attribute)
     times = []
     if values.size:
         mn, mx = ut.nanmin(values), ut.nanmax(values)
         if not np.isnan(mn):
             mn = utc_from_timestamp(mn).timetuple()
             mx = utc_from_timestamp(mx).timetuple()
             times = _time_range(mn, mx, self.unit, self.width, 0, 100)
             if times is None:
                 raise TooManyIntervals
     times = [time.struct_time(t + (0, 0, 0)) for t in times][1:-1]
     points = np.array([calendar.timegm(t) for t in times])
     values = [time.strftime(fmt, t) for t in times]
     values = _simplified_time_intervals(values)
     var = data.domain[attribute]
     return DiscreteVariable(name=var.name,
                             values=values,
                             compute_value=Discretizer(var, points),
                             sparse=var.sparse)

Exemple #21

0

Afficher le fichier

Fichier : owfeaturestatistics.py Projet : Juriell/orange3-prototypes

    def __compute_statistics(self):
        # We will compute statistics over all data at once
        matrices = [self._data.X, self._data._Y, self._data.metas]

        # Since data matrices can of mixed sparsity, we need to compute
        # attributes separately for each of them.
        matrices = zip([
            self._domain.attributes, self._domain.class_vars, self._domain.metas
        ], matrices)
        # Filter out any matrices with size 0, filter the zipped matrices to 
        # eliminate variables in a single swoop
        matrices = list(filter(lambda tup: tup[1].size, matrices))

        def _apply_to_types(attrs_x_pair, discrete_f=None, continuous_f=None,
                            time_f=None, string_f=None, default_val=np.nan):
            """Apply functions to variable types e.g. discrete_f to discrete 
            variables. Default value is returned if there is no function 
            defined for specific variable types."""
            attrs, x = attrs_x_pair
            result = np.full(len(attrs), default_val)
            disc_var_idx, cont_var_idx, time_var_idx, str_var_idx = self._attr_indices(attrs)
            if discrete_f and x[:, disc_var_idx].size:
                result[disc_var_idx] = discrete_f(x[:, disc_var_idx].astype(np.float64))
            if continuous_f and x[:, cont_var_idx].size:
                result[cont_var_idx] = continuous_f(x[:, cont_var_idx].astype(np.float64))
            if time_f and x[:, time_var_idx].size:
                result[time_var_idx] = time_f(x[:, time_var_idx].astype(np.float64))
            if string_f and x[:, str_var_idx].size:
                result[str_var_idx] = string_f(x[:, str_var_idx].astype(np.object))
            return result

        self._variable_types = [type(var) for var in self._attributes]
        self._variable_names = [var.name.lower() for var in self._attributes]

        # Compute the center
        _center = partial(
            _apply_to_types,
            discrete_f=lambda x: ss.mode(x)[0],
            continuous_f=lambda x: ut.nanmean(x, axis=0),
        )
        self._center = np.hstack(map(_center, matrices))

        # Compute the dispersion
        def _entropy(x):
            p = [ut.bincount(row)[0] for row in x.T]
            p = [pk / np.sum(pk) for pk in p]
            return np.fromiter((ss.entropy(pk) for pk in p), dtype=np.float64)
        _dispersion = partial(
            _apply_to_types,
            discrete_f=lambda x: _entropy(x),
            continuous_f=lambda x: ut.nanvar(x, axis=0),
        )
        self._dispersion = np.hstack(map(_dispersion, matrices))

        # Compute minimum values
        _max = partial(
            _apply_to_types,
            discrete_f=lambda x: ut.nanmax(x, axis=0),
            continuous_f=lambda x: ut.nanmax(x, axis=0),
        )
        self._max = np.hstack(map(_max, matrices))

        # Compute maximum values
        _min = partial(
            _apply_to_types,
            discrete_f=lambda x: ut.nanmin(x, axis=0),
            continuous_f=lambda x: ut.nanmin(x, axis=0),
        )
        self._min = np.hstack(map(_min, matrices))

        # Compute # of missing values
        _missing = partial(
            _apply_to_types,
            discrete_f=lambda x: ut.countnans(x, axis=0),
            continuous_f=lambda x: ut.countnans(x, axis=0),
            string_f=lambda x: (x == StringVariable.Unknown).sum(axis=0),
            time_f=lambda x: ut.countnans(x, axis=0),
        )
        self._missing = np.hstack(map(_missing, matrices))