Esempio n. 1
0
 def __call__(self, data):
     """
     Return ``data`` moved into the target's feature space.

     The new ``X`` values are computed by ``_InterpolateCommon`` (configured
     from ``self.points``, ``self.kind``, ``self.handle_nans`` and
     ``self.interpfn``) applied to the incoming ``data``. The table is then
     transformed into a domain built from the target's attributes together
     with the class variables and metas of ``data``, and its ``X`` is
     replaced by the computed values.
     """
     interpolate = _InterpolateCommon(self.points,
                                      self.kind,
                                      None,
                                      handle_nans=self.handle_nans,
                                      interpfn=self.interpfn)
     # Compute the values on the original table, before it is transformed.
     new_x = interpolate(data)

     target_domain = Orange.data.Domain(self.target.domain.attributes,
                                        data.domain.class_vars,
                                        data.domain.metas)
     data = data.transform(target_domain)
     with data.unlocked(data.X):
         data.X = new_x
     return data
Esempio n. 2
0
    def commit(self):
        """
        Send the selected rows and the annotated table to the outputs.

        When input data is present, both output tables get one extra
        continuous meta column named ``Silhouette (<cluster variable>)``
        (made unique against the existing column names) that holds the
        silhouette scores; rows flagged in ``self._mask`` get NaN.
        ``None`` is sent as the selection when there is no data or no row is
        selected.
        """
        selected = indices = data = None
        if self.data is not None:
            selectedmask = np.zeros(len(self.data), dtype=bool)
            if self._silplot is not None:
                indices = self._silplot.selection()
                assert (np.diff(indices) > 0).all(), "strictly increasing"
                if self._mask is not None:
                    # Map the plot's indices onto positions of the rows that
                    # are not masked out.
                    # pylint: disable=invalid-unary-operand-type
                    indices = np.flatnonzero(~self._mask)[indices]
                selectedmask[indices] = True

            if self._mask is None:
                scores = self._silhouette
            else:
                # One score per input row; masked rows stay NaN.
                scores = np.full(selectedmask.shape, np.nan)
                # pylint: disable=invalid-unary-operand-type
                scores[~self._mask] = self._silhouette

            cluster_var = self.cluster_var_model[self.cluster_var_idx]

            source_domain = self.data.domain
            proposed = "Silhouette ({})".format(escape(cluster_var.name))
            taken_names = [
                column.name
                for column in itertools.chain(source_domain.attributes,
                                              source_domain.class_vars,
                                              source_domain.metas)
            ]
            silhouette_var = Orange.data.ContinuousVariable(
                get_unique_names(taken_names, proposed))
            out_domain = Orange.data.Domain(
                source_domain.attributes,
                source_domain.class_vars,
                source_domain.metas + (silhouette_var, ))

            if np.count_nonzero(selectedmask):
                selected = self.data.from_table(
                    out_domain, self.data, np.flatnonzero(selectedmask))
                with selected.unlocked(selected.metas):
                    selected[:, silhouette_var] = np.c_[scores[selectedmask]]

            data = self.data.transform(out_domain)
            with data.unlocked(data.metas):
                data[:, silhouette_var] = np.c_[scores]

        self.Outputs.selected_data.send(selected)
        self.Outputs.annotated_data.send(create_annotated_table(data, indices))
Esempio n. 3
0
    def transformed(self, data):
        """
        Return the normalized ``X`` array of ``data`` for ``self.method``.

        A table without rows is returned as is (its own ``X``). Otherwise the
        work is done on ``data.copy()`` and the ``X`` of that copy is
        returned.

        Handled methods:

        - ``Normalize.Vector``: rows are passed through
          ``sknormalize(..., norm='l2', axis=1)``. If any NaN is present the
          values are first interpolated over the NaNs, remaining NaNs are
          replaced by ``np.nan_to_num``, and after normalization the
          original NaN positions are set back to NaN.
        - ``Normalize.Area``: ``X`` is divided by the output of ``Integrate``
          over ``[self.lower, self.upper]`` using ``self.int_method``.
        - ``Normalize.SNV``: each row has its NaN-ignoring mean subtracted
          and is divided by its NaN-ignoring standard deviation.
        - ``Normalize.Attribute``: ``X`` is divided by the values of
          ``self.attr`` when it is a continuous variable of the domain;
          otherwise every value becomes NaN.
        - ``Normalize.MinMax``: each row is divided by its (max - min) range.

        Any other method leaves the copied values untouched.
        """
        if data.X.shape[0] == 0:
            return data.X
        data = data.copy()

        with data.unlocked():
            if self.method == Normalize.Vector:
                nans = np.isnan(data.X)
                has_nans = nans.any()
                ys = data.X
                if has_nans:
                    # interpolate nan elements for normalization
                    x = getx(data)
                    ys = interp1d_with_unknowns_numpy(x, ys, x)
                    # NaNs that survive the interpolation are replaced so
                    # that they do not poison the row norm
                    ys = np.nan_to_num(ys)
                data.X = sknormalize(ys, norm='l2', axis=1, copy=False)
                if has_nans:
                    # keep nans where they were
                    data.X[nans] = float("nan")
            elif self.method == Normalize.Area:
                norm_data = Integrate(methods=self.int_method,
                                      limits=[[self.lower, self.upper]])(data)
                data.X /= norm_data.X
                # presumably cleans up infinities produced by the division;
                # replace_infs is defined elsewhere
                replace_infs(data.X)
            elif self.method == Normalize.SNV:
                row_mean = bottleneck.nanmean(data.X, axis=1).reshape(-1, 1)
                row_std = bottleneck.nanstd(data.X, axis=1).reshape(-1, 1)
                data.X = (data.X - row_mean) / row_std
                replace_infs(data.X)
            elif self.method == Normalize.Attribute:
                if self.attr in data.domain and isinstance(
                        data.domain[self.attr],
                        Orange.data.ContinuousVariable):
                    ndom = Orange.data.Domain([data.domain[self.attr]])
                    factors = data.transform(ndom)
                    data.X /= factors.X
                    replace_infs(data.X)
                else:  # invalid attribute for normalization
                    data.X *= float("nan")
            elif self.method == Normalize.MinMax:
                row_min = bottleneck.nanmin(data.X, axis=1).reshape(-1, 1)
                row_max = bottleneck.nanmax(data.X, axis=1).reshape(-1, 1)
                # NOTE(review): the row minimum is not subtracted before
                # scaling, so the result is not shifted to start at 0 --
                # confirm this is intended.
                data.X = data.X / (row_max - row_min)
                replace_infs(data.X)
        return data.X
Esempio n. 4
0
    def commit(self):
        """
        Concatenate the input tables and send the result to the output.

        With primary data present, its domain is used and the additional
        tables are appended to it. Otherwise the domain comes from
        ``self.merge_domains`` over the additional tables (or over
        ``self._dumb_tables()`` when ``self.ignore_compute_value`` is set).
        If ``self.append_source_column`` is set, a discrete variable listing
        the table names is added in the role selected by
        ``self.source_column_role`` and filled with each row's table index.
        ``None`` is sent when there is nothing to concatenate.
        """
        self.Warning.renamed_variables.clear()

        tables, domain = [], None
        if self.primary_data is not None:
            tables = [self.primary_data] + list(self.more_data)
            domain = self.primary_data.domain
        elif self.more_data:
            tables = (self._dumb_tables() if self.ignore_compute_value
                      else self.more_data)
            domain = self.merge_domains([table.domain for table in tables])

        if not tables:
            self.Outputs.data.send(None)
            return

        source_var = None
        if self.append_source_column:
            assert domain is not None
            names = [getattr(t, 'name', '') for t in tables]
            if len(set(names)) != len(names):
                # Duplicate table names: append the position to every name.
                names = ['{} ({})'.format(name, i)
                         for i, name in enumerate(names)]
            source_var = Orange.data.DiscreteVariable(
                get_unique_names(domain, self.source_attr_name),
                values=names)
            role = ("class_vars", "attributes",
                    "metas")[self.source_column_role]
            domain = add_columns(domain, **{role: (source_var, )})

        tables = [table.transform(domain) for table in tables]
        data = type(tables[0]).concatenate(tables)
        if source_var:
            # One table index per row, as a single column.
            per_table_ids = ([i] * len(table)
                             for i, table in enumerate(tables))
            source_ids = np.array(list(flatten(per_table_ids))).reshape(
                (-1, 1))
            # Same ordering as the role names above: class, attribute, meta.
            target_part = (data.Y, data.X,
                           data.metas)[self.source_column_role]
            with data.unlocked(target_part):
                data[:, source_var] = source_ids

        self.Outputs.data.send(data)
    def commit(self):
        """
        Send the selected and the annotated data for the dendrogram selection.

        ``None`` is sent on both outputs when there are no items. With an
        empty selection, ``None`` is sent as the selection and the annotated
        output is an annotated table of the items with nothing selected, but
        only when ``self.selection_method == 0`` and ``self.matrix.axis`` is
        truthy (otherwise ``None``).

        For table items:

        - ``self.matrix.axis == 1`` (rows): a discrete meta variable named
          "Cluster" (made unique) is added, with values ``C1..Cn`` for the
          selected clusters and "Other" for unselected rows. The selection
          output contains the selected rows only, with a cluster variable
          lacking the "Other" value.
        - ``self.matrix.axis == 0`` (columns): attributes are reordered by
          cluster and tagged with a ``"cluster"`` entry in their
          ``attributes`` (0 for unselected); the selection output keeps only
          the selected columns.

        For any other kind of items both outputs are ``None``.
        """
        items = getattr(self.matrix, "items", self.items)
        if not items:
            self.Outputs.selected_data.send(None)
            self.Outputs.annotated_data.send(None)
            return

        selection = sorted(self.dendrogram.selected_nodes(),
                           key=lambda c: c.value.first)
        leaf_indices = [leaf.value.index for leaf in leaves(self.root)]

        # One list of item indices per selected cluster node.
        maps = [
            leaf_indices[node.value.first:node.value.last]
            for node in selection
        ]

        selected_indices = list(chain.from_iterable(maps))
        unselected_indices = sorted(
            set(range(self.root.value.last)) - set(selected_indices))

        if not selected_indices:
            self.Outputs.selected_data.send(None)
            annotated_data = create_annotated_table(items, []) \
                if self.selection_method == 0 and self.matrix.axis else None
            self.Outputs.annotated_data.send(annotated_data)
            return

        # Default both outputs so that items that are not a table (or an
        # unexpected axis) send None instead of failing on an unbound name.
        selected_data = annotated_data = None
        is_table = isinstance(items, Orange.data.Table)

        if is_table and self.matrix.axis == 1:
            # Select rows
            c = np.zeros(self.matrix.shape[0])
            for i, cluster_rows in enumerate(maps):
                c[cluster_rows] = i
            # Unselected rows get the index of the trailing "Other" value.
            c[unselected_indices] = len(maps)

            mask = c != len(maps)

            domain = items.domain
            attrs = domain.attributes
            classes = domain.class_vars
            metas = domain.metas

            var_name = get_unique_names(domain, "Cluster")
            values = [f"C{i + 1}" for i in range(len(maps))]

            clust_var = Orange.data.DiscreteVariable(var_name,
                                                     values=values + ["Other"])
            domain = Orange.data.Domain(attrs, classes, metas + (clust_var, ))
            data = items.transform(domain)
            with data.unlocked(data.metas):
                data.get_column_view(clust_var)[0][:] = c

            # The selection is known to be non-empty here; its cluster
            # variable drops the "Other" value.
            selected_data = data[mask]
            clust_var = Orange.data.DiscreteVariable(var_name,
                                                     values=values)
            selected_data.domain = Orange.data.Domain(
                attrs, classes, metas + (clust_var, ))

            annotated_data = create_annotated_table(data, selected_indices)

        elif is_table and self.matrix.axis == 0:
            # Select columns
            attrs = []
            for clust, cluster_cols in chain(enumerate(maps, start=1),
                                             [(0, unselected_indices)]):
                for i in cluster_cols:
                    attr = items.domain[i].copy()
                    attr.attributes["cluster"] = clust
                    attrs.append(attr)
            domain = Orange.data.Domain(
                # len(unselected_indices) can be 0
                attrs[:len(attrs) - len(unselected_indices)],
                items.domain.class_vars,
                items.domain.metas)
            selected_data = items.from_table(domain, items)

            domain = Orange.data.Domain(attrs, items.domain.class_vars,
                                        items.domain.metas)
            annotated_data = items.from_table(domain, items)

        self.Outputs.selected_data.send(selected_data)
        self.Outputs.annotated_data.send(annotated_data)