Esempio n. 1
0
 def _group_cols(self, data, group, attr):
     if isinstance(attr, np.ndarray):
         attr_col = attr
     else:
         attr_col = data.get_column_view(group)[0].astype(float)
     group_col = data.get_column_view(group)[0].astype(float)
     groups = [attr_col[group_col == i] for i in range(len(group.values))]
     groups = [col[~np.isnan(col)] for col in groups]
     return groups
Esempio n. 2
0
 def compute_score(attr):
     """
     Sort key for `attr`: the p-value of its relation to the grouping.

     `data`, `group_var`, `group_col` and `n_groups` come from the
     enclosing scope. The grouping variable itself scores 3; attributes
     for which no p-value can be computed (or whose p-value is NaN)
     score 2.
     """
     if attr is group_var:
         return 3
     if attr.is_continuous:
         # One-way ANOVA over the non-empty groups, missing values dropped
         values = data.get_column_view(attr)[0].astype(float)
         samples = []
         for index in range(n_groups):
             sample = values[group_col == index]
             sample = sample[~np.isnan(sample)]
             if len(sample):
                 samples.append(sample)
         if len(samples) > 1:
             p = f_oneway(*samples)[1]
         else:
             p = 2
     else:
         # Chi-square with the given distribution into groups
         # (see degrees of freedom in computation of the p-value)
         if not (attr.values and group_var.values):
             return 2
         table = np.array(
             contingency.get_contingency(data, group_var, attr))
         # drop all-zero rows first, then all-zero columns
         nonempty_rows = table.sum(axis=1) != 0
         table = table[nonempty_rows, :]
         nonempty_cols = table.sum(axis=0) != 0
         table = table[:, nonempty_cols]
         if min(table.shape) < 2:
             return 2
         row_totals = table.sum(axis=1)
         col_totals = table.sum(axis=0)
         expected = np.outer(row_totals, col_totals) / np.sum(table)
         p = chisquare(table.ravel(), f_exp=expected.ravel(),
                       ddof=n_groups - 1)[1]
     return 2 if math.isnan(p) else p
Esempio n. 3
0
    def apply_group_sorting(self):
        """
        Re-order the list of grouping variables.

        When `order_grouping_by_importance` is set, the variables are
        sorted with `compute_stat` as the key; otherwise `_sort_list` is
        called without a key function. Does nothing when there is no
        dataset.
        """
        def compute_stat(group):
            # This function and the one in apply_attr_sorting are similar, but
            # different in too many details, so they are kept as separate
            # functions.
            # If you discover a bug in this function, check the other one, too.
            if group is attr:
                return 3
            if group is None:
                return -1
            if not attr.is_continuous:
                p = self._chi_square(group, attr)[1]
            else:
                group_col = data.get_column_view(group)[0].astype(float)
                samples = []
                for index in range(len(group.values)):
                    sample = attr_col[group_col == index]
                    sample = sample[~np.isnan(sample)]
                    if len(sample):
                        samples.append(sample)
                # ANOVA needs at least two non-empty groups
                p = f_oneway(*samples)[1] if len(samples) > 1 else 2
            return 2 if math.isnan(p) else p

        data = self.dataset
        if data is None:
            return
        attr = self.attribute
        if not self.order_grouping_by_importance:
            self._sort_list(self.group_vars, self.group_list, None)
            return
        if attr.is_continuous:
            # cached once; compute_stat reads it for every candidate group
            attr_col = data.get_column_view(attr)[0].astype(float)
        self._sort_list(self.group_vars, self.group_list, compute_stat)
    def _setup(self):
        """
        Rebuild the table model for the currently selected group.

        The rows of `self.source` whose first meta column matches the
        current group are kept, the remaining meta columns are put into a
        new `LinkedTableModel`, and that model replaces the proxy model's
        source. The context is closed before and re-opened after the
        change, then the selection is restored and the output committed.
        """
        self.closeContext()
        source = self.source
        group_var = source.domain.metas[0]
        group_column = source.get_column_view(group_var)[0]
        if group_var.is_string:
            # string groups are matched by the combo box item text
            wanted = self.group_cb.itemData(self.group_index,
                                            Qt.DisplayRole)
        else:
            wanted = self.group_index

        subset = source[group_column == wanted]
        rest = subset[:, subset.domain.metas[1:]]
        model = LinkedTableModel(rest, parent=self)
        reference_var = rest.domain[HeaderLabels.REFERENCE]
        ref_col = rest.domain.metas.index(reference_var)
        delegate = gui.LinkStyledItemDelegate(self.view)
        self.view.setItemDelegateForColumn(ref_col, delegate)

        previous_model = self.proxy_model.sourceModel()
        if previous_model:
            previous_model.deleteLater()
        self.proxy_model.setSourceModel(model)

        self.openContext(self.selected_group)
        self.set_selection()

        self.commit()
    def set_source(self, data):
        # type: (Orange.data.Table) -> None
        """
        Store `data` as the source table and refresh the group selector.

        The output is a subset filtered on the first meta column (group).
        Nothing besides storing `data` happens when the domain has no
        meta columns.

        Raises
        ------
        TypeError
            If the first meta column is neither string nor discrete.
        """
        self.source = data
        metas = data.domain.metas
        if not metas:
            return

        group = metas[0]
        groupcol, _ = data.get_column_view(group)

        if group.is_string:
            group_values = [str(value) for value in unique(groupcol)]
        elif group.is_discrete:
            group_values = group.values
        else:
            raise TypeError("Invalid column type")

        # position of the previously selected group, if it still exists
        try:
            position = group_values.index(self.selected_group)
        except ValueError:
            position = None

        self.group_cb.clear()
        self.group_cb.addItems(group_values)
        if position is not None:
            self.group_index = position
            self.selected_group = group_values[position]
        elif group_values:
            # keep the old index, clamped into the valid range
            upper = len(group_values) - 1
            self.group_index = min(max(self.group_index, 0), upper)
        self._setup()
Esempio n. 6
0
 def compute_score(attr):
     """
     Return the value by which `attr` is ordered.

     Uses `data`, `group_var`, `group_col` and `n_groups` from the
     enclosing scope. The result is a p-value (ANOVA for continuous
     attributes, chi-square otherwise), 3 for the grouping variable
     itself, and 2 whenever a p-value is not available or is NaN.
     """
     if attr is group_var:
         return 3
     if not attr.is_continuous:
         # Chi-square with the given distribution into groups
         # (see degrees of freedom in computation of the p-value)
         if not attr.values or not group_var.values:
             return 2
         counts = np.array(
             contingency.get_contingency(data, group_var, attr))
         counts = counts[counts.sum(axis=1) != 0, :]
         counts = counts[:, counts.sum(axis=0) != 0]
         if min(counts.shape) < 2:
             return 2
         margins = np.outer(counts.sum(axis=1), counts.sum(axis=0))
         expected = margins / np.sum(counts)
         p = chisquare(counts.ravel(),
                       f_exp=expected.ravel(),
                       ddof=n_groups - 1)[1]
     else:
         # One-way ANOVA
         column = data.get_column_view(attr)[0].astype(float)
         per_group = [column[group_col == i] for i in range(n_groups)]
         per_group = [part[~np.isnan(part)] for part in per_group]
         per_group = [part for part in per_group if len(part)]
         p = f_oneway(*per_group)[1] if len(per_group) > 1 else 2
     if math.isnan(p):
         return 2
     return p
Esempio n. 7
0
    def apply_attr_sorting(self):
        """
        Re-order the attribute list.

        When `order_by_importance` is set and a grouping variable is
        chosen, attributes are sorted with `compute_score` as the key;
        otherwise `_sort_list` is called without a key function. Does
        nothing when there is no dataset.
        """
        def compute_score(attr):
            # This function and the one in apply_group_sorting are similar, but
            # different in too many details, so they are kept as separate
            # functions.
            # If you discover a bug in this function, check the other one, too.
            if attr is group_var:
                return 3
            if attr.is_continuous:
                # One-way ANOVA
                values = data.get_column_view(attr)[0].astype(float)
                samples = []
                for index in range(n_groups):
                    sample = values[group_col == index]
                    sample = sample[~np.isnan(sample)]
                    if len(sample):
                        samples.append(sample)
                p = f_oneway(*samples)[1] if len(samples) > 1 else 2
            else:
                p = self._chi_square(group_var, attr)[1]
            return 2 if math.isnan(p) else p

        data = self.dataset
        if data is None:
            return
        domain = data.domain
        group_var = self.group_var
        if not self.order_by_importance or group_var is None:
            self._sort_list(self.attrs, self.attr_list, None)
            return

        n_groups = len(group_var.values)
        # the group column is only needed for scoring continuous attributes
        if domain.has_continuous_attributes(
                include_class=True, include_metas=True):
            group_col = data.get_column_view(group_var)[0]
        else:
            group_col = None
        self._sort_list(self.attrs, self.attr_list, compute_score)
Esempio n. 8
0
def group_mask_rows(data, var, values):
    """
    Build a boolean row mask selecting rows by the value of `var`.

    An entry of the mask is True when the corresponding row's value for
    `var` is one of `values`.

    Parameters
    ----------
    data : Orange.data.Table
        Source data table.
    var : Orange.data.DiscreteVariable
        The variable/column on which to match `values`.
    values : sequence of str
        The values to select (must be a subset of `var.values`)
    """
    variable = data.domain[var]
    column, _ = data.get_column_view(variable)
    # translate the value names into the indices stored in the column
    wanted = [variable.values.index(name) for name in values]
    return numpy.isin(column, wanted)
Esempio n. 9
0
    def apply_sorting(self):
        """
        Sort or reset the attribute list and keep the current attribute.

        With `order_by_importance` set and a grouping variable chosen,
        `self.attrs` is sorted in place by `compute_score`; otherwise
        `reset_attrs` is called with the domain. `self.attribute` is
        re-assigned afterwards. Does nothing when there is no dataset.
        """
        def compute_score(attr):
            if attr is group_var:
                return 3
            if not attr.is_continuous:
                p = self._chi_square(group_var, attr)[1]
            else:
                # One-way ANOVA
                values = data.get_column_view(attr)[0].astype(float)
                samples = []
                for index in range(n_groups):
                    sample = values[group_col == index]
                    sample = sample[~np.isnan(sample)]
                    if len(sample):
                        samples.append(sample)
                p = f_oneway(*samples)[1] if len(samples) > 1 else 2
            return 2 if math.isnan(p) else p

        data = self.dataset
        if data is None:
            return
        domain = data.domain
        attribute = self.attribute
        group_var = self.group_var
        sort_by_score = self.order_by_importance and group_var is not None
        if sort_by_score:
            n_groups = len(group_var.values)
            # the group column is only needed for continuous attributes
            if domain.has_continuous_attributes(
                    include_class=True, include_metas=True):
                group_col = data.get_column_view(group_var)[0]
            else:
                group_col = None
            self.attrs.sort(key=compute_score)
        else:
            self.reset_attrs(domain)
        self.attribute = attribute
Esempio n. 10
0
def group_mask_rows(data, var, values):
    """
    Compute a boolean mask over the rows (instances) of `data`.

    The mask is True for every row whose entry for `var` is one of the
    given `values`.

    Parameters
    ----------
    data : Orange.data.Table
        Source data table.
    var : Orange.data.DiscreteVariable
        The variable/column on which to match `values`.
    values : sequence of str
        The values to select (must be a subset of `var.values`)
    """
    variable = data.domain[var]
    column, _ = data.get_column_view(variable)
    # indices of the requested values within the variable's value list
    wanted = [variable.values.index(name) for name in values]

    selected = numpy.zeros_like(column, dtype=bool)
    for index in wanted:
        selected[column == index] = True
    return selected
Esempio n. 11
0
    def apply_sorting(self):
        """
        Sort or reset the attribute list and keep the current attribute.

        With `order_by_importance` set and a grouping variable chosen,
        `self.attrs` is sorted in place by the p-value computed in
        `compute_score`; otherwise `reset_attrs` is called with the
        domain. `self.attribute` is re-assigned afterwards. Does nothing
        when there is no dataset.
        """
        def compute_score(attr):
            if attr is group_var:
                return 3
            if attr.is_continuous:
                # One-way ANOVA
                values = data.get_column_view(attr)[0].astype(float)
                samples = [values[group_col == index]
                           for index in range(n_groups)]
                samples = [sample[~np.isnan(sample)] for sample in samples]
                samples = [sample for sample in samples if len(sample)]
                if len(samples) <= 1:
                    return 2
                p = f_oneway(*samples)[1]
            else:
                # Chi-square with the given distribution into groups
                # (see degrees of freedom in computation of the p-value)
                if not (attr.values and group_var.values):
                    return 2
                table = np.array(
                    contingency.get_contingency(data, group_var, attr))
                # drop all-zero rows first, then all-zero columns
                table = table[table.sum(axis=1) != 0, :]
                table = table[:, table.sum(axis=0) != 0]
                if min(table.shape) < 2:
                    return 2
                margins = np.outer(table.sum(axis=1), table.sum(axis=0))
                expected = margins / np.sum(table)
                p = chisquare(table.ravel(), f_exp=expected.ravel(),
                              ddof=n_groups - 1)[1]
            return 2 if math.isnan(p) else p

        data = self.dataset
        if data is None:
            return
        domain = data.domain
        attribute = self.attribute
        group_var = self.group_var
        sort_by_score = self.order_by_importance and group_var is not None
        if sort_by_score:
            n_groups = len(group_var.values)
            # the group column is only needed for continuous attributes
            if domain.has_continuous_attributes(
                    include_class=True, include_metas=True):
                group_col = data.get_column_view(group_var)[0]
            else:
                group_col = None
            self.attrs.sort(key=compute_score)
        else:
            self.reset_attrs(domain)
        self.attribute = attribute
Esempio n. 12
0
 def compute_score(attr):
     """
     Sort key for `attr`, based on a p-value.

     `data`, `group_var`, `group_col`, `n_groups` and `self` come from
     the enclosing scope. Returns 3 for the grouping variable itself and
     2 when no p-value is available or the p-value is NaN.
     """
     if attr is group_var:
         return 3
     if not attr.is_continuous:
         p = self._chi_square(group_var, attr)[1]
     else:
         # One-way ANOVA
         values = data.get_column_view(attr)[0].astype(float)
         samples = []
         for index in range(n_groups):
             sample = values[group_col == index]
             sample = sample[~np.isnan(sample)]
             if len(sample):
                 samples.append(sample)
         # ANOVA needs at least two non-empty groups
         p = f_oneway(*samples)[1] if len(samples) > 1 else 2
     return 2 if math.isnan(p) else p
Esempio n. 13
0
    def apply_sorting(self):
        """
        Sort or reset the attribute list and keep the current attribute.

        With `order_by_importance` set and a grouping variable chosen,
        `self.attrs` is sorted in place by the p-value computed in
        `compute_score`; otherwise its content is replaced by the domain's
        variables followed by the primitive meta attributes.
        `self.attribute` is re-assigned afterwards. Does nothing when
        there is no dataset.
        """
        def compute_score(attr):
            if attr is group_var:
                return 3
            if attr.is_continuous:
                # One-way ANOVA
                values = data.get_column_view(attr)[0].astype(float)
                samples = []
                for index in range(n_groups):
                    sample = values[group_col == index]
                    sample = sample[~np.isnan(sample)]
                    if len(sample):
                        samples.append(sample)
                p = f_oneway(*samples)[1] if len(samples) > 1 else 2
            else:
                # Chi-square with the given distribution into groups
                # (see degrees of freedom in computation of the p-value)
                if not (attr.values and group_var.values):
                    return 2
                table = np.array(
                    contingency.get_contingency(data, group_var, attr))
                # drop all-zero rows first, then all-zero columns
                table = table[table.sum(axis=1) != 0, :]
                table = table[:, table.sum(axis=0) != 0]
                if min(table.shape) < 2:
                    return 2
                row_totals = table.sum(axis=1)
                col_totals = table.sum(axis=0)
                expected = np.outer(row_totals, col_totals) / np.sum(table)
                p = chisquare(table.ravel(), f_exp=expected.ravel(),
                              ddof=n_groups - 1)[1]
            return 2 if math.isnan(p) else p

        data = self.dataset
        if data is None:
            return
        domain = data.domain
        attribute = self.attribute
        group_var = self.group_var
        sort_by_score = self.order_by_importance and group_var is not None
        if sort_by_score:
            n_groups = len(group_var.values)
            # the group column is only needed for continuous attributes
            if domain.has_continuous_attributes(
                    include_class=True, include_metas=True):
                group_col = data.get_column_view(group_var)[0]
            else:
                group_col = None
            self.attrs.sort(key=compute_score)
        else:
            primitive_metas = [
                meta for meta in data.domain.metas if meta.is_primitive()]
            self.attrs[:] = chain(domain.variables, primitive_metas)
        self.attribute = attribute
Esempio n. 14
0
    def __call__(self, data):
        """
        Return the column of `self.variable` with missing values filled in.

        `data` is either a single `Orange.data.Instance` or a table-like
        object. Entries that are NaN are replaced by the predictions of
        `self.model`; when nothing is missing the column is returned
        unchanged.
        """
        single = isinstance(data, Orange.data.Instance)
        if single:
            column = numpy.array([float(data[self.variable])])
        else:
            # work on a copy so the table's own column is not modified
            column = numpy.array(data.get_column_view(self.variable)[0], copy=True)

        missing = numpy.isnan(column)
        if numpy.any(missing):
            # for a table, only the rows with a missing value are predicted
            column[missing] = self.model(data if single else data[missing])
        return column
Esempio n. 15
0
    def __call__(self, data):
        """
        Return the column of `self.variable` with missing values filled in.

        `data` is either a single `Orange.data.Instance` or a table-like
        object. NaN entries are replaced by the predictions of
        `self.model`; a column without NaNs is returned as is.
        """
        is_instance = isinstance(data, Orange.data.Instance)
        if is_instance:
            values = np.array([float(data[self.variable])])
        else:
            # copy, so that the assignment below does not touch the table
            values = np.array(data.get_column_view(self.variable)[0],
                              copy=True)

        nan_mask = np.isnan(values)
        if not np.any(nan_mask):
            return values

        to_predict = data if is_instance else data[nan_mask]
        values[nan_mask] = self.model(to_predict)
        return values
Esempio n. 16
0
    def __call__(self, data):
        """
        Return the column of `self.variable` with missing values filled in.

        A single `Orange.data.Instance` is first wrapped into a one-row
        table. NaN entries of the column are replaced by the predictions
        of `self.model` for the corresponding rows; a column without NaNs
        is returned as is.
        """
        if isinstance(data, Orange.data.Instance):
            data = Orange.data.Table.from_list(data.domain, [data])
        domain = data.domain
        values = np.array(data.get_column_view(self.variable)[0], copy=True)

        missing = np.isnan(values)
        if np.any(missing):
            if domain.class_vars:
                # cannot have class var in domain (due to backmappers in model)
                without_class = Orange.data.Domain(
                    domain.attributes, None, domain.metas)
                data = data.transform(without_class)
            values[missing] = self.model(data[missing])
        return values
Esempio n. 17
0
def group_selection_mask(data, group, indices):
    """
    Return the selection mask for the chosen values of `group`.

    For a `ColumnGroup` the mask has one entry per attribute of
    `data.domain` and is True where the attribute carries one of the
    selected `(group.key, value)` pairs in its `attributes` dict. For a
    `RowGroup` the mask follows the column of `group.var` and is True
    where the column equals one of `indices`.

    Raises TypeError for any other kind of `group`.
    """
    if isinstance(group, ColumnGroup):
        wanted = {(group.key, group.values[i]) for i in indices}
        flags = []
        for var in data.domain.attributes:
            labels = set(var.attributes.items())
            flags.append(len(labels & wanted) > 0)
        return numpy.array(flags, dtype=bool)
    if isinstance(group, RowGroup):
        column, _ = data.get_column_view(group.var)
        mask = numpy.zeros_like(column, dtype=bool)
        for index in set(indices):
            mask[column == index] = True
        return mask
    raise TypeError("ColumnGroup or RowGroup expected, got {}".format(type(group).__name__))
Esempio n. 18
0
 def compute_score(attr):
     """
     Sort key for `attr`, based on a p-value.

     `data`, `group_var`, `group_col`, `n_groups` and `self` come from
     the enclosing scope. Returns 3 for the grouping variable itself and
     2 when no p-value is available or the p-value is NaN.
     """
     # This function and the one in apply_group_sorting are similar, but
     # different in too many details, so they are kept as separate
     # functions.
     # If you discover a bug in this function, check the other one, too.
     if attr is group_var:
         return 3
     if not attr.is_continuous:
         p = self._chi_square(group_var, attr)[1]
     else:
         # One-way ANOVA
         column = data.get_column_view(attr)[0].astype(float)
         per_group = [column[group_col == i] for i in range(n_groups)]
         per_group = [part[~np.isnan(part)] for part in per_group]
         per_group = [part for part in per_group if len(part)]
         if len(per_group) > 1:
             p = f_oneway(*per_group)[1]
         else:
             p = 2
     if math.isnan(p):
         return 2
     return p
Esempio n. 19
0
def group_selection_mask(data, group, indices):
    """
    Return the selection mask for the chosen values of `group`.

    A `ColumnGroup` yields one boolean per attribute of `data.domain`,
    True where the attribute's `attributes` dict contains one of the
    selected `(group.key, value)` pairs. A `RowGroup` yields a mask
    shaped like the column of `group.var`, True where the column equals
    one of `indices`.

    Raises TypeError for any other kind of `group`.
    """
    if isinstance(group, ColumnGroup):
        wanted = {(group.key, group.values[i]) for i in indices}

        def is_selected(var):
            return bool(wanted.intersection(set(var.attributes.items())))

        return numpy.array(
            [is_selected(var) for var in data.domain.attributes],
            dtype=bool)
    if isinstance(group, RowGroup):
        wanted = set(indices)
        column, _ = data.get_column_view(group.var)
        return numpy.isin(column, list(wanted))
    raise TypeError("ColumnGroup or RowGroup expected, got {}"
                    .format(type(group).__name__))
    def _setup_plot(self):
        """
        Setup the plot with new curve data.

        The graph is cleared and, for every entry of `self.classes` that
        has at least one row in the column of `self.group_var`, three
        items are added: the individual profiles, the mean profile and an
        error bar item spanning the first to third quartile around the
        mean. Groups without rows are recorded as None. The result is
        stored per group and the visibility of the items is updated.
        """
        assert self.data is not None
        self.graph.clear()

        data, domain = self.data, self.data.domain
        var = domain[self.group_var]
        class_col_data, _ = data.get_column_view(var)
        group_indices = [
            np.flatnonzero(class_col_data == i)
            for i in range(len(self.classes))
        ]

        self.graph.getAxis('bottom').setTicks([[
            (i + 1, str(a)) for i, a in enumerate(self.graph_variables)
        ]])

        X = np.arange(1, len(self.graph_variables) + 1)
        groups = []

        for i, indices in enumerate(group_indices):
            if len(indices) == 0:
                groups.append(None)
                continue

            if self.classes:
                # NOTE(review): setAlpha below modifies this object in
                # place; if class_colors stores QColor instances they are
                # changed too -- confirm this is intended.
                color = self.class_colors[i]
            else:
                color = QColor(Qt.darkGray)
            group_data = data[indices, self.graph_variables]
            plot_x, plot_y, connect = disconnected_curve_data(group_data.X,
                                                              x=X)

            color.setAlpha(200)
            lightcolor = QColor(color.lighter(factor=150))
            lightcolor.setAlpha(150)
            pen = QPen(color, 2)
            pen.setCosmetic(True)

            lightpen = QPen(lightcolor, 1)
            lightpen.setCosmetic(True)

            curve = pg.PlotCurveItem(
                x=plot_x,
                y=plot_y,
                connect=connect,
                pen=lightpen,
                symbolSize=2,
                antialias=True,
            )
            self.graph.addItem(curve)

            mean = np.nanmean(group_data.X, axis=0)

            meancurve = pg.PlotDataItem(x=X,
                                        y=mean,
                                        pen=pen,
                                        size=5,
                                        symbol="o",
                                        pxMode=True,
                                        symbolSize=5,
                                        antialias=True)
            self.graph.addItem(meancurve)

            q1, q3 = np.nanpercentile(group_data.X, [25, 75], axis=0)
            # TODO: implement and use a box plot item
            # The bar extents must not be negative. Clipping with the
            # value itself as the upper bound is a no-op, so only the
            # lower bound is given.
            errorbar = pg.ErrorBarItem(x=X,
                                       y=mean,
                                       bottom=np.clip(mean - q1, 0, None),
                                       top=np.clip(q3 - mean, 0, None),
                                       beam=0.5)
            self.graph.addItem(errorbar)
            groups.append(
                namespace(data=group_data,
                          indices=indices,
                          profiles=curve,
                          mean=meancurve,
                          boxplot=errorbar))

        self.__groups = groups
        self.__update_visibility()
    def commit(self):
        """
        Send the selected rows and the remaining rows to the outputs.

        The items are taken from `self.matrix.items` when present, else
        from `self.items`. Nothing is sent when there are no items. With
        an empty dendrogram selection both outputs receive None. When the
        items are an `Orange.data.Table`, the rows under the selected
        dendrogram nodes go to "Selected Data" and all others to
        "Other Data"; with `append_clusters` set, a discrete cluster
        variable is added in the role given by `cluster_role`. For any
        other kind of items both outputs receive None.
        """
        items = getattr(self.matrix, "items", self.items)
        if not items:
            # nothing to commit
            return

        selection = self.dendrogram.selected_nodes()
        selection = sorted(selection, key=lambda c: c.value.first)

        indices = [leaf.value.index for leaf in leaves(self.root)]

        # one list of item indices per selected cluster
        maps = [indices[node.value.first:node.value.last]
                for node in selection]

        selected_indices = list(chain(*maps))
        # Emptiness is decided on the index lists; fetching every item
        # just to test for emptiness is unnecessary work.
        if not selected_indices:
            self.send("Selected Data", None)
            self.send("Other Data", None)
            return

        unselected_indices = sorted(set(range(self.root.value.last)) -
                                    set(selected_indices))
        selected_data = unselected_data = None

        if isinstance(items, Orange.data.Table):
            # cluster number per row; len(maps) marks unselected rows
            c = numpy.zeros(len(items))

            for i, cluster_indices in enumerate(maps):
                c[cluster_indices] = i
            c[unselected_indices] = len(maps)

            mask = c != len(maps)

            if self.append_clusters:
                clust_var = Orange.data.DiscreteVariable(
                    str(self.cluster_name),
                    values=["Cluster {}".format(i + 1)
                            for i in range(len(maps))] +
                           ["Other"], ordered=True
                )
                data, domain = items, items.domain

                attrs = domain.attributes
                class_ = domain.class_vars
                metas = domain.metas

                if self.cluster_role == self.AttributeRole:
                    attrs = attrs + (clust_var,)
                elif self.cluster_role == self.ClassRole:
                    class_ = class_ + (clust_var,)
                elif self.cluster_role == self.MetaRole:
                    metas = metas + (clust_var,)

                domain = Orange.data.Domain(attrs, class_, metas)
                data = Orange.data.Table(domain, data)
                data.get_column_view(clust_var)[0][:] = c
            else:
                data = items

            # the selection is known to be non-empty at this point
            selected_data = data[mask]
            if unselected_indices:
                unselected_data = data[~mask]

        self.send("Selected Data", selected_data)
        self.send("Other Data", unselected_data)
Esempio n. 22
0
 def column(data, variable):
     """Return the column of `variable` in `data`, flattened with `ravel()`."""
     values, _ = data.get_column_view(variable)
     flattened = values.ravel()
     return flattened
Esempio n. 23
0
 def __call__(self, data, variable):
     col, _ = data.get_column_view(variable)
     return np.isnan(col)
Esempio n. 24
0
 def __call__(self, data, variable):
     col, _ = data.get_column_view(variable)
     return np.isnan(col)
Esempio n. 25
0
    def _setup_plot(self):
        """
        Setup the plot with new curve data.

        The graph is cleared and, for every entry of `self.classes` that
        has at least one row in the column of `self.group_var`, three
        items are added: the individual profiles, the mean profile and an
        error bar item spanning the first to third quartile around the
        mean. Groups without rows are recorded as None. The result is
        stored per group and the visibility of the items is updated.
        """
        assert self.data is not None
        self.graph.clear()

        data, domain = self.data, self.data.domain
        var = domain[self.group_var]
        class_col_data, _ = data.get_column_view(var)
        group_indices = [np.flatnonzero(class_col_data == i)
                         for i in range(len(self.classes))]

        self.graph.getAxis('bottom').setTicks([
            [(i+1, str(a)) for i, a in enumerate(self.graph_variables)]
        ])

        X = np.arange(1, len(self.graph_variables)+1)
        groups = []

        for i, indices in enumerate(group_indices):
            if len(indices) == 0:
                groups.append(None)
                continue

            if self.classes:
                # NOTE(review): setAlpha below modifies this object in
                # place; if class_colors stores QColor instances they are
                # changed too -- confirm this is intended.
                color = self.class_colors[i]
            else:
                color = QColor(Qt.darkGray)
            group_data = data[indices, self.graph_variables]
            plot_x, plot_y, connect = disconnected_curve_data(group_data.X, x=X)

            color.setAlpha(200)
            lightcolor = QColor(color.lighter(factor=150))
            lightcolor.setAlpha(150)
            pen = QPen(color, 2)
            pen.setCosmetic(True)

            lightpen = QPen(lightcolor, 1)
            lightpen.setCosmetic(True)

            curve = pg.PlotCurveItem(
                x=plot_x, y=plot_y, connect=connect,
                pen=lightpen, symbolSize=2, antialias=True,
            )
            self.graph.addItem(curve)

            mean = np.nanmean(group_data.X, axis=0)

            meancurve = pg.PlotDataItem(
                x=X, y=mean, pen=pen, size=5, symbol="o", pxMode=True,
                symbolSize=5, antialias=True
            )
            self.graph.addItem(meancurve)

            q1, q3 = np.nanpercentile(group_data.X, [25, 75], axis=0)
            # TODO: implement and use a box plot item
            # The bar extents must not be negative. Clipping with the
            # value itself as the upper bound is a no-op, so only the
            # lower bound is given.
            errorbar = pg.ErrorBarItem(
                x=X, y=mean,
                bottom=np.clip(mean - q1, 0, None),
                top=np.clip(q3 - mean, 0, None),
                beam=0.5
            )
            self.graph.addItem(errorbar)
            groups.append(
                namespace(
                    data=group_data, indices=indices,
                    profiles=curve, mean=meancurve,
                    boxplot=errorbar)
            )

        self.__groups = groups
        self.__update_visibility()
Esempio n. 26
0
    def _setup_plot(self):
        """
        Setup the plot with new curve data.

        Rows are grouped by the value of a discrete class variable, or
        put into a single group otherwise. For every group the individual
        profiles, one hover curve per row, the mean profile (with its own
        hover curve) and an error bar item spanning the first to third
        quartile around the mean are added to the graph. A legend entry
        is added per group when class names are available. The result is
        stored per group, then visibility and tooltips are updated.
        """
        assert self.data is not None

        legend = self.graph.plotItem.addLegend(offset=(-30, 30))

        data, domain = self.data, self.data.domain
        if is_discrete(domain.class_var):
            class_col_data, _ = data.get_column_view(domain.class_var)

            group_indices = [
                np.flatnonzero(class_col_data == i)
                for i in range(len(domain.class_var.values))
            ]
        else:
            group_indices = [np.arange(len(data))]

        X = np.arange(1, len(domain.attributes) + 1)
        groups = []

        for i, indices in enumerate(group_indices):
            if self.classes:
                # NOTE(review): setAlpha below modifies this object in
                # place; if class_colors stores QColor instances they are
                # changed too -- confirm this is intended.
                color = self.class_colors[i]
            else:
                color = QColor(Qt.darkGray)
            group_data = data[indices, :]
            plot_x, plot_y, connect = disconnected_curve_data(group_data.X,
                                                              x=X)

            color.setAlpha(200)
            lightcolor = QColor(color.lighter(factor=150))
            lightcolor.setAlpha(150)
            pen = QPen(color, 2)
            pen.setCosmetic(True)

            lightpen = QPen(lightcolor, 1)
            lightpen.setCosmetic(True)
            hoverpen = QPen(pen)
            hoverpen.setWidth(2)

            curve = pg.PlotCurveItem(
                x=plot_x,
                y=plot_y,
                connect=connect,
                pen=lightpen,
                symbolSize=2,
                antialias=True,
            )
            self.graph.addItem(curve)

            # one hover curve per row, tagged with the row's index
            hovercurves = []
            for index, profile in zip(indices, group_data.X):
                hcurve = HoverCurve(x=X,
                                    y=profile,
                                    pen=hoverpen,
                                    antialias=True)
                hcurve.setToolTip('{}'.format(index))
                hcurve._data_index = index
                hovercurves.append(hcurve)
                self.graph.addItem(hcurve)

            mean = np.nanmean(group_data.X, axis=0)

            meancurve = pg.PlotDataItem(x=X,
                                        y=mean,
                                        pen=pen,
                                        size=5,
                                        symbol="o",
                                        pxMode=True,
                                        symbolSize=5,
                                        antialias=True)
            hoverpen = QPen(hoverpen)
            hoverpen.setWidth(5)

            hc = HoverCurve(x=X, y=mean, pen=hoverpen, antialias=True)
            hc.setFlag(QGraphicsItem.ItemIsSelectable, False)
            self.graph.addItem(hc)

            self.graph.addItem(meancurve)
            if self.classes:
                # Without class names there is nothing to label; indexing
                # an empty `self.classes` would raise IndexError.
                legend.addItem(meancurve,
                               "&nbsp; {}".format(self.classes[i]))
            q1, q3 = np.nanpercentile(group_data.X, [25, 75], axis=0)
            # TODO: implement and use a box plot item
            # The bar extents must not be negative. Clipping with the
            # value itself as the upper bound is a no-op, so only the
            # lower bound is given.
            errorbar = pg.ErrorBarItem(x=X,
                                       y=mean,
                                       bottom=np.clip(mean - q1, 0, None),
                                       top=np.clip(q3 - mean, 0, None),
                                       beam=0.5)
            self.graph.addItem(errorbar)
            groups.append(
                namespace(data=group_data,
                          indices=indices,
                          profiles=curve,
                          hovercurves=hovercurves,
                          mean=meancurve,
                          boxplot=errorbar))

        self.__groups = groups
        self.__update_visibility()
        self.__update_tooltips()
Esempio n. 27
0
    def _setup_plot(self):
        """Setup the plot with new curve data.

        The rows of ``self.data`` are split into one group per value of a
        discrete class variable, or kept as a single group otherwise. For
        every group the following items are added to ``self.graph``:

        * one light curve built by ``disconnected_curve_data`` from all
          profiles of the group,
        * one ``HoverCurve`` per row, tagged with the row index,
        * a mean curve (also appended to ``self.legend_items``) together
          with a wide, non-selectable ``HoverCurve`` drawn over it,
        * error bars reaching from the first to the third quartile.

        The per-group items are collected in ``self.__groups`` before the
        visibility and the tooltips are refreshed.
        """
        assert self.data is not None

        data, domain = self.data, self.data.domain
        if is_discrete(domain.class_var):
            class_col_data, _ = data.get_column_view(domain.class_var)

            group_indices = [np.flatnonzero(class_col_data == i)
                             for i in range(len(domain.class_var.values))]
        else:
            # No discrete class: every row belongs to the one and only group.
            group_indices = [np.arange(len(data))]

        # Profiles are drawn against 1-based attribute positions.
        X = np.arange(1, len(domain.attributes) + 1)
        groups = []

        for i, indices in enumerate(group_indices):
            if self.classes:
                # NOTE(review): if class_colors holds QColor objects, the
                # setAlpha() call below modifies the stored color in place;
                # confirm that this is intended.
                color = self.class_colors[i]
            else:
                color = QColor(Qt.darkGray)
            group_data = data[indices, :]
            plot_x, plot_y, connect = disconnected_curve_data(group_data.X, x=X)

            color.setAlpha(200)
            lightcolor = QColor(color.lighter(factor=150))
            lightcolor.setAlpha(150)
            pen = QPen(color, 2)
            pen.setCosmetic(True)

            lightpen = QPen(lightcolor, 1)
            lightpen.setCosmetic(True)
            hoverpen = QPen(pen)
            hoverpen.setWidth(2)

            # All profiles of the group drawn as a single item.
            curve = pg.PlotCurveItem(
                x=plot_x, y=plot_y, connect=connect,
                pen=lightpen, symbolSize=2, antialias=True,
            )
            self.graph.addItem(curve)

            # One hover curve per row, remembering which row it stands for.
            hovercurves = []
            for index, profile in zip(indices, group_data.X):
                hcurve = HoverCurve(x=X, y=profile, pen=hoverpen,
                                    antialias=True)
                hcurve.setToolTip('{}'.format(index))
                hcurve._data_index = index
                hovercurves.append(hcurve)
                self.graph.addItem(hcurve)

            mean = np.nanmean(group_data.X, axis=0)

            meancurve = pg.PlotDataItem(
                x=X, y=mean, pen=pen, size=5, symbol="o", pxMode=True,
                symbolSize=5, antialias=True
            )
            # The hover curve over the mean uses a wider copy of the pen.
            hoverpen = QPen(hoverpen)
            hoverpen.setWidth(5)

            hc = HoverCurve(x=X, y=mean, pen=hoverpen, antialias=True)
            hc.setFlag(QGraphicsItem.ItemIsSelectable, False)
            self.graph.addItem(hc)

            self.graph.addItem(meancurve)
            self.legend_items.append(meancurve)
            q1, q3 = np.nanpercentile(group_data.X, [25, 75], axis=0)
            # TODO: implement and use a box plot item
            # The mean of a skewed column can lie outside [q1, q3]; clamp the
            # bar lengths at zero so that they never become negative.
            # Note that np.clip(a, 0, a) would return `a` unchanged.
            errorbar = pg.ErrorBarItem(
                x=X, y=mean,
                bottom=np.clip(mean - q1, 0, None),
                top=np.clip(q3 - mean, 0, None),
                beam=0.5
            )
            self.graph.addItem(errorbar)
            groups.append(
                namespace(
                    data=group_data, indices=indices, profiles=curve,
                    hovercurves=hovercurves, mean=meancurve, boxplot=errorbar)
            )

        self.__groups = groups
        self.__update_visibility()
        self.__update_tooltips()
Esempio n. 28
0
    def commit(self):
        """Send the current dendrogram selection to the widget outputs.

        The selected dendrogram nodes are mapped to item indices. Depending
        on ``self.matrix.axis`` these indices pick rows (axis 1) or columns
        (axis 0) of the item table. The picked part is sent on
        "Selected Data" and the remainder on "Other Data". With an empty
        selection both outputs receive ``None``; with no items nothing is
        sent at all.
        """
        items = getattr(self.matrix, "items", self.items)
        if not items:
            # nothing to commit
            return

        selected_nodes = sorted(self.dendrogram.selected_nodes(),
                                key=lambda node: node.value.first)
        leaf_order = [leaf.value.index for leaf in leaves(self.root)]

        # One list of item indices per selected cluster.
        maps = [leaf_order[node.value.first:node.value.last]
                for node in selected_nodes]

        selected_indices = list(chain.from_iterable(maps))
        unselected_indices = sorted(
            set(range(self.root.value.last)).difference(selected_indices))

        if not selected_indices:
            self.send("Selected Data", None)
            self.send("Other Data", None)
            return

        selected_data = None
        unselected_data = None
        is_table = isinstance(items, Orange.data.Table)

        if is_table and self.matrix.axis == 1:
            # Select rows
            n_clusters = len(maps)
            c = numpy.zeros(self.matrix.X.shape[0])
            for cluster_id, members in enumerate(maps):
                c[members] = cluster_id
            # Rows outside of every selected cluster get the extra last id.
            c[unselected_indices] = n_clusters

            mask = c != n_clusters

            if not self.append_clusters:
                data = items
            else:
                clust_var = Orange.data.DiscreteVariable(
                    str(self.cluster_name),
                    values=[
                        "Cluster {}".format(k + 1) for k in range(n_clusters)
                    ] + ["Other"])
                source_domain = items.domain

                attrs = source_domain.attributes
                class_ = source_domain.class_vars
                metas = source_domain.metas

                # Place the cluster variable into the role the user chose.
                extra = (clust_var, )
                if self.cluster_role == self.AttributeRole:
                    attrs = attrs + extra
                elif self.cluster_role == self.ClassRole:
                    class_ = class_ + extra
                elif self.cluster_role == self.MetaRole:
                    metas = metas + extra

                data = Orange.data.Table.from_table(
                    Orange.data.Domain(attrs, class_, metas), items)
                data.get_column_view(clust_var)[0][:] = c

            if selected_indices:
                selected_data = data[mask]
            if unselected_indices:
                unselected_data = data[~mask]

        elif is_table and self.matrix.axis == 0:
            # Select columns
            def column_subset(column_indices):
                # Table with the given attributes; classes and metas kept.
                subdomain = Orange.data.Domain(
                    [items.domain[i] for i in column_indices],
                    items.domain.class_vars, items.domain.metas)
                return items.from_table(subdomain, items)

            selected_data = column_subset(selected_indices)
            unselected_data = column_subset(unselected_indices)

        self.send("Selected Data", selected_data)
        self.send("Other Data", unselected_data)
Esempio n. 29
0
    def commit(self):
        """Send the selected data and the annotated data to the outputs.

        The selected dendrogram nodes are mapped to item indices. When the
        items are a table and ``self.matrix.axis`` is 1, the indices select
        rows: a "Cluster" meta variable is added and the selected rows are
        sent together with a table annotated by ``create_annotated_table``.
        When the axis is 0, the indices select columns: each attribute copy
        gets a ``"cluster"`` entry in its ``attributes`` (0 for unselected
        columns). Both outputs receive ``None`` when there are no items or
        when the items are not a table.
        """
        items = getattr(self.matrix, "items", self.items)
        if not items:
            self.Outputs.selected_data.send(None)
            self.Outputs.annotated_data.send(None)
            return

        selection = self.dendrogram.selected_nodes()
        selection = sorted(selection, key=lambda c: c.value.first)

        indices = [leaf.value.index for leaf in leaves(self.root)]

        # One list of item indices per selected cluster.
        maps = [
            indices[node.value.first:node.value.last] for node in selection
        ]

        selected_indices = list(chain(*maps))
        unselected_indices = sorted(
            set(range(self.root.value.last)) - set(selected_indices))

        if not selected_indices:
            self.Outputs.selected_data.send(None)
            annotated_data = create_annotated_table(items, []) \
                if self.selection_method == 0 and self.matrix.axis else None
            self.Outputs.annotated_data.send(annotated_data)
            return

        # Both outputs default to None: when the items are not a table,
        # neither branch below runs and `annotated_data` would otherwise be
        # unbound at the final send.
        selected_data = annotated_data = None

        if isinstance(items, Orange.data.Table) and self.matrix.axis == 1:
            # Select rows
            c = np.zeros(self.matrix.shape[0])

            for i, cluster_rows in enumerate(maps):
                c[cluster_rows] = i
            # Rows outside of every selected cluster get the extra last id.
            c[unselected_indices] = len(maps)

            mask = c != len(maps)

            data, domain = items, items.domain
            attrs = domain.attributes
            classes = domain.class_vars
            metas = domain.metas

            var_name = get_unique_names(domain, "Cluster")
            values = [f"C{i + 1}" for i in range(len(maps))]

            clust_var = Orange.data.DiscreteVariable(var_name,
                                                     values=values + ["Other"])
            domain = Orange.data.Domain(attrs, classes, metas + (clust_var, ))
            data = items.transform(domain)
            with data.unlocked(data.metas):
                data.get_column_view(clust_var)[0][:] = c

            # selected_indices is known to be non-empty here (see the early
            # return above). The selected rows get a cluster variable
            # without the "Other" value.
            selected_data = data[mask]
            clust_var = Orange.data.DiscreteVariable(var_name,
                                                     values=values)
            selected_data.domain = Orange.data.Domain(
                attrs, classes, metas + (clust_var, ))

            annotated_data = create_annotated_table(data, selected_indices)

        elif isinstance(items, Orange.data.Table) and self.matrix.axis == 0:
            # Select columns
            attrs = []
            # Selected clusters are numbered from 1; the unselected columns
            # come last and are marked with cluster 0.
            for clust, cluster_columns in chain(enumerate(maps, start=1),
                                                [(0, unselected_indices)]):
                for i in cluster_columns:
                    attr = items.domain[i].copy()
                    attr.attributes["cluster"] = clust
                    attrs.append(attr)
            domain = Orange.data.Domain(
                # len(unselected_indices) can be 0
                attrs[:len(attrs) - len(unselected_indices)],
                items.domain.class_vars,
                items.domain.metas)
            selected_data = items.from_table(domain, items)

            domain = Orange.data.Domain(attrs, items.domain.class_vars,
                                        items.domain.metas)
            annotated_data = items.from_table(domain, items)

        self.Outputs.selected_data.send(selected_data)
        self.Outputs.annotated_data.send(annotated_data)