Beispiel #1
0
    def update_selection(self):
        """
        Update the graph (pen width) to show the current selection.
        Filter and output the data.
        """
        if self.areas is None or not self.selection:
            self.send("Selection", None)
            return

        filts = []
        for i, area in enumerate(self.areas):
            if i in self.selection:
                width = 4
                val_x, val_y = area.value_pair
                filts.append(
                    filter.Values([
                        filter.FilterDiscrete(self.attrX, [val_x]),
                        filter.FilterDiscrete(self.attrY, [val_y])
                    ]))
            else:
                width = 1
            pen = area.pen()
            pen.setWidth(width)
            area.setPen(pen)
        if len(filts) == 1:
            filts = filts[0]
        else:
            filts = filter.Values(filts, conjunction=False)
        selection = filts(self.discrete_data)
        if self.discrete_data is not self.data:
            idset = set(selection.ids)
            sel_idx = [i for i, id in enumerate(self.data.ids) if id in idset]
            selection = self.data[sel_idx]
        self.send("Selection", selection)
Beispiel #2
0
    def update_selection(self):
        if self.areas is None or not self.selection:
            self.send("Selection", None)
            return

        filters = []
        for i, area in enumerate(self.areas):
            if i in self.selection:
                width = 4
                val_x, val_y = area.value_pair
                filters.append(
                    filter.Values([
                        filter.FilterDiscrete(self.attrX, [val_x]),
                        filter.FilterDiscrete(self.attrY, [val_y])
                    ]))
            else:
                width = 1
            pen = area.pen()
            pen.setWidth(width)
            area.setPen(pen)
        if len(filters) == 1:
            filters = filters[0]
        else:
            filters = filter.Values(filters, conjunction=False)
        self.send("Selection", filters(self.data))
Beispiel #3
0
 def send_selection(self):
     if not self.selection or self.data is None:
         self.send("Selected Data", None)
         return
     filters = []
     self.warning(6)
     if self.discrete_data is not self.data:
         if isinstance(self.data, SqlTable):
             self.warning(
                 6,
                 "Selection of continuous variables on SQL is not supported"
             )
     for i in self.selection:
         cols, vals, area = self.areas[i]
         filters.append(
             filter.Values(
                 filter.FilterDiscrete(col, [val])
                 for col, val in zip(cols, vals)))
     if len(filters) > 1:
         filters = filter.Values(filters, conjunction=False)
     else:
         filters = filters[0]
     selection = filters(self.discrete_data)
     if self.discrete_data is not self.data:
         idset = set(selection.ids)
         sel_idx = [i for i, id in enumerate(self.data.ids) if id in idset]
         selection = self.discrete_data[sel_idx]
     self.send("Selected Data", selection)
Beispiel #4
0
    def send_selection(self):
        if not self.selection or self.data is None:
            self.Outputs.selected_data.send(None)
            self.Outputs.annotated_data.send(
                create_annotated_table(self.data, []))
            return
        filters = []
        self.Warning.no_cont_selection_sql.clear()
        if self.discrete_data is not self.data:
            if isinstance(self.data, SqlTable):
                self.Warning.no_cont_selection_sql()
        for i in self.selection:
            cols, vals, _ = self.areas[i]
            filters.append(
                filter.Values(
                    filter.FilterDiscrete(col, [val])
                    for col, val in zip(cols, vals)))
        if len(filters) > 1:
            filters = filter.Values(filters, conjunction=False)
        else:
            filters = filters[0]
        selection = filters(self.discrete_data)
        idset = set(selection.ids)
        sel_idx = [i for i, id in enumerate(self.data.ids) if id in idset]
        if self.discrete_data is not self.data:
            selection = self.data[sel_idx]

        self.Outputs.selected_data.send(selection)
        self.Outputs.annotated_data.send(
            create_annotated_table(self.data, sel_idx))
Beispiel #5
0
    def test_discrete_value_filter_with_None(self):
        filtered_data = filter.Values(
            conditions=[filter.FilterDiscrete(3, None)])(self.table)
        correct_data = [row for row in self.data if row[3] is not None]

        self.assertEqual(len(filtered_data), len(correct_data))
        self.assertSequenceEqual(filtered_data, correct_data)
Beispiel #6
0
    def test_discrete_value_filter_with_multiple_values(self):
        filtered_data = filter.Values(
            conditions=[filter.FilterDiscrete(3, ["a", "b"])])(self.table)
        correct_data = [row for row in self.data if row[3] in ["a", "b"]]

        self.assertEqual(len(filtered_data), len(correct_data))
        self.assertSequenceEqual(filtered_data, correct_data)
Beispiel #7
0
    def commit(self):
        matching_output = self.data
        non_matching_output = None
        if self.data:
            domain = self.data.domain
            conditions = []
            for attr_name, oper, values in self.conditions:
                attr_index = domain.index(attr_name)
                attr = domain[attr_index]
                if isinstance(attr, ContinuousVariable):
                    if any(not v for v in values):
                        continue
                    filter = data_filter.FilterContinuous(
                        attr_index, oper, *[float(v) for v in values])
                elif isinstance(attr, StringVariable):
                    if any(v for v in values):
                        continue
                    filter = data_filter.FilterString(
                        attr_index, oper, *[str(v) for v in values])
                else:
                    if oper == 2:
                        f_values = None
                    else:
                        if not values or not values[0]:
                            continue
                        values = [attr.values[i - 1] for i in values]
                        if oper == 0:
                            f_values = {values[0]}
                        else:
                            f_values = set(attr.values)
                            f_values.remove(values[0])
                    filter = data_filter.FilterDiscrete(attr_index, f_values)
                conditions.append(filter)

            if conditions:
                filters = data_filter.Values(conditions)
                matching_output = filters(self.data)
                filters.negate = True
                non_matching_output = filters(self.data)

            # if hasattr(self.data, "name"):
            #     matching_output.name = self.data.name
            #     non_matching_output.name = self.data.name
            #
            # if self.purge_attributes or self.purge_classes:
            #     remover = orange.RemoveUnusedValues(removeOneValued=True)
            #
            #     newDomain = remover(matching_output, 0, True, self.purge_classes)
            #     if newDomain != matching_output.domain:
            #         matching_output = orange.ExampleTable(newDomain, matching_output)
            #
            #     newDomain = remover(non_matching_output, 0, True, self.purge_classes)
            #     if newDomain != non_matching_output.domain:
            #         nonmatchingOutput = orange.ExampleTable(newDomain, non_matching_output)

        self.send("Matching Data", matching_output)
        self.send("Unmatched Data", non_matching_output)

        self.update_info(matching_output, self.data_out_rows)
Beispiel #8
0
    def output_data(self):
        matching_output = self.data
        non_matching_output = None
        if self.data:
            domain = self.data.domain
            filters = data_filter.Values()
            for attr_name, oper, values in self.conditions:
                attr_index = domain.index(attr_name)
                attr = domain[attr_index]
                if isinstance(attr, ContinuousVariable):
                    if any(not v for v in values):
                        continue
                    filter = data_filter.FilterContinuous(
                        attr_index, oper, *[float(v) for v in values])
                elif isinstance(attr, StringVariable):
                    if any(v for v in values):
                        continue
                    filter = data_filter.FilterString(
                        attr_index, oper, *[str(v) for v in values])
                else:
                    if oper in [2, 3]:
                        raise NotImplementedError(
                            "subset filters for discrete attributes are not "
                            "implemented yet")
                    elif oper == 4:
                        f_values = None
                    else:
                        if not values or not values[0]:
                            continue
                        if oper == 0:
                            f_values = {values[0] - 1}
                        else:
                            f_values = set(range(len(attr.values)))
                            f_values.remove(values[0] - 1)
                    filter = data_filter.FilterDiscrete(attr_index, f_values)
                filters.conditions.append(filter)

            matching_output = filters(self.data)
            filters.negate = True
            non_matching_output = filters(self.data)

            if hasattr(self.data, "name"):
                matching_output.name = self.data.name
                non_matching_output.name = self.data.name
            """
            if self.purge_attributes or self.purge_classes:
                remover = orange.RemoveUnusedValues(removeOneValued=True)

                newDomain = remover(matching_output, 0, True, self.purge_classes)
                if newDomain != matching_output.domain:
                    matching_output = orange.ExampleTable(newDomain, matching_output)

                newDomain = remover(non_matching_output, 0, True, self.purge_classes)
                if newDomain != non_matching_output.domain:
                    nonmatchingOutput = orange.ExampleTable(newDomain, non_matching_output)
            """
        self.send("Matching Data", matching_output)
        self.send("Unmatched Data", non_matching_output)
    def update_selection(self):
        """
        Update the graph (pen width) to show the current selection.
        Filter and output the data.
        """
        if self.areas is None or not self.selection:
            self.Outputs.selected_data.send(None)
            self.Outputs.annotated_data.send(create_annotated_table(self.data, []))
            self.info.set_output_summary(self.info.NoOutput)
            return

        filts = []
        for i, area in enumerate(self.areas):
            if i in self.selection:
                width = 4
                val_x, val_y = area.value_pair
                filts.append(
                    filter.Values([
                        filter.FilterDiscrete(self.attr_x.name, [val_x]),
                        filter.FilterDiscrete(self.attr_y.name, [val_y])
                    ]))
            else:
                width = 1
            pen = area.pen()
            pen.setWidth(width)
            area.setPen(pen)
        if len(filts) == 1:
            filts = filts[0]
        else:
            filts = filter.Values(filts, conjunction=False)
        selection = filts(self.discrete_data)
        idset = set(selection.ids)
        sel_idx = [i for i, id in enumerate(self.data.ids) if id in idset]
        if self.discrete_data is not self.data:
            selection = self.data[sel_idx]

        summary = len(selection) if selection is not None else self.info.NoOutput
        details = format_summary_details(selection) if selection is not None else ""
        self.info.set_output_summary(summary, details)
        self.Outputs.selected_data.send(selection)
        self.Outputs.annotated_data.send(create_annotated_table(self.data, sel_idx))
Beispiel #10
0
def get_conditional_distribution(data, attrs):
    cond_dist = defaultdict(int)
    dist = defaultdict(int)
    cond_dist[""] = dist[""] = len(data)
    all_attrs = [data.domain[a] for a in attrs]
    if data.domain.class_var is not None:
        all_attrs.append(data.domain.class_var)

    for i in range(1, len(all_attrs) + 1):
        attr = all_attrs[:i]
        if type(data) == SqlTable:
            # make all possible pairs of attributes + class_var
            attr = [a.to_sql() for a in attr]
            fields = attr + ["COUNT(*)"]
            query = data._sql_query(fields, group_by=attr)
            with data._execute_sql_query(query) as cur:
                res = cur.fetchall()
            for r in res:
                str_values = [
                    a.repr_val(a.to_val(x)) for a, x in zip(all_attrs, r[:-1])
                ]
                str_values = [x if x != "?" else "None" for x in str_values]
                cond_dist["-".join(str_values)] = r[-1]
                dist[str_values[-1]] += r[-1]
        else:
            for indices in product(*(range(len(a.values)) for a in attr)):
                vals = []
                conditions = []
                for k, ind in enumerate(indices):
                    vals.append(attr[k].values[ind])
                    fd = filter.FilterDiscrete(
                        column=attr[k], values=[attr[k].values[ind]]
                    )
                    conditions.append(fd)
                filt = filter.Values(conditions)
                filtdata = filt(data)
                cond_dist["-".join(vals)] = len(filtdata)
                dist[vals[-1]] += len(filtdata)
    return cond_dist, dist
Beispiel #11
0
    def commit(self):
        matching_output = self.data
        non_matching_output = None
        annotated_output = None

        self.Error.clear()
        if self.data:
            domain = self.data.domain
            conditions = []
            for attr_name, oper_idx, values in self.conditions:
                attr_index = domain.index(attr_name)
                attr = domain[attr_index]
                operators = self.Operators[type(attr)]
                opertype, _ = operators[oper_idx]
                if attr.is_continuous:
                    try:
                        floats = self._values_to_floats(attr, values)
                    except ValueError as e:
                        self.Error.parsing_error(e.args[0])
                        return
                    if floats is None:
                        continue
                    filter = data_filter.FilterContinuous(
                        attr_index, opertype, *floats)
                elif attr.is_string:
                    filter = data_filter.FilterString(
                        attr_index, opertype, *[str(v) for v in values])
                else:
                    if opertype == FilterDiscreteType.IsDefined:
                        f_values = None
                    else:
                        if not values or not values[0]:
                            continue
                        values = [attr.values[i - 1] for i in values]
                        if opertype == FilterDiscreteType.Equal:
                            f_values = {values[0]}
                        elif opertype == FilterDiscreteType.NotEqual:
                            f_values = set(attr.values)
                            f_values.remove(values[0])
                        elif opertype == FilterDiscreteType.In:
                            f_values = set(values)
                        else:
                            raise ValueError("invalid operand")
                    filter = data_filter.FilterDiscrete(attr_index, f_values)
                conditions.append(filter)

            if conditions:
                self.filters = data_filter.Values(conditions)
                matching_output = self.filters(self.data)
                self.filters.negate = True
                non_matching_output = self.filters(self.data)

                row_sel = np.in1d(self.data.ids, matching_output.ids)
                annotated_output = create_annotated_table(self.data, row_sel)

            # if hasattr(self.data, "name"):
            #     matching_output.name = self.data.name
            #     non_matching_output.name = self.data.name

            purge_attrs = self.purge_attributes
            purge_classes = self.purge_classes
            if (purge_attrs or purge_classes) and \
                    not isinstance(self.data, SqlTable):
                attr_flags = sum([
                    Remove.RemoveConstant * purge_attrs,
                    Remove.RemoveUnusedValues * purge_attrs
                ])
                class_flags = sum([
                    Remove.RemoveConstant * purge_classes,
                    Remove.RemoveUnusedValues * purge_classes
                ])
                # same settings used for attributes and meta features
                remover = Remove(attr_flags, class_flags, attr_flags)

                matching_output = remover(matching_output)
                non_matching_output = remover(non_matching_output)
                annotated_output = remover(annotated_output)

        if matching_output is not None and not len(matching_output):
            matching_output = None
        if non_matching_output is not None and not len(non_matching_output):
            non_matching_output = None
        if annotated_output is not None and not len(annotated_output):
            annotated_output = None

        self.Outputs.matching_data.send(matching_output)
        self.Outputs.unmatched_data.send(non_matching_output)
        self.Outputs.annotated_data.send(annotated_output)

        self.match_desc = report.describe_data_brief(matching_output)
        self.nonmatch_desc = report.describe_data_brief(non_matching_output)

        self.update_info(matching_output, self.data_out_rows, "Out: ")
Beispiel #12
0
    def commit(self):
        matching_output = self.data
        non_matching_output = None
        self.error()
        if self.data:
            domain = self.data.domain
            conditions = []
            for attr_name, oper_idx, values in self.conditions:
                attr_index = domain.index(attr_name)
                attr = domain[attr_index]
                operators = self.Operators[type(attr)]
                opertype, _ = operators[oper_idx]
                if attr.is_continuous:
                    if any(not v for v in values):
                        continue

                    # Parse datetime strings into floats
                    if isinstance(attr, TimeVariable):
                        try:
                            values = [attr.parse(v) for v in values]
                        except ValueError as e:
                            self.error(e.args[0])
                            return

                    filter = data_filter.FilterContinuous(
                        attr_index, opertype, *[float(v) for v in values])
                elif attr.is_string:
                    filter = data_filter.FilterString(
                        attr_index, opertype, *[str(v) for v in values])
                else:
                    if opertype == FilterDiscreteType.IsDefined:
                        f_values = None
                    else:
                        if not values or not values[0]:
                            continue
                        values = [attr.values[i - 1] for i in values]
                        if opertype == FilterDiscreteType.Equal:
                            f_values = {values[0]}
                        elif opertype == FilterDiscreteType.NotEqual:
                            f_values = set(attr.values)
                            f_values.remove(values[0])
                        elif opertype == FilterDiscreteType.In:
                            f_values = set(values)
                        else:
                            raise ValueError("invalid operand")
                    filter = data_filter.FilterDiscrete(attr_index, f_values)
                conditions.append(filter)

            if conditions:
                self.filters = data_filter.Values(conditions)
                matching_output = self.filters(self.data)
                self.filters.negate = True
                non_matching_output = self.filters(self.data)

            # if hasattr(self.data, "name"):
            #     matching_output.name = self.data.name
            #     non_matching_output.name = self.data.name

            purge_attrs = self.purge_attributes
            purge_classes = self.purge_classes
            if (purge_attrs or purge_classes) and \
                    not isinstance(self.data, SqlTable):
                attr_flags = sum([
                    Remove.RemoveConstant * purge_attrs,
                    Remove.RemoveUnusedValues * purge_attrs
                ])
                class_flags = sum([
                    Remove.RemoveConstant * purge_classes,
                    Remove.RemoveUnusedValues * purge_classes
                ])
                # same settings used for attributes and meta features
                remover = Remove(attr_flags, class_flags, attr_flags)

                matching_output = remover(matching_output)
                non_matching_output = remover(non_matching_output)

        self.send("Matching Data", matching_output)
        self.send("Unmatched Data", non_matching_output)

        self.match_desc = report.describe_data_brief(matching_output)
        self.nonmatch_desc = report.describe_data_brief(non_matching_output)

        self.update_info(matching_output, self.data_out_rows, "Out: ")
Beispiel #13
0
    def commit(self):
        matching_output = self.data
        non_matching_output = None
        if self.data:
            domain = self.data.domain
            conditions = []
            for attr_name, oper, values in self.conditions:
                attr_index = domain.index(attr_name)
                attr = domain[attr_index]
                if attr.is_continuous:
                    if any(not v for v in values):
                        continue
                    filter = data_filter.FilterContinuous(
                        attr_index, oper, *[float(v) for v in values])
                elif attr.is_string:
                    filter = data_filter.FilterString(
                        attr_index, oper, *[str(v) for v in values])
                else:
                    if oper == 3:
                        f_values = None
                    else:
                        if not values or not values[0]:
                            continue
                        values = [attr.values[i - 1] for i in values]
                        if oper == 0:
                            f_values = {values[0]}
                        elif oper == 1:
                            f_values = set(attr.values)
                            f_values.remove(values[0])
                        elif oper == 2:
                            f_values = set(values)
                        else:
                            raise ValueError("invalid operand")
                    filter = data_filter.FilterDiscrete(attr_index, f_values)
                conditions.append(filter)

            if conditions:
                filters = data_filter.Values(conditions)
                matching_output = filters(self.data)
                filters.negate = True
                non_matching_output = filters(self.data)

            # if hasattr(self.data, "name"):
            #     matching_output.name = self.data.name
            #     non_matching_output.name = self.data.name

            purge_attrs = self.purge_attributes
            purge_classes = self.purge_classes
            if (purge_attrs or purge_classes) and \
                    not isinstance(self.data, SqlTable):
                attr_flags = sum([
                    Remove.RemoveConstant * purge_attrs,
                    Remove.RemoveUnusedValues * purge_attrs
                ])
                class_flags = sum([
                    Remove.RemoveConstant * purge_classes,
                    Remove.RemoveUnusedValues * purge_classes
                ])
                # same settings used for attributes and meta features
                remover = Remove(attr_flags, class_flags, attr_flags)

                matching_output = remover(matching_output)
                non_matching_output = remover(non_matching_output)

        self.send("Matching Data", matching_output)
        self.send("Unmatched Data", non_matching_output)

        self.match_desc = report.describe_data_brief(matching_output)
        self.nonmatch_desc = report.describe_data_brief(non_matching_output)

        self.update_info(matching_output, self.data_out_rows)