Exemple #1
0
    def commit(self):
        """Apply the configured row conditions and send both outputs.

        Every entry of ``self.conditions`` is an ``(attr_name, oper, values)``
        triple. It is turned into a continuous, string or discrete filter
        depending on the type of the variable it names. The combined filter
        selects "Matching Data"; the same filter with ``negate`` set selects
        "Unmatched Data". When there is no data or no usable condition, the
        input is forwarded unchanged as matching data and ``None`` is sent
        as unmatched data.
        """
        matching_output = self.data
        non_matching_output = None
        if self.data:
            domain = self.data.domain
            conditions = []
            for attr_name, oper, values in self.conditions:
                attr_index = domain.index(attr_name)
                attr = domain[attr_index]
                if isinstance(attr, ContinuousVariable):
                    # A numeric condition with an empty operand is unfinished;
                    # skip it instead of failing in float().
                    if any(not v for v in values):
                        continue
                    condition = data_filter.FilterContinuous(
                        attr_index, oper, *[float(v) for v in values])
                elif isinstance(attr, StringVariable):
                    # The previous guard here skipped the condition whenever
                    # any operand was non-empty, so string conditions were
                    # silently ignored. String operands are passed through
                    # as they are.
                    condition = data_filter.FilterString(
                        attr_index, oper, *[str(v) for v in values])
                else:
                    if oper == 2:
                        # No value set is handed to FilterDiscrete for this
                        # operator (presumably "is defined" -- confirm).
                        f_values = None
                    else:
                        # ``values`` holds 1-based indices into attr.values
                        # (note the ``i - 1`` below); nothing selected means
                        # the condition is unfinished.
                        if not values or not values[0]:
                            continue
                        values = [attr.values[i - 1] for i in values]
                        if oper == 0:
                            f_values = {values[0]}
                        else:
                            # Every value of the variable except the chosen
                            # one.
                            f_values = set(attr.values)
                            f_values.remove(values[0])
                    condition = data_filter.FilterDiscrete(
                        attr_index, f_values)
                conditions.append(condition)

            if conditions:
                filters = data_filter.Values(conditions)
                matching_output = filters(self.data)
                # Re-run the same filter object negated to get the rest.
                filters.negate = True
                non_matching_output = filters(self.data)

            # TODO: copying self.data.name to the outputs and purging unused
            # attribute/class values (self.purge_attributes /
            # self.purge_classes) are not implemented here.

        self.send("Matching Data", matching_output)
        self.send("Unmatched Data", non_matching_output)

        self.update_info(matching_output, self.data_out_rows)
Exemple #2
0
    def output_data(self):
        """Filter ``self.data`` by ``self.conditions`` and send both outputs.

        Every ``(attr_name, oper, values)`` triple in ``self.conditions`` is
        converted into a continuous, string or discrete filter and appended
        to one ``data_filter.Values`` object. That object selects
        "Matching Data"; with ``negate`` set it selects "Unmatched Data".
        When ``self.data`` is falsy it is forwarded as matching data and
        ``None`` is sent as unmatched data.

        Raises:
            NotImplementedError: for discrete conditions with operator 2
                or 3 (subset filters).
        """
        matching_output = self.data
        non_matching_output = None
        if self.data:
            domain = self.data.domain
            filters = data_filter.Values()
            for attr_name, oper, values in self.conditions:
                attr_index = domain.index(attr_name)
                attr = domain[attr_index]
                if isinstance(attr, ContinuousVariable):
                    # A numeric condition with an empty operand is unfinished;
                    # skip it instead of failing in float().
                    if any(not v for v in values):
                        continue
                    condition = data_filter.FilterContinuous(
                        attr_index, oper, *[float(v) for v in values])
                elif isinstance(attr, StringVariable):
                    # The previous guard here skipped the condition whenever
                    # any operand was non-empty, so string conditions were
                    # silently ignored. String operands are passed through
                    # as they are.
                    condition = data_filter.FilterString(
                        attr_index, oper, *[str(v) for v in values])
                else:
                    if oper in [2, 3]:
                        raise NotImplementedError(
                            "subset filters for discrete attributes are not "
                            "implemented yet")
                    elif oper == 4:
                        # No value set is handed to FilterDiscrete for this
                        # operator (presumably "is defined" -- confirm).
                        f_values = None
                    else:
                        # values[0] is a 1-based value index; a missing or
                        # falsy entry means nothing has been selected yet.
                        if not values or not values[0]:
                            continue
                        if oper == 0:
                            f_values = {values[0] - 1}
                        else:
                            # All value indices except the chosen one.
                            f_values = set(range(len(attr.values)))
                            f_values.remove(values[0] - 1)
                    condition = data_filter.FilterDiscrete(
                        attr_index, f_values)
                filters.conditions.append(condition)

            matching_output = filters(self.data)
            # Re-run the same filter object negated to get the rest.
            filters.negate = True
            non_matching_output = filters(self.data)

            if hasattr(self.data, "name"):
                matching_output.name = self.data.name
                non_matching_output.name = self.data.name

            # TODO: purging of unused attribute/class values
            # (self.purge_attributes / self.purge_classes) is not
            # implemented here.
        self.send("Matching Data", matching_output)
        self.send("Unmatched Data", non_matching_output)
Exemple #3
0
    def test_filter_string_not_equal(self):
        """FilterString.NotEqual must agree with a Python-side ``!=``."""
        condition = filter.FilterString(
            -1, filter.FilterString.NotEqual, 'in')
        filtered_data = filter.Values(conditions=[condition])(self.table)

        # Reference result computed directly from the raw rows.
        domain = filtered_data.domain
        expected = []
        for row in self.data:
            if row[0] != 'in':
                expected.append(SqlRowInstance(domain, row))

        self.assertEqual(len(filtered_data), len(expected))
        self.assertSequenceEqual(filtered_data, expected)
Exemple #4
0
    def test_filter_string_is_defined(self):
        """FilterString.IsDefined must keep exactly the non-None rows."""
        condition = filter.FilterString(-1, filter.FilterString.IsDefined)
        filtered_data = filter.Values(conditions=[condition])(self.table)

        # Reference result computed directly from the raw rows.
        domain = filtered_data.domain
        expected = []
        for row in self.data:
            if row[0] is None:
                continue
            expected.append(SqlRowInstance(domain, row))

        self.assertEqual(len(filtered_data), len(expected))
        self.assertSequenceEqual(filtered_data, expected)
Exemple #5
0
    def test_filter_string_between(self):
        """FilterString.Between('a', 'c') must match the inclusive range."""
        condition = filter.FilterString(
            -1, filter.FilterString.Between, 'a', 'c')
        filtered_data = filter.Values(conditions=[condition])(self.table)

        def in_range(text):
            # Missing values never match; both bounds are inclusive.
            return text is not None and 'a' <= text <= 'c'

        domain = filtered_data.domain
        expected = [SqlRowInstance(domain, row)
                    for row in self.data if in_range(row[0])]

        self.assertEqual(len(filtered_data), len(expected))
        self.assertSequenceEqual(filtered_data, expected)
Exemple #6
0
    def test_filter_string_contains(self):
        """FilterString.Contains('et') must match a substring test."""
        condition = filter.FilterString(
            -1, filter.FilterString.Contains, 'et')
        filtered_data = filter.Values(conditions=[condition])(self.table)

        # Reference result: defined values that contain the substring.
        domain = filtered_data.domain
        expected = []
        for row in self.data:
            text = row[0]
            if text is not None and 'et' in text:
                expected.append(SqlRowInstance(domain, row))

        self.assertEqual(len(filtered_data), len(expected))
        self.assertSequenceEqual(filtered_data, expected)
Exemple #7
0
    def test_filter_string_greater_equal(self):
        """FilterString.GreaterEqual must agree with Python's ``>=``."""
        condition = filter.FilterString(
            -1, filter.FilterString.GreaterEqual, 'volutpat')
        filtered_data = filter.Values(conditions=[condition])(self.table)

        def qualifies(text):
            # Missing values never match.
            return text is not None and text >= 'volutpat'

        domain = filtered_data.domain
        expected = [SqlRowInstance(domain, row)
                    for row in self.data if qualifies(row[0])]

        self.assertEqual(len(filtered_data), len(expected))
        self.assertSequenceEqual(filtered_data, expected)
Exemple #8
0
    def test_filter_string_not_equal_case_insensitive_data(self):
        """Case-insensitive NotEqual('donec') must drop the 'Donec' rows.

        The reference side only excludes the exact spelling 'Donec'.
        """
        condition = filter.FilterString(
            -1, filter.FilterString.NotEqual, 'donec', case_sensitive=False)
        filtered_data = filter.Values(conditions=[condition])(self.table)

        domain = filtered_data.domain
        expected = []
        for row in self.data:
            if row[0] != 'Donec':
                expected.append(SqlRowInstance(domain, row))

        self.assertEqual(len(filtered_data), len(expected))
        self.assertSequenceEqual(filtered_data, expected)
Exemple #9
0
    def test_filter_string_outside(self):
        """FilterString.Outside('am', 'di') must match the reference rows.

        NOTE(review): the reference treats values equal to either bound as
        outside -- confirm this matches FilterString.Outside.
        """
        condition = filter.FilterString(
            -1, filter.FilterString.Outside, 'am', 'di')
        filtered_data = filter.Values(conditions=[condition])(self.table)

        def is_outside(text):
            # Missing values never match.
            if text is None:
                return False
            return not 'am' < text < 'di'

        domain = filtered_data.domain
        expected = [SqlRowInstance(domain, row)
                    for row in self.data if is_outside(row[0])]

        self.assertEqual(len(filtered_data), len(expected))
        self.assertSequenceEqual(filtered_data, expected)
Exemple #10
0
    def test_filter_string_ends_with(self):
        """FilterString.EndsWith('s') must agree with ``str.endswith``."""
        condition = filter.FilterString(
            -1, filter.FilterString.EndsWith, 's')
        filtered_data = filter.Values(conditions=[condition])(self.table)

        # Reference result: defined values with the requested suffix.
        domain = filtered_data.domain
        expected = []
        for row in self.data:
            text = row[0]
            if text is not None and text.endswith('s'):
                expected.append(SqlRowInstance(domain, row))

        self.assertEqual(len(filtered_data), len(expected))
        self.assertSequenceEqual(filtered_data, expected)
Exemple #11
0
    def test_filter_string_less_equal(self):
        """FilterString.LessEqual('A') must agree with Python's ``<=``."""
        condition = filter.FilterString(
            -1, filter.FilterString.LessEqual, "A")
        filtered_data = filter.Values(conditions=[condition])(self.table)

        # Reference result: defined values not greater than the bound.
        domain = filtered_data.domain
        expected = []
        for row in self.data:
            text = row[0]
            if text is None:
                continue
            if text <= "A":
                expected.append(SqlRowInstance(domain, row))

        self.assertEqual(len(filtered_data), len(expected))
        self.assertSequenceEqual(filtered_data, expected)
Exemple #12
0
    def test_filter_string_between_case_insensitive_value(self):
        """Case-insensitive Between('I', 'O') must match the inclusive range.

        The filter operands are upper-case, so the reference side lower-cases
        each stored value before comparing it with 'i' and 'o'.
        """
        filtered_data = filter.Values(conditions=[
            filter.FilterString(-1, filter.FilterString.Between, 'I', 'O',
                                case_sensitive=False)
        ])(self.table)
        # Both bounds are inclusive, as in test_filter_string_between; the
        # lower bound was previously strict, which would wrongly exclude a
        # row whose value is exactly 'i'.
        correct_data = [SqlRowInstance(filtered_data.domain, row)
                        for row in self.data
                        if row[0] is not None
                        and 'i' <= row[0].lower() <= 'o']

        self.assertEqual(len(filtered_data), len(correct_data))
        self.assertSequenceEqual(filtered_data, correct_data)
Exemple #13
0
    def test_filter_string_greater_equal_case_insensitive_value(self):
        """Case-insensitive GreaterEqual('In') must compare lower-cased."""
        condition = filter.FilterString(
            -1, filter.FilterString.GreaterEqual, 'In', case_sensitive=False)
        filtered_data = filter.Values(conditions=[condition])(self.table)

        def qualifies(text):
            # Missing values never match; compare in lower case.
            return text is not None and text.lower() >= 'in'

        domain = filtered_data.domain
        expected = [SqlRowInstance(domain, row)
                    for row in self.data if qualifies(row[0])]

        self.assertEqual(len(filtered_data), len(expected))
        self.assertSequenceEqual(filtered_data, expected)
Exemple #14
0
    def test_filter_string_outside_case_insensitive(self):
        """Case-insensitive Outside('d', 'k') must match the reference rows.

        NOTE(review): the reference treats values equal to either bound as
        outside -- confirm this matches FilterString.Outside.
        """
        condition = filter.FilterString(
            -1, filter.FilterString.Outside, 'd', 'k', case_sensitive=False)
        filtered_data = filter.Values(conditions=[condition])(self.table)

        domain = filtered_data.domain
        expected = []
        for row in self.data:
            text = row[0]
            # Missing values never match; compare in lower case.
            if text is not None and not 'd' < text.lower() < 'k':
                expected.append(SqlRowInstance(domain, row))

        self.assertEqual(len(filtered_data), len(expected))
        self.assertSequenceEqual(filtered_data, expected)
Exemple #15
0
    def test_filter_string_contains_case_insensitive_data(self):
        """Case-insensitive Contains('do') must match lower-cased values."""
        condition = filter.FilterString(
            -1, filter.FilterString.Contains, 'do', case_sensitive=False)
        filtered_data = filter.Values(conditions=[condition])(self.table)

        def has_fragment(text):
            # Missing values never match; search the lower-cased value.
            return text is not None and 'do' in text.lower()

        domain = filtered_data.domain
        expected = [SqlRowInstance(domain, row)
                    for row in self.data if has_fragment(row[0])]

        self.assertEqual(len(filtered_data), len(expected))
        self.assertSequenceEqual(filtered_data, expected)
Exemple #16
0
    def test_filter_string_starts_with_case_insensitive(self):
        """Case-insensitive StartsWith('D') must match lower-cased prefixes."""
        condition = filter.FilterString(
            -1, filter.FilterString.StartsWith, 'D', case_sensitive=False)
        filtered_data = filter.Values(conditions=[condition])(self.table)

        domain = filtered_data.domain
        expected = []
        for row in self.data:
            text = row[0]
            # Missing values never match.
            if text is None:
                continue
            if text.lower().startswith('d'):
                expected.append(SqlRowInstance(domain, row))

        self.assertEqual(len(filtered_data), len(expected))
        self.assertSequenceEqual(filtered_data, expected)
Exemple #17
0
    def test_filter_string_less_equal_case_insensitive_data(self):
        """Case-insensitive LessEqual('donec') must compare lower-cased."""
        condition = filter.FilterString(
            -1, filter.FilterString.LessEqual, 'donec', case_sensitive=False)
        filtered_data = filter.Values(conditions=[condition])(self.table)

        def qualifies(text):
            # Missing values never match; compare in lower case.
            return text is not None and text.lower() <= 'donec'

        domain = filtered_data.domain
        expected = [Instance(domain, row)
                    for row in self.data if qualifies(row[0])]

        self.assertEqual(len(filtered_data), len(expected))
        self.assertSequenceEqual(filtered_data, expected)
Exemple #18
0
    def test_other(self):
        """UUID-like text yields a StringVariable meta that can be filtered.

        The first meta must be a StringVariable with and without value
        inspection, and a string filter for 'foo' must select no rows.
        """
        uuids = [
            'bcd4d9c0-361e-bad4-7ceb-0d171cdec981',
            '544b7ddc-d861-0201-81c8-9f7ad0bbf531',
            'b35a10f7-7901-f313-ec16-5ad9778040a6',
            'b267c4be-4a26-60b5-e664-737a90a40e93',
        ]
        # One column, one UUID per row.
        table = np.array(uuids).reshape(-1, 1)
        conn, table_name = self.create_sql_table(table, ['uuid'])

        for inspect in (False, True):
            sql_table = SqlTable(conn, table_name, inspect_values=inspect)
            self.assertFirstMetaIsInstance(sql_table, StringVariable)

        # sql_table is now the inspect_values=True table.
        condition = filter.FilterString(-1, 0, 'foo')
        filters = filter.Values([condition])
        self.assertEqual(len(filters(sql_table)), 0)
Exemple #19
0
    def commit(self):
        """Filter ``self.data`` by ``self.conditions`` and send all outputs.

        Each ``(attr_name, oper_idx, values)`` condition is translated into
        a continuous, string or discrete filter. The combined filter gives
        the matching rows, its negation the unmatched rows, and an annotated
        copy of the input marks which rows matched. If a continuous operand
        cannot be parsed, the error is shown through
        ``self.Error.parsing_error`` and the method returns without sending
        anything. Outputs with zero rows are replaced by ``None`` before
        sending.

        Raises:
            ValueError: if a discrete condition uses an operator other than
                IsDefined, Equal, NotEqual or In.
        """
        self.Error.clear()

        data = self.data
        matching_output = data
        non_matching_output = None
        annotated_output = None

        if data:
            domain = data.domain
            built = []
            for attr_name, oper_idx, values in self.conditions:
                attr_index = domain.index(attr_name)
                attr = domain[attr_index]
                opertype, _ = self.Operators[type(attr)][oper_idx]

                if attr.is_continuous:
                    try:
                        floats = self._values_to_floats(attr, values)
                    except ValueError as err:
                        self.Error.parsing_error(err.args[0])
                        return
                    # None from the helper means the condition is skipped.
                    if floats is None:
                        continue
                    cond = data_filter.FilterContinuous(
                        attr_index, opertype, *floats)
                elif attr.is_string:
                    operands = [str(v) for v in values]
                    cond = data_filter.FilterString(
                        attr_index, opertype, *operands)
                elif opertype == FilterDiscreteType.IsDefined:
                    cond = data_filter.FilterDiscrete(attr_index, None)
                else:
                    # ``values`` holds 1-based indices into attr.values;
                    # nothing selected means the condition is skipped.
                    if not values or not values[0]:
                        continue
                    chosen = [attr.values[i - 1] for i in values]
                    if opertype == FilterDiscreteType.Equal:
                        f_values = {chosen[0]}
                    elif opertype == FilterDiscreteType.NotEqual:
                        f_values = set(attr.values) - {chosen[0]}
                    elif opertype == FilterDiscreteType.In:
                        f_values = set(chosen)
                    else:
                        raise ValueError("invalid operand")
                    cond = data_filter.FilterDiscrete(attr_index, f_values)
                built.append(cond)

            if built:
                self.filters = data_filter.Values(built)
                matching_output = self.filters(data)
                # The stored filter object is left in its negated state.
                self.filters.negate = True
                non_matching_output = self.filters(data)

                row_sel = np.in1d(data.ids, matching_output.ids)
                annotated_output = create_annotated_table(data, row_sel)

            # if hasattr(self.data, "name"):
            #     matching_output.name = self.data.name
            #     non_matching_output.name = self.data.name

            purge_attrs = self.purge_attributes
            purge_classes = self.purge_classes
            wants_purge = purge_attrs or purge_classes
            if wants_purge and not isinstance(data, SqlTable):
                purge_kinds = (Remove.RemoveConstant,
                               Remove.RemoveUnusedValues)
                attr_flags = sum(kind * purge_attrs for kind in purge_kinds)
                class_flags = sum(
                    kind * purge_classes for kind in purge_kinds)
                # Meta features are purged with the attribute settings.
                remover = Remove(attr_flags, class_flags, attr_flags)

                matching_output = remover(matching_output)
                non_matching_output = remover(non_matching_output)
                annotated_output = remover(annotated_output)

        # Empty tables are sent as None.
        if matching_output is not None and len(matching_output) == 0:
            matching_output = None
        if non_matching_output is not None \
                and len(non_matching_output) == 0:
            non_matching_output = None
        if annotated_output is not None and len(annotated_output) == 0:
            annotated_output = None

        outputs = self.Outputs
        outputs.matching_data.send(matching_output)
        outputs.unmatched_data.send(non_matching_output)
        outputs.annotated_data.send(annotated_output)

        self.match_desc = report.describe_data_brief(matching_output)
        self.nonmatch_desc = report.describe_data_brief(non_matching_output)

        self.update_info(matching_output, self.data_out_rows, "Out: ")
Exemple #20
0
    def commit(self):
        """Filter ``self.data`` by ``self.conditions`` and send both outputs.

        Each ``(attr_name, oper_idx, values)`` condition is translated into
        a continuous, string or discrete filter. The combined filter gives
        "Matching Data" and its negation gives "Unmatched Data". If a
        TimeVariable operand cannot be parsed, the message is shown through
        ``self.error`` and the method returns without sending anything.

        Raises:
            ValueError: if a discrete condition uses an operator other than
                IsDefined, Equal, NotEqual or In.
        """
        self.error()

        data = self.data
        matching_output = data
        non_matching_output = None

        if data:
            domain = data.domain
            built = []
            for attr_name, oper_idx, values in self.conditions:
                attr_index = domain.index(attr_name)
                attr = domain[attr_index]
                opertype, _ = self.Operators[type(attr)][oper_idx]

                if attr.is_continuous:
                    # An empty operand means the condition is unfinished.
                    if not all(values):
                        continue

                    operands = values
                    if isinstance(attr, TimeVariable):
                        # Datetime strings are parsed by the variable first.
                        try:
                            operands = [attr.parse(v) for v in values]
                        except ValueError as err:
                            self.error(err.args[0])
                            return

                    cond = data_filter.FilterContinuous(
                        attr_index, opertype, *map(float, operands))
                elif attr.is_string:
                    texts = [str(v) for v in values]
                    cond = data_filter.FilterString(
                        attr_index, opertype, *texts)
                elif opertype == FilterDiscreteType.IsDefined:
                    cond = data_filter.FilterDiscrete(attr_index, None)
                else:
                    # ``values`` holds 1-based indices into attr.values;
                    # nothing selected means the condition is skipped.
                    if not values or not values[0]:
                        continue
                    chosen = [attr.values[i - 1] for i in values]
                    if opertype == FilterDiscreteType.Equal:
                        f_values = {chosen[0]}
                    elif opertype == FilterDiscreteType.NotEqual:
                        f_values = set(attr.values) - {chosen[0]}
                    elif opertype == FilterDiscreteType.In:
                        f_values = set(chosen)
                    else:
                        raise ValueError("invalid operand")
                    cond = data_filter.FilterDiscrete(attr_index, f_values)
                built.append(cond)

            if built:
                self.filters = data_filter.Values(built)
                matching_output = self.filters(data)
                # The stored filter object is left in its negated state.
                self.filters.negate = True
                non_matching_output = self.filters(data)

            # if hasattr(self.data, "name"):
            #     matching_output.name = self.data.name
            #     non_matching_output.name = self.data.name

            purge_attrs = self.purge_attributes
            purge_classes = self.purge_classes
            wants_purge = purge_attrs or purge_classes
            if wants_purge and not isinstance(data, SqlTable):
                purge_kinds = (Remove.RemoveConstant,
                               Remove.RemoveUnusedValues)
                attr_flags = sum(kind * purge_attrs for kind in purge_kinds)
                class_flags = sum(
                    kind * purge_classes for kind in purge_kinds)
                # Meta features are purged with the attribute settings.
                remover = Remove(attr_flags, class_flags, attr_flags)

                matching_output = remover(matching_output)
                non_matching_output = remover(non_matching_output)

        self.send("Matching Data", matching_output)
        self.send("Unmatched Data", non_matching_output)

        self.match_desc = report.describe_data_brief(matching_output)
        self.nonmatch_desc = report.describe_data_brief(non_matching_output)

        self.update_info(matching_output, self.data_out_rows, "Out: ")
Exemple #21
0
    def commit(self):
        """Filter ``self.data`` by ``self.conditions`` and send both outputs.

        Each ``(attr_name, oper, values)`` condition is translated into a
        continuous, string or discrete filter. The combined filter gives
        "Matching Data" and its negation gives "Unmatched Data". Without
        data or usable conditions the input is forwarded as matching data
        and ``None`` is sent as unmatched data.

        Raises:
            ValueError: if a discrete condition uses an operator code other
                than 0, 1, 2 or 3.
        """
        data = self.data
        matching_output = data
        non_matching_output = None

        if data:
            domain = data.domain
            built = []
            for attr_name, oper, values in self.conditions:
                attr_index = domain.index(attr_name)
                attr = domain[attr_index]

                if attr.is_continuous:
                    # An empty operand means the condition is unfinished.
                    if not all(values):
                        continue
                    cond = data_filter.FilterContinuous(
                        attr_index, oper, *map(float, values))
                elif attr.is_string:
                    texts = [str(v) for v in values]
                    cond = data_filter.FilterString(attr_index, oper, *texts)
                elif oper == 3:
                    # Operator 3 passes no value set to FilterDiscrete.
                    cond = data_filter.FilterDiscrete(attr_index, None)
                else:
                    # ``values`` holds 1-based indices into attr.values;
                    # nothing selected means the condition is skipped.
                    if not values or not values[0]:
                        continue
                    chosen = [attr.values[i - 1] for i in values]
                    if oper == 0:
                        f_values = {chosen[0]}
                    elif oper == 1:
                        f_values = set(attr.values) - {chosen[0]}
                    elif oper == 2:
                        f_values = set(chosen)
                    else:
                        raise ValueError("invalid operand")
                    cond = data_filter.FilterDiscrete(attr_index, f_values)
                built.append(cond)

            if built:
                filters = data_filter.Values(built)
                matching_output = filters(data)
                # Re-run the same filter object negated to get the rest.
                filters.negate = True
                non_matching_output = filters(data)

            # if hasattr(self.data, "name"):
            #     matching_output.name = self.data.name
            #     non_matching_output.name = self.data.name

            purge_attrs = self.purge_attributes
            purge_classes = self.purge_classes
            wants_purge = purge_attrs or purge_classes
            if wants_purge and not isinstance(data, SqlTable):
                purge_kinds = (Remove.RemoveConstant,
                               Remove.RemoveUnusedValues)
                attr_flags = sum(kind * purge_attrs for kind in purge_kinds)
                class_flags = sum(
                    kind * purge_classes for kind in purge_kinds)
                # Meta features are purged with the attribute settings.
                remover = Remove(attr_flags, class_flags, attr_flags)

                matching_output = remover(matching_output)
                non_matching_output = remover(non_matching_output)

        self.send("Matching Data", matching_output)
        self.send("Unmatched Data", non_matching_output)

        self.match_desc = report.describe_data_brief(matching_output)
        self.nonmatch_desc = report.describe_data_brief(non_matching_output)

        self.update_info(matching_output, self.data_out_rows)