def commit(self):
    matching_output = self.data
    non_matching_output = None
    if self.data:
        domain = self.data.domain
        conditions = []
        for attr_name, oper, values in self.conditions:
            attr_index = domain.index(attr_name)
            attr = domain[attr_index]
            if isinstance(attr, ContinuousVariable):
                if any(not v for v in values):
                    continue
                filter = data_filter.FilterContinuous(
                    attr_index, oper, *[float(v) for v in values])
            elif isinstance(attr, StringVariable):
                if any(v for v in values):
                    continue
                filter = data_filter.FilterString(
                    attr_index, oper, *[str(v) for v in values])
            else:
                if oper == 2:
                    f_values = None
                else:
                    if not values or not values[0]:
                        continue
                    values = [attr.values[i - 1] for i in values]
                    if oper == 0:
                        f_values = {values[0]}
                    else:
                        f_values = set(attr.values)
                        f_values.remove(values[0])
                filter = data_filter.FilterDiscrete(attr_index, f_values)
            conditions.append(filter)

        if conditions:
            filters = data_filter.Values(conditions)
            matching_output = filters(self.data)
            filters.negate = True
            non_matching_output = filters(self.data)

        # if hasattr(self.data, "name"):
        #     matching_output.name = self.data.name
        #     non_matching_output.name = self.data.name
        #
        # if self.purge_attributes or self.purge_classes:
        #     remover = orange.RemoveUnusedValues(removeOneValued=True)
        #
        #     newDomain = remover(matching_output, 0, True, self.purge_classes)
        #     if newDomain != matching_output.domain:
        #         matching_output = orange.ExampleTable(newDomain, matching_output)
        #
        #     newDomain = remover(non_matching_output, 0, True, self.purge_classes)
        #     if newDomain != non_matching_output.domain:
        #         nonmatchingOutput = orange.ExampleTable(newDomain, non_matching_output)

    self.send("Matching Data", matching_output)
    self.send("Unmatched Data", non_matching_output)

    self.update_info(matching_output, self.data_out_rows)
def output_data(self):
    matching_output = self.data
    non_matching_output = None
    if self.data:
        domain = self.data.domain
        filters = data_filter.Values()
        for attr_name, oper, values in self.conditions:
            attr_index = domain.index(attr_name)
            attr = domain[attr_index]
            if isinstance(attr, ContinuousVariable):
                if any(not v for v in values):
                    continue
                filter = data_filter.FilterContinuous(
                    attr_index, oper, *[float(v) for v in values])
            elif isinstance(attr, StringVariable):
                if any(v for v in values):
                    continue
                filter = data_filter.FilterString(
                    attr_index, oper, *[str(v) for v in values])
            else:
                if oper in [2, 3]:
                    raise NotImplementedError(
                        "subset filters for discrete attributes are not "
                        "implemented yet")
                elif oper == 4:
                    f_values = None
                else:
                    if not values or not values[0]:
                        continue
                    if oper == 0:
                        f_values = {values[0] - 1}
                    else:
                        f_values = set(range(len(attr.values)))
                        f_values.remove(values[0] - 1)
                filter = data_filter.FilterDiscrete(attr_index, f_values)
            filters.conditions.append(filter)

        matching_output = filters(self.data)
        filters.negate = True
        non_matching_output = filters(self.data)

        if hasattr(self.data, "name"):
            matching_output.name = self.data.name
            non_matching_output.name = self.data.name

        """
        if self.purge_attributes or self.purge_classes:
            remover = orange.RemoveUnusedValues(removeOneValued=True)

            newDomain = remover(matching_output, 0, True, self.purge_classes)
            if newDomain != matching_output.domain:
                matching_output = orange.ExampleTable(newDomain, matching_output)

            newDomain = remover(non_matching_output, 0, True, self.purge_classes)
            if newDomain != non_matching_output.domain:
                nonmatchingOutput = orange.ExampleTable(newDomain, non_matching_output)
        """

    self.send("Matching Data", matching_output)
    self.send("Unmatched Data", non_matching_output)
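# --- Hedged usage sketch (not part of the widget) ---------------------------
# This only assumes the public Orange 3 API already used above:
# data_filter.FilterContinuous, data_filter.Values and its `negate` flag,
# plus the bundled "iris" data set. It shows how a single Values filter
# produces both the matching and the non-matching output, as in the methods
# above; the threshold 5.8 is an arbitrary illustrative value.
import Orange.data.filter as data_filter
from Orange.data import Table

data = Table("iris")
# sepal length (column 0) greater than 5.8; position, operator and reference
# value are passed the same way commit()/output_data() pass them
cond = data_filter.FilterContinuous(
    0, data_filter.FilterContinuous.Greater, 5.8)
split = data_filter.Values(conditions=[cond])
matching = split(data)
split.negate = True          # same filter object, inverted selection
non_matching = split(data)
assert len(matching) + len(non_matching) == len(data)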
def test_filter_string_not_equal(self):
    filtered_data = filter.Values(conditions=[
        filter.FilterString(-1, filter.FilterString.NotEqual, 'in')
    ])(self.table)
    correct_data = [SqlRowInstance(filtered_data.domain, row)
                    for row in self.data
                    if row[0] != 'in']

    self.assertEqual(len(filtered_data), len(correct_data))
    self.assertSequenceEqual(filtered_data, correct_data)
def test_filter_string_is_defined(self):
    filtered_data = filter.Values(conditions=[
        filter.FilterString(-1, filter.FilterString.IsDefined)
    ])(self.table)
    correct_data = [SqlRowInstance(filtered_data.domain, row)
                    for row in self.data
                    if row[0] is not None]

    self.assertEqual(len(filtered_data), len(correct_data))
    self.assertSequenceEqual(filtered_data, correct_data)
def test_filter_string_between(self):
    filtered_data = filter.Values(conditions=[
        filter.FilterString(-1, filter.FilterString.Between, 'a', 'c')
    ])(self.table)
    correct_data = [SqlRowInstance(filtered_data.domain, row)
                    for row in self.data
                    if row[0] is not None and 'a' <= row[0] <= 'c']

    self.assertEqual(len(filtered_data), len(correct_data))
    self.assertSequenceEqual(filtered_data, correct_data)
def test_filter_string_contains(self):
    filtered_data = filter.Values(conditions=[
        filter.FilterString(-1, filter.FilterString.Contains, 'et')
    ])(self.table)
    correct_data = [SqlRowInstance(filtered_data.domain, row)
                    for row in self.data
                    if row[0] is not None and 'et' in row[0]]

    self.assertEqual(len(filtered_data), len(correct_data))
    self.assertSequenceEqual(filtered_data, correct_data)
def test_filter_string_greater_equal(self):
    filtered_data = filter.Values(conditions=[
        filter.FilterString(-1, filter.FilterString.GreaterEqual, 'volutpat')
    ])(self.table)
    correct_data = [SqlRowInstance(filtered_data.domain, row)
                    for row in self.data
                    if row[0] is not None and row[0] >= 'volutpat']

    self.assertEqual(len(filtered_data), len(correct_data))
    self.assertSequenceEqual(filtered_data, correct_data)
def test_filter_string_not_equal_case_insensitive_data(self):
    filtered_data = filter.Values(conditions=[
        filter.FilterString(-1, filter.FilterString.NotEqual, 'donec',
                            case_sensitive=False)
    ])(self.table)
    correct_data = [SqlRowInstance(filtered_data.domain, row)
                    for row in self.data
                    if row[0] != 'Donec']

    self.assertEqual(len(filtered_data), len(correct_data))
    self.assertSequenceEqual(filtered_data, correct_data)
def test_filter_string_outside(self):
    filtered_data = filter.Values(conditions=[
        filter.FilterString(-1, filter.FilterString.Outside, 'am', 'di')
    ])(self.table)
    correct_data = [SqlRowInstance(filtered_data.domain, row)
                    for row in self.data
                    if row[0] is not None and not 'am' < row[0] < 'di']

    self.assertEqual(len(filtered_data), len(correct_data))
    self.assertSequenceEqual(filtered_data, correct_data)
def test_filter_string_ends_with(self):
    filtered_data = filter.Values(conditions=[
        filter.FilterString(-1, filter.FilterString.EndsWith, 's')
    ])(self.table)
    correct_data = [SqlRowInstance(filtered_data.domain, row)
                    for row in self.data
                    if row[0] is not None and row[0].endswith('s')]

    self.assertEqual(len(filtered_data), len(correct_data))
    self.assertSequenceEqual(filtered_data, correct_data)
def test_filter_string_less_equal(self):
    filtered_data = filter.Values(conditions=[
        filter.FilterString(-1, filter.FilterString.LessEqual, "A")
    ])(self.table)
    correct_data = [
        SqlRowInstance(filtered_data.domain, row)
        for row in self.data
        if row[0] is not None and row[0] <= "A"
    ]

    self.assertEqual(len(filtered_data), len(correct_data))
    self.assertSequenceEqual(filtered_data, correct_data)
def test_filter_string_between_case_insensitive_value(self):
    filtered_data = filter.Values(conditions=[
        filter.FilterString(-1, filter.FilterString.Between, 'I', 'O',
                            case_sensitive=False)
    ])(self.table)
    correct_data = [SqlRowInstance(filtered_data.domain, row)
                    for row in self.data
                    if row[0] is not None and 'i' < row[0].lower() <= 'o']

    self.assertEqual(len(filtered_data), len(correct_data))
    self.assertSequenceEqual(filtered_data, correct_data)
def test_filter_string_greater_equal_case_insensitive_value(self):
    filtered_data = filter.Values(conditions=[
        filter.FilterString(-1, filter.FilterString.GreaterEqual, 'In',
                            case_sensitive=False)
    ])(self.table)
    correct_data = [SqlRowInstance(filtered_data.domain, row)
                    for row in self.data
                    if row[0] is not None and row[0].lower() >= 'in']

    self.assertEqual(len(filtered_data), len(correct_data))
    self.assertSequenceEqual(filtered_data, correct_data)
def test_filter_string_outside_case_insensitive(self):
    filtered_data = filter.Values(conditions=[
        filter.FilterString(-1, filter.FilterString.Outside, 'd', 'k',
                            case_sensitive=False)
    ])(self.table)
    correct_data = [SqlRowInstance(filtered_data.domain, row)
                    for row in self.data
                    if row[0] is not None and not 'd' < row[0].lower() < 'k']

    self.assertEqual(len(filtered_data), len(correct_data))
    self.assertSequenceEqual(filtered_data, correct_data)
def test_filter_string_contains_case_insensitive_data(self):
    filtered_data = filter.Values(conditions=[
        filter.FilterString(-1, filter.FilterString.Contains, 'do',
                            case_sensitive=False)
    ])(self.table)
    correct_data = [SqlRowInstance(filtered_data.domain, row)
                    for row in self.data
                    if row[0] is not None and 'do' in row[0].lower()]

    self.assertEqual(len(filtered_data), len(correct_data))
    self.assertSequenceEqual(filtered_data, correct_data)
def test_filter_string_starts_with_case_insensitive(self):
    filtered_data = filter.Values(conditions=[
        filter.FilterString(-1, filter.FilterString.StartsWith, 'D',
                            case_sensitive=False)
    ])(self.table)
    correct_data = [SqlRowInstance(filtered_data.domain, row)
                    for row in self.data
                    if row[0] is not None and row[0].lower().startswith('d')]

    self.assertEqual(len(filtered_data), len(correct_data))
    self.assertSequenceEqual(filtered_data, correct_data)
def test_filter_string_less_equal_case_insensitive_data(self):
    filtered_data = filter.Values(conditions=[
        filter.FilterString(-1, filter.FilterString.LessEqual, 'donec',
                            case_sensitive=False)
    ])(self.table)
    correct_data = [
        Instance(filtered_data.domain, row)
        for row in self.data
        if row[0] is not None and row[0].lower() <= 'donec'
    ]

    self.assertEqual(len(filtered_data), len(correct_data))
    self.assertSequenceEqual(filtered_data, correct_data)
def test_other(self):
    table = np.array([
        'bcd4d9c0-361e-bad4-7ceb-0d171cdec981',
        '544b7ddc-d861-0201-81c8-9f7ad0bbf531',
        'b35a10f7-7901-f313-ec16-5ad9778040a6',
        'b267c4be-4a26-60b5-e664-737a90a40e93'
    ]).reshape(-1, 1)
    conn, table_name = self.create_sql_table(table, ['uuid'])

    sql_table = SqlTable(conn, table_name, inspect_values=False)
    self.assertFirstMetaIsInstance(sql_table, StringVariable)

    sql_table = SqlTable(conn, table_name, inspect_values=True)
    self.assertFirstMetaIsInstance(sql_table, StringVariable)

    filters = filter.Values([filter.FilterString(-1, 0, 'foo')])
    self.assertEqual(len(filters(sql_table)), 0)
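# --- Hedged sketch, not one of the SqlTable tests above ---------------------
# The same filter.FilterString operators also work on an in-memory Table; the
# string column lives in the metas and is addressed by position -1, exactly
# like in the tests. The tiny domain and the four words below are made up
# here purely for illustration.
import numpy as np
from Orange.data import ContinuousVariable, Domain, StringVariable, Table
from Orange.data import filter

domain = Domain([ContinuousVariable("x")], metas=[StringVariable("word")])
table = Table.from_numpy(
    domain,
    X=np.arange(4).reshape(-1, 1),
    metas=np.array([["lorem"], ["ipsum"], ["dolor"], ["amet"]], dtype=object))
contains = filter.Values(conditions=[
    filter.FilterString(-1, filter.FilterString.Contains, "or")
])
print([str(row["word"]) for row in contains(table)])  # ['lorem', 'dolor']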
def commit(self):
    matching_output = self.data
    non_matching_output = None
    annotated_output = None

    self.Error.clear()
    if self.data:
        domain = self.data.domain
        conditions = []
        for attr_name, oper_idx, values in self.conditions:
            attr_index = domain.index(attr_name)
            attr = domain[attr_index]
            operators = self.Operators[type(attr)]
            opertype, _ = operators[oper_idx]
            if attr.is_continuous:
                try:
                    floats = self._values_to_floats(attr, values)
                except ValueError as e:
                    self.Error.parsing_error(e.args[0])
                    return
                if floats is None:
                    continue
                filter = data_filter.FilterContinuous(
                    attr_index, opertype, *floats)
            elif attr.is_string:
                filter = data_filter.FilterString(
                    attr_index, opertype, *[str(v) for v in values])
            else:
                if opertype == FilterDiscreteType.IsDefined:
                    f_values = None
                else:
                    if not values or not values[0]:
                        continue
                    values = [attr.values[i - 1] for i in values]
                    if opertype == FilterDiscreteType.Equal:
                        f_values = {values[0]}
                    elif opertype == FilterDiscreteType.NotEqual:
                        f_values = set(attr.values)
                        f_values.remove(values[0])
                    elif opertype == FilterDiscreteType.In:
                        f_values = set(values)
                    else:
                        raise ValueError("invalid operand")
                filter = data_filter.FilterDiscrete(attr_index, f_values)
            conditions.append(filter)

        if conditions:
            self.filters = data_filter.Values(conditions)
            matching_output = self.filters(self.data)
            self.filters.negate = True
            non_matching_output = self.filters(self.data)
            # mark the input rows that ended up in the matching output
            row_sel = np.in1d(self.data.ids, matching_output.ids)
            annotated_output = create_annotated_table(self.data, row_sel)

        # if hasattr(self.data, "name"):
        #     matching_output.name = self.data.name
        #     non_matching_output.name = self.data.name

        purge_attrs = self.purge_attributes
        purge_classes = self.purge_classes
        if (purge_attrs or purge_classes) and \
                not isinstance(self.data, SqlTable):
            attr_flags = sum([Remove.RemoveConstant * purge_attrs,
                              Remove.RemoveUnusedValues * purge_attrs])
            class_flags = sum([Remove.RemoveConstant * purge_classes,
                               Remove.RemoveUnusedValues * purge_classes])
            # same settings used for attributes and meta features
            remover = Remove(attr_flags, class_flags, attr_flags)
            matching_output = remover(matching_output)
            non_matching_output = remover(non_matching_output)
            annotated_output = remover(annotated_output)

    # send None instead of empty tables
    if matching_output is not None and not len(matching_output):
        matching_output = None
    if non_matching_output is not None and not len(non_matching_output):
        non_matching_output = None
    if annotated_output is not None and not len(annotated_output):
        annotated_output = None

    self.Outputs.matching_data.send(matching_output)
    self.Outputs.unmatched_data.send(non_matching_output)
    self.Outputs.annotated_data.send(annotated_output)

    self.match_desc = report.describe_data_brief(matching_output)
    self.nonmatch_desc = report.describe_data_brief(non_matching_output)

    self.update_info(matching_output, self.data_out_rows, "Out: ")
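# --- Hedged sketch of the purge step above ----------------------------------
# Assumes only Orange.preprocess.Remove and the bundled "zoo" data set.
# RemoveConstant and RemoveUnusedValues are integer flags that commit() sums
# into one argument per variable role and passes positionally, as in
# `remover = Remove(attr_flags, class_flags, attr_flags)`; the slice below is
# simply an easy way to leave some discrete values unused.
from Orange.data import Table
from Orange.preprocess import Remove

data = Table("zoo")[:20]
flags = Remove.RemoveConstant + Remove.RemoveUnusedValues
remover = Remove(flags, flags, flags)
purged = remover(data)
print(len(data.domain.attributes), "->", len(purged.domain.attributes))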
def commit(self):
    matching_output = self.data
    non_matching_output = None

    self.error()
    if self.data:
        domain = self.data.domain
        conditions = []
        for attr_name, oper_idx, values in self.conditions:
            attr_index = domain.index(attr_name)
            attr = domain[attr_index]
            operators = self.Operators[type(attr)]
            opertype, _ = operators[oper_idx]
            if attr.is_continuous:
                if any(not v for v in values):
                    continue
                # Parse datetime strings into floats
                if isinstance(attr, TimeVariable):
                    try:
                        values = [attr.parse(v) for v in values]
                    except ValueError as e:
                        self.error(e.args[0])
                        return
                filter = data_filter.FilterContinuous(
                    attr_index, opertype, *[float(v) for v in values])
            elif attr.is_string:
                filter = data_filter.FilterString(
                    attr_index, opertype, *[str(v) for v in values])
            else:
                if opertype == FilterDiscreteType.IsDefined:
                    f_values = None
                else:
                    if not values or not values[0]:
                        continue
                    values = [attr.values[i - 1] for i in values]
                    if opertype == FilterDiscreteType.Equal:
                        f_values = {values[0]}
                    elif opertype == FilterDiscreteType.NotEqual:
                        f_values = set(attr.values)
                        f_values.remove(values[0])
                    elif opertype == FilterDiscreteType.In:
                        f_values = set(values)
                    else:
                        raise ValueError("invalid operand")
                filter = data_filter.FilterDiscrete(attr_index, f_values)
            conditions.append(filter)

        if conditions:
            self.filters = data_filter.Values(conditions)
            matching_output = self.filters(self.data)
            self.filters.negate = True
            non_matching_output = self.filters(self.data)

        # if hasattr(self.data, "name"):
        #     matching_output.name = self.data.name
        #     non_matching_output.name = self.data.name

        purge_attrs = self.purge_attributes
        purge_classes = self.purge_classes
        if (purge_attrs or purge_classes) and \
                not isinstance(self.data, SqlTable):
            attr_flags = sum([Remove.RemoveConstant * purge_attrs,
                              Remove.RemoveUnusedValues * purge_attrs])
            class_flags = sum([Remove.RemoveConstant * purge_classes,
                               Remove.RemoveUnusedValues * purge_classes])
            # same settings used for attributes and meta features
            remover = Remove(attr_flags, class_flags, attr_flags)
            matching_output = remover(matching_output)
            non_matching_output = remover(non_matching_output)

    self.send("Matching Data", matching_output)
    self.send("Unmatched Data", non_matching_output)

    self.match_desc = report.describe_data_brief(matching_output)
    self.nonmatch_desc = report.describe_data_brief(non_matching_output)

    self.update_info(matching_output, self.data_out_rows, "Out: ")
def commit(self):
    matching_output = self.data
    non_matching_output = None
    if self.data:
        domain = self.data.domain
        conditions = []
        for attr_name, oper, values in self.conditions:
            attr_index = domain.index(attr_name)
            attr = domain[attr_index]
            if attr.is_continuous:
                if any(not v for v in values):
                    continue
                filter = data_filter.FilterContinuous(
                    attr_index, oper, *[float(v) for v in values])
            elif attr.is_string:
                filter = data_filter.FilterString(
                    attr_index, oper, *[str(v) for v in values])
            else:
                if oper == 3:
                    f_values = None
                else:
                    if not values or not values[0]:
                        continue
                    values = [attr.values[i - 1] for i in values]
                    if oper == 0:
                        f_values = {values[0]}
                    elif oper == 1:
                        f_values = set(attr.values)
                        f_values.remove(values[0])
                    elif oper == 2:
                        f_values = set(values)
                    else:
                        raise ValueError("invalid operand")
                filter = data_filter.FilterDiscrete(attr_index, f_values)
            conditions.append(filter)

        if conditions:
            filters = data_filter.Values(conditions)
            matching_output = filters(self.data)
            filters.negate = True
            non_matching_output = filters(self.data)

        # if hasattr(self.data, "name"):
        #     matching_output.name = self.data.name
        #     non_matching_output.name = self.data.name

        purge_attrs = self.purge_attributes
        purge_classes = self.purge_classes
        if (purge_attrs or purge_classes) and \
                not isinstance(self.data, SqlTable):
            attr_flags = sum([Remove.RemoveConstant * purge_attrs,
                              Remove.RemoveUnusedValues * purge_attrs])
            class_flags = sum([Remove.RemoveConstant * purge_classes,
                               Remove.RemoveUnusedValues * purge_classes])
            # same settings used for attributes and meta features
            remover = Remove(attr_flags, class_flags, attr_flags)
            matching_output = remover(matching_output)
            non_matching_output = remover(non_matching_output)

    self.send("Matching Data", matching_output)
    self.send("Unmatched Data", non_matching_output)

    self.match_desc = report.describe_data_brief(matching_output)
    self.nonmatch_desc = report.describe_data_brief(non_matching_output)

    self.update_info(matching_output, self.data_out_rows)