def send_selection(self):
    """Filter the data by the selected mosaic areas and push both outputs.

    Sends None / an empty annotation when there is no data or no selection.
    Selection is evaluated on the discretized table; when that differs from
    the original data, matching rows are mapped back through row ids.
    """
    if not self.selection or self.data is None:
        self.Outputs.selected_data.send(None)
        self.Outputs.annotated_data.send(
            create_annotated_table(self.data, []))
        return
    self.Warning.no_cont_selection_sql.clear()
    # Continuous selections cannot be expressed for SQL-backed tables.
    if self.discrete_data is not self.data and isinstance(self.data, SqlTable):
        self.Warning.no_cont_selection_sql()
    area_filters = []
    for idx in self.selection:
        cols, vals, _ = self.areas[idx]
        area_filters.append(
            filter.Values(
                filter.FilterDiscrete(col, [val])
                for col, val in zip(cols, vals)))
    if len(area_filters) == 1:
        combined = area_filters[0]
    else:
        # Multiple areas combine disjunctively (row matches any area).
        combined = filter.Values(area_filters, conjunction=False)
    selection = combined(self.discrete_data)
    idset = set(selection.ids)
    sel_idx = [i for i, id_ in enumerate(self.data.ids) if id_ in idset]
    if self.discrete_data is not self.data:
        # Output rows of the original (undiscretized) data.
        selection = self.data[sel_idx]
    self.Outputs.selected_data.send(selection)
    self.Outputs.annotated_data.send(
        create_annotated_table(self.data, sel_idx))
def update_selection(self):
    """Thicken the pen of selected areas and send the filtered data."""
    if self.areas is None or not self.selection:
        self.send("Selection", None)
        return
    conds = []
    for idx, area in enumerate(self.areas):
        chosen = idx in self.selection
        if chosen:
            val_x, val_y = area.value_pair
            conds.append(
                filter.Values([
                    filter.FilterDiscrete(self.attrX, [val_x]),
                    filter.FilterDiscrete(self.attrY, [val_y])
                ]))
        # Selected cells get a wide border, unselected a thin one.
        pen = area.pen()
        pen.setWidth(4 if chosen else 1)
        area.setPen(pen)
    combined = conds[0] if len(conds) == 1 \
        else filter.Values(conds, conjunction=False)
    self.send("Selection", combined(self.data))
def send_selection(self):
    """Filter the data by the selected areas and send "Selected Data".

    The selection filters are built against the discretized table; when it
    differs from the original data, matching rows are mapped back via row
    ids so the original (continuous) data is what gets sent.
    """
    if not self.selection or self.data is None:
        self.send("Selected Data", None)
        return
    filters = []
    self.warning(6)
    if self.discrete_data is not self.data:
        if isinstance(self.data, SqlTable):
            self.warning(
                6,
                "Selection of continuous variables on SQL is not supported")
    for i in self.selection:
        cols, vals, area = self.areas[i]
        filters.append(
            filter.Values(
                filter.FilterDiscrete(col, [val])
                for col, val in zip(cols, vals)))
    if len(filters) > 1:
        # Areas combine disjunctively: a row matches any selected area.
        filters = filter.Values(filters, conjunction=False)
    else:
        filters = filters[0]
    selection = filters(self.discrete_data)
    if self.discrete_data is not self.data:
        idset = set(selection.ids)
        sel_idx = [i for i, id in enumerate(self.data.ids) if id in idset]
        # BUG FIX: sel_idx enumerates self.data.ids, and the widget should
        # output rows of the undiscretized table (as the sibling
        # implementations do) -- was self.discrete_data[sel_idx].
        selection = self.data[sel_idx]
    self.send("Selected Data", selection)
def update_selection(self):
    """
    Update the graph (pen width) to show the current selection.
    Filter and output the data.
    """
    if self.areas is None or not self.selection:
        self.send("Selection", None)
        return
    conds = []
    for idx, area in enumerate(self.areas):
        chosen = idx in self.selection
        if chosen:
            val_x, val_y = area.value_pair
            conds.append(
                filter.Values([
                    filter.FilterDiscrete(self.attrX, [val_x]),
                    filter.FilterDiscrete(self.attrY, [val_y])
                ]))
        # Wide border for selected cells, thin for the rest.
        pen = area.pen()
        pen.setWidth(4 if chosen else 1)
        area.setPen(pen)
    combined = conds[0] if len(conds) == 1 \
        else filter.Values(conds, conjunction=False)
    selection = combined(self.discrete_data)
    if self.discrete_data is not self.data:
        # Map matching rows back to the original (undiscretized) data.
        idset = set(selection.ids)
        sel_idx = [i for i, id_ in enumerate(self.data.ids) if id_ in idset]
        selection = self.data[sel_idx]
    self.send("Selection", selection)
def test_discrete_value_filter_with_None(self):
    """FilterDiscrete with values=None behaves as "is defined" on column 3."""
    result = filter.Values(
        conditions=[filter.FilterDiscrete(3, None)])(self.table)
    expected = [row for row in self.data if row[3] is not None]
    self.assertEqual(len(result), len(expected))
    self.assertSequenceEqual(result, expected)
def test_discrete_value_filter_with_multiple_values(self):
    """FilterDiscrete with a value list keeps rows matching any listed value."""
    cond = filter.FilterDiscrete(3, ["a", "b"])
    result = filter.Values(conditions=[cond])(self.table)
    expected = [row for row in self.data if row[3] in ["a", "b"]]
    self.assertEqual(len(result), len(expected))
    self.assertSequenceEqual(result, expected)
def dataset_download(gds_id, samples=None, transpose=False, callback=None):
    """Download a GEO dataset and return it as an annotated Orange Table.

    Args:
        gds_id: GEO dataset identifier; '<gds_id>.tab' is fetched.
        samples: optional mapping {meta variable name: allowed sample types};
            when given, rows are filtered and a new 'class' variable is built
            from the joined sample-type values.
        transpose: transpose the table (genes as rows instead of columns).
        callback: optional no-arg progress callable, invoked at milestones
            (and presumably per-chunk by the download -- confirm serverfiles API).

    Returns:
        The downloaded Table with gene/taxonomy annotations in .attributes.
    """
    file_name = '{}.tab'.format(gds_id)
    # NOTE(review): update() already downloads/extracts; the following
    # localpath_download may re-check the file -- confirm serverfiles semantics.
    local_files.update(file_name, extract=True, callback=callback)
    table = Table(local_files.localpath_download(file_name))
    title = table.name
    gds_info = local_files.info(file_name)
    table_annotations = {TableAnnotation.tax_id: gds_info['taxid']}

    if callback:
        callback()

    if samples is not None:
        # Keep only rows whose sample metadata matches the requested types.
        filters = [
            table_filter.FilterStringList(sample, sample_types)
            for sample, sample_types in samples.items()
        ]
        table = table_filter.Values(filters)(table)

        # Build a discrete class from the '|'-joined per-row sample values.
        column_values = []
        for meta_var in samples.keys():
            column_values.append(
                table.get_column_view(table.domain[meta_var])[0])
        class_values = list(map('|'.join, zip(*column_values)))

        _class_values = list(set(class_values))
        map_class_values = {
            value: key
            for (key, value) in enumerate(_class_values)
        }
        class_var = DiscreteVariable(name='class', values=_class_values)
        _domain = Domain(table.domain.attributes,
                         table.domain.class_vars + (class_var, ),
                         table.domain.metas)
        table = table.transform(_domain)
        col, _ = table.get_column_view(class_var)
        # Fill the new class column in place with the encoded values.
        col[:] = [map_class_values[class_val] for class_val in class_values]

    if transpose:
        table = Table.transpose(table, feature_names_column='sample_id',
                                meta_attr_name='genes')
        table.name = title  # table name is lost after transpose
        # Transposing flips where genes live, so the annotation flag is
        # negated and the gene-id attribute becomes a gene-id column.
        table_annotations[TableAnnotation.gene_as_attr_name] = not gds_info[
            TableAnnotation.gene_as_attr_name]
        table_annotations[TableAnnotation.gene_id_column] = gds_info[
            TableAnnotation.gene_id_attribute]
    else:
        table_annotations[TableAnnotation.gene_as_attr_name] = gds_info[
            TableAnnotation.gene_as_attr_name]
        table_annotations[TableAnnotation.gene_id_attribute] = gds_info[
            TableAnnotation.gene_id_attribute]

    if callback:
        callback()

    table.attributes = table_annotations
    return table
def commit(self):
    """Send the genes selected in the view to the "matched genes" output.

    Reads the current rows from the view's selection model, collects their
    gene ids, and outputs either a column-subset of the input data (genes as
    attributes) or a row-filtered table (genes in a string column).
    """
    selection_model = self.data_view.selectionModel()

    if selection_model:
        selection = selection_model.selectedRows(self.COUNT)
        # Remember source-model row indices (view rows go through the proxy).
        self.selected_rows = [self.filter_proxy_model.mapToSource(sel).row()
                              for sel in selection]

        if selection and self.input_genes:
            # NOTE(review): each UserRole payload is assumed to be a set of
            # gene names (set.union below requires it) -- confirm model code.
            genes = [model_index.data(Qt.UserRole)
                     for model_index in selection]
            output_genes = [gene_name for gene_name in list(set.union(*genes))]

            self.num_of_sel_genes = len(output_genes)
            self.update_info_box()

            if self.use_attr_names:
                # Genes are columns: keep attributes whose gene-id annotation
                # is among the selected genes.
                selected = [
                    column
                    for column in self.input_data.domain.attributes
                    if self.gene_id_attribute in column.attributes
                    and str(
                        column.attributes[self.gene_id_attribute]) in output_genes
                ]
                domain = Domain(selected, self.input_data.domain.class_vars,
                                self.input_data.domain.metas)
                new_data = self.input_data.from_table(domain, self.input_data)
                self.Outputs.matched_genes.send(new_data)
            else:
                # create filter from selected column for genes
                only_known = table_filter.FilterStringList(
                    self.gene_id_column, output_genes)
                # apply filter to the data
                data_table = table_filter.Values([only_known])(self.input_data)
                self.Outputs.matched_genes.send(data_table)
def __apply_filters(self, data_table):
    """Apply the configured gene filters to *data_table*.

    Returns:
        (data_table, gene_id): the possibly reduced table and the gene-id
        hint (NCBI_ID when it appears in the domain or in any variable's
        attribute annotations, possibly overridden by __handle_ids).
    """
    # Collect NCBI_ID keys found among the variables' attribute dicts.
    set_of_attributes = set([
        key for attr in data_table.domain[:] for key in attr.attributes.keys()
        if key == NCBI_ID
    ])
    # NOTE(review): "or set_of_attributes" makes any non-empty set select
    # NCBI_ID too -- presumably intended for genes-as-attributes; confirm.
    gene_id = NCBI_ID if NCBI_ID in data_table.domain or set_of_attributes else None

    if self.include_entrez_id:
        data_table, gene_id = self.__handle_ids(data_table)

    if self.filter_unknown:
        known_input_genes = [
            gene.input_name for gene in self.gene_matcher.get_known_genes()
        ]

        if self.use_attr_names:
            # Genes are columns: keep only attributes with a known name.
            temp_domain = Domain([
                attr for attr in data_table.domain.attributes
                if attr.name in known_input_genes
            ],
                                 metas=data_table.domain.metas,
                                 class_vars=data_table.domain.class_vars)
            data_table = data_table.transform(temp_domain)
        else:
            # create filter from selected column for genes
            only_known = table_filter.FilterStringList(
                self.selected_gene_col, known_input_genes)
            # apply filter to the data
            data_table = table_filter.Values([only_known])(data_table)

    return data_table, gene_id
def dataset_download(gds_id, samples=None, transpose=False, callback=None):
    """Download a GEO dataset and return it as an annotated Orange Table.

    Args:
        gds_id: GEO dataset identifier; '<gds_id>.tab' is fetched.
        samples: optional mapping {meta variable name: allowed sample types};
            when given, rows are filtered and a new 'class' variable is built
            from the joined sample-type values.
        transpose: transpose the table (genes as rows instead of columns);
            the 'Entrez ID' meta is also repaired to a StringVariable.
        callback: optional no-arg progress callable.

    Returns:
        The downloaded Table with gene/taxonomy annotations in .attributes.
    """
    file_name = '{}.tab'.format(gds_id)
    file_path = local_files.localpath_download(file_name, extract=True, callback=callback)
    table = Table(file_path)
    title = table.name
    gds_info = local_files.info(file_name)
    table_annotations = {TableAnnotation.tax_id: gds_info['taxid']}

    if callback:
        callback()

    if samples is not None:
        # Keep only rows whose sample metadata matches the requested types.
        filters = [table_filter.FilterStringList(sample, sample_types)
                   for sample, sample_types in samples.items()]
        table = table_filter.Values(filters)(table)

        # Build a discrete class from the '|'-joined per-row sample values.
        column_values = []
        for meta_var in samples.keys():
            column_values.append(table.get_column_view(table.domain[meta_var])[0])
        class_values = list(map('|'.join, zip(*column_values)))

        _class_values = list(set(class_values))
        map_class_values = {value: key for (key, value) in enumerate(_class_values)}
        class_var = DiscreteVariable(name='class', values=_class_values)
        _domain = Domain(table.domain.attributes,
                         table.domain.class_vars + (class_var,),
                         table.domain.metas)
        table = table.transform(_domain)
        col, _ = table.get_column_view(class_var)
        # Fill the new class column in place with the encoded values.
        col[:] = [map_class_values[class_val] for class_val in class_values]

    if transpose:
        table = Table.transpose(table, feature_names_column='sample_id', meta_attr_name='genes')
        # When transposing a table, variable.attributes get picked up as numerical values instead of strings.
        # We need to convert from Continuous to StringVariable
        _genes = [
            [str(int(gene)) if not np.isnan(gene) else '?']
            for gene in table.get_column_view('Entrez ID')[0].astype(np.float64)
        ]
        new_var = StringVariable('Entrez ID')
        metas = [var for var in table.domain.metas if var.name != 'Entrez ID'] + [new_var]
        new_domain = Domain(table.domain.attributes, table.domain.class_vars, metas)
        table = table.transform(new_domain)
        table[:, new_var] = _genes

        # table name is lost after transpose
        table.name = title
        # Transposing flips where genes live: negate the flag and expose
        # the gene id as a column annotation instead of an attribute one.
        table_annotations[TableAnnotation.gene_as_attr_name] = not gds_info[TableAnnotation.gene_as_attr_name]
        table_annotations[TableAnnotation.gene_id_column] = gds_info[TableAnnotation.gene_id_attribute]
    else:
        table_annotations[TableAnnotation.gene_as_attr_name] = gds_info[TableAnnotation.gene_as_attr_name]
        table_annotations[TableAnnotation.gene_id_attribute] = gds_info[TableAnnotation.gene_id_attribute]

    if callback:
        callback()

    table.attributes = table_annotations
    return table
def commit(self):
    """Apply the configured row conditions and send both outputs.

    Builds one data filter per (attribute, operator, values) condition,
    sends the conjunction as "Matching Data" and its negation as
    "Unmatched Data", then refreshes the info display.
    """
    matching_output = self.data
    non_matching_output = None
    if self.data:
        domain = self.data.domain
        conditions = []
        for attr_name, oper, values in self.conditions:
            attr_index = domain.index(attr_name)
            attr = domain[attr_index]
            if isinstance(attr, ContinuousVariable):
                # Skip conditions with an empty threshold (not filled in yet).
                if any(not v for v in values):
                    continue
                flt = data_filter.FilterContinuous(
                    attr_index, oper, *[float(v) for v in values])
            elif isinstance(attr, StringVariable):
                # BUG FIX: the guard was inverted ("any(v ...)"), which
                # skipped every string condition the user actually filled in;
                # mirror the continuous case and skip only empty values.
                if any(not v for v in values):
                    continue
                flt = data_filter.FilterString(
                    attr_index, oper, *[str(v) for v in values])
            else:
                if oper == 2:
                    # "is defined": FilterDiscrete with no explicit values.
                    f_values = None
                else:
                    if not values or not values[0]:
                        continue
                    # Combo indices are 1-based; map them to value names.
                    values = [attr.values[i - 1] for i in values]
                    if oper == 0:
                        f_values = {values[0]}
                    else:
                        # "is not": accept every other value of the attribute.
                        f_values = set(attr.values)
                        f_values.remove(values[0])
                flt = data_filter.FilterDiscrete(attr_index, f_values)
            conditions.append(flt)

        if conditions:
            filters = data_filter.Values(conditions)
            matching_output = filters(self.data)
            filters.negate = True
            non_matching_output = filters(self.data)

    self.send("Matching Data", matching_output)
    self.send("Unmatched Data", non_matching_output)
    self.update_info(matching_output, self.data_out_rows)
def test_continuous_value_filter_isdefined(self):
    """IsDefined keeps only rows whose column 1 is not missing."""
    cond = filter.FilterContinuous(1, filter.FilterContinuous.IsDefined)
    result = filter.Values(conditions=[cond])(self.table)
    expected = [row for row in self.data if row[1] is not None]
    self.assertEqual(len(result), len(expected))
    self.assertSequenceEqual(result, expected)
def test_continuous_value_filter_not_equal(self):
    """NotEqual keeps rows where column 0 differs from 1."""
    cond = filter.FilterContinuous(0, filter.FilterContinuous.NotEqual, 1)
    result = filter.Values(conditions=[cond])(self.table)
    expected = [row for row in self.data if row[0] != 1]
    self.assertEqual(len(result), len(expected))
    self.assertSequenceEqual(result, expected)
def output_data(self):
    """Filter self.data by the configured conditions and send both outputs.

    Builds one data filter per (attribute, operator, values) condition,
    sends the conjunction as "Matching Data" and its negation as
    "Unmatched Data".
    """
    matching_output = self.data
    non_matching_output = None
    if self.data:
        domain = self.data.domain
        filters = data_filter.Values()
        for attr_name, oper, values in self.conditions:
            attr_index = domain.index(attr_name)
            attr = domain[attr_index]
            if isinstance(attr, ContinuousVariable):
                # Skip conditions with an empty threshold (not filled in yet).
                if any(not v for v in values):
                    continue
                flt = data_filter.FilterContinuous(
                    attr_index, oper, *[float(v) for v in values])
            elif isinstance(attr, StringVariable):
                # BUG FIX: the guard was inverted ("any(v ...)"), which
                # dropped every string condition that actually had a value
                # entered; mirror the continuous case instead.
                if any(not v for v in values):
                    continue
                flt = data_filter.FilterString(
                    attr_index, oper, *[str(v) for v in values])
            else:
                if oper in [2, 3]:
                    raise NotImplementedError(
                        "subset filters for discrete attributes are not "
                        "implemented yet")
                elif oper == 4:
                    # "is defined": FilterDiscrete with no explicit values.
                    f_values = None
                else:
                    if not values or not values[0]:
                        continue
                    # Combo indices are 1-based; map to 0-based value indices.
                    if oper == 0:
                        f_values = {values[0] - 1}
                    else:
                        # "is not": accept every other value index.
                        f_values = set(range(len(attr.values)))
                        f_values.remove(values[0] - 1)
                flt = data_filter.FilterDiscrete(attr_index, f_values)
            filters.conditions.append(flt)

        matching_output = filters(self.data)
        filters.negate = True
        non_matching_output = filters(self.data)

        if hasattr(self.data, "name"):
            matching_output.name = self.data.name
            non_matching_output.name = self.data.name

    self.send("Matching Data", matching_output)
    self.send("Unmatched Data", non_matching_output)
def test_filter_string_not_equal(self):
    """NotEqual keeps rows whose string value differs from 'in'."""
    cond = filter.FilterString(-1, filter.FilterString.NotEqual, 'in')
    result = filter.Values(conditions=[cond])(self.table)
    expected = [SqlRowInstance(result.domain, row)
                for row in self.data if row[0] != 'in']
    self.assertEqual(len(result), len(expected))
    self.assertSequenceEqual(result, expected)
def test_continuous_value_filter_between(self):
    """Between keeps defined values of column 0 inside [1, 2]."""
    cond = filter.FilterContinuous(0, filter.FilterContinuous.Between, 1, 2)
    result = filter.Values(conditions=[cond])(self.table)
    expected = [row for row in self.data
                if row[0] is not None and 1 <= row[0] <= 2]
    self.assertEqual(len(result), len(expected))
    self.assertSequenceEqual(result, expected)
def test_continuous_value_filter_greater(self):
    """Greater keeps defined values of column 0 strictly above 1."""
    cond = filter.FilterContinuous(0, filter.FilterContinuous.Greater, 1)
    result = filter.Values(conditions=[cond])(self.table)
    expected = [row for row in self.data
                if row[0] is not None and row[0] > 1]
    self.assertEqual(len(result), len(expected))
    self.assertSequenceEqual(result, expected)
def test_filter_string_list_case_insensitive_data(self):
    """Case-insensitive FilterStringList matches 'Donec' rows via 'donec'."""
    cond = filter.FilterStringList(-1, ['donec'], case_sensitive=False)
    result = filter.Values(conditions=[cond])(self.table)
    expected = [SqlRowInstance(result.domain, row)
                for row in self.data if row[0] in ['Donec']]
    self.assertEqual(len(result), len(expected))
    self.assertSequenceEqual(result, expected)
def test_continuous_value_filter_outside(self):
    """Outside keeps defined values of column 0 outside [2, 3]."""
    cond = filter.FilterContinuous(0, filter.FilterContinuous.Outside, 2, 3)
    result = filter.Values(conditions=[cond])(self.table)
    expected = [row for row in self.data
                if row[0] is not None and not 2 <= row[0] <= 3]
    self.assertEqual(len(result), len(expected))
    self.assertSequenceEqual(result, expected)
def test_filter_string_is_defined(self):
    """IsDefined keeps rows whose string value is not missing."""
    cond = filter.FilterString(-1, filter.FilterString.IsDefined)
    result = filter.Values(conditions=[cond])(self.table)
    expected = [SqlRowInstance(result.domain, row)
                for row in self.data if row[0] is not None]
    self.assertEqual(len(result), len(expected))
    self.assertSequenceEqual(result, expected)
def test_filter_string_list(self):
    """FilterStringList keeps rows whose value is one of the listed strings."""
    cond = filter.FilterStringList(-1, ['et', 'in'])
    result = filter.Values(conditions=[cond])(self.table)
    expected = [SqlRowInstance(result.domain, row)
                for row in self.data if row[0] in ['et', 'in']]
    self.assertEqual(len(result), len(expected))
    self.assertSequenceEqual(result, expected)
def test_filter_string_ends_with(self):
    """EndsWith keeps defined rows whose value ends with 's'."""
    cond = filter.FilterString(-1, filter.FilterString.EndsWith, 's')
    result = filter.Values(conditions=[cond])(self.table)
    expected = [SqlRowInstance(result.domain, row)
                for row in self.data
                if row[0] is not None and row[0].endswith('s')]
    self.assertEqual(len(result), len(expected))
    self.assertSequenceEqual(result, expected)
def test_filter_string_outside(self):
    """Outside keeps defined rows lexically outside ('am', 'di')."""
    cond = filter.FilterString(-1, filter.FilterString.Outside, 'am', 'di')
    result = filter.Values(conditions=[cond])(self.table)
    expected = [SqlRowInstance(result.domain, row)
                for row in self.data
                if row[0] is not None and not 'am' < row[0] < 'di']
    self.assertEqual(len(result), len(expected))
    self.assertSequenceEqual(result, expected)
def test_filter_string_contains(self):
    """Contains keeps defined rows whose value contains 'et'."""
    cond = filter.FilterString(-1, filter.FilterString.Contains, 'et')
    result = filter.Values(conditions=[cond])(self.table)
    expected = [SqlRowInstance(result.domain, row)
                for row in self.data
                if row[0] is not None and 'et' in row[0]]
    self.assertEqual(len(result), len(expected))
    self.assertSequenceEqual(result, expected)
def test_filter_string_between(self):
    """Between keeps defined rows lexically within ['a', 'c']."""
    cond = filter.FilterString(-1, filter.FilterString.Between, 'a', 'c')
    result = filter.Values(conditions=[cond])(self.table)
    expected = [SqlRowInstance(result.domain, row)
                for row in self.data
                if row[0] is not None and 'a' <= row[0] <= 'c']
    self.assertEqual(len(result), len(expected))
    self.assertSequenceEqual(result, expected)
def test_filter_string_greater_equal(self):
    """GreaterEqual keeps defined rows at or above 'volutpat'."""
    cond = filter.FilterString(
        -1, filter.FilterString.GreaterEqual, 'volutpat')
    result = filter.Values(conditions=[cond])(self.table)
    expected = [SqlRowInstance(result.domain, row)
                for row in self.data
                if row[0] is not None and row[0] >= 'volutpat']
    self.assertEqual(len(result), len(expected))
    self.assertSequenceEqual(result, expected)
def test_filter_string_equal_case_insensitive_value(self):
    """Case-insensitive Equal with 'In' matches lowercase 'in' rows."""
    cond = filter.FilterString(-1, filter.FilterString.Equal, 'In',
                               case_sensitive=False)
    result = filter.Values(conditions=[cond])(self.table)
    expected = [SqlRowInstance(result.domain, row)
                for row in self.data if row[0] == 'in']
    self.assertEqual(len(result), len(expected))
    self.assertSequenceEqual(result, expected)
def test_filter_string_outside_case_insensitive(self):
    """Case-insensitive Outside compares lowercased values against ('d', 'k')."""
    cond = filter.FilterString(-1, filter.FilterString.Outside, 'd', 'k',
                               case_sensitive=False)
    result = filter.Values(conditions=[cond])(self.table)
    expected = [SqlRowInstance(result.domain, row)
                for row in self.data
                if row[0] is not None and not 'd' < row[0].lower() < 'k']
    self.assertEqual(len(result), len(expected))
    self.assertSequenceEqual(result, expected)
def test_filter_string_contains_case_insensitive_data(self):
    """Case-insensitive Contains matches 'do' against lowercased values."""
    cond = filter.FilterString(-1, filter.FilterString.Contains, 'do',
                               case_sensitive=False)
    result = filter.Values(conditions=[cond])(self.table)
    expected = [SqlRowInstance(result.domain, row)
                for row in self.data
                if row[0] is not None and 'do' in row[0].lower()]
    self.assertEqual(len(result), len(expected))
    self.assertSequenceEqual(result, expected)
def test_filter_string_between_case_insensitive_value(self):
    """Case-insensitive Between compares lowercased values against ('I', 'O')."""
    cond = filter.FilterString(-1, filter.FilterString.Between, 'I', 'O',
                               case_sensitive=False)
    result = filter.Values(conditions=[cond])(self.table)
    expected = [SqlRowInstance(result.domain, row)
                for row in self.data
                if row[0] is not None and 'i' < row[0].lower() <= 'o']
    self.assertEqual(len(result), len(expected))
    self.assertSequenceEqual(result, expected)