def create_annotated_table(data, selected_indices):
    """
    Build a copy of data extended with a Yes/No selection flag in the metas.

    The flag is a discrete meta variable with values ("No", "Yes"); rows
    addressed by selected_indices get "Yes" (1), all other rows "No" (0).

    :param data: Table
    :param selected_indices: list or ndarray; None marks every row as "No"
    :return: Table, or None when data is None
    """
    if data is None:
        return None

    source_domain = data.domain
    taken_names = [
        variable.name
        for variable in source_domain.variables + source_domain.metas
    ]
    # Ask for a name that does not clash with any existing variable name
    flag_name = get_next_name(taken_names, ANNOTATED_DATA_FEATURE_NAME)
    flag_var = DiscreteVariable(flag_name, ("No", "Yes"))
    flagged_domain = Domain(source_domain.attributes,
                            source_domain.class_vars,
                            source_domain.metas + (flag_var, ))

    # One column, initially all zeros ("No"); selected rows become 1 ("Yes")
    flags = np.zeros((len(data), 1))
    if selected_indices is not None:
        flags[selected_indices] = 1

    result = Table(flagged_domain, data.X, data.Y,
                   metas=np.hstack((data.metas, flags)))
    result.attributes = data.attributes
    result.ids = data.ids
    return result
def dataset_download(gds_id, samples=None, transpose=False, callback=None):
    """
    Load the data set '<gds_id>.tab' as a Table and attach its annotations.

    :param gds_id: identifier of the data set; the file name is
        '<gds_id>.tab'
    :param samples: optional dict mapping a meta column name to the sample
        types to keep; when given, the table is filtered and a discrete
        'class' variable is appended whose values are the '|'-joined
        values of those columns
    :param transpose: when true, the table is transposed and the
        'Entrez ID' meta column is rebuilt as a string variable
    :param callback: optional callable; forwarded to the download helper
        and also called without arguments after loading and before
        returning
    :return: Table whose ``attributes`` hold the table annotations
    """
    file_name = '{}.tab'.format(gds_id)
    file_path = local_files.localpath_download(file_name, extract=True,
                                               callback=callback)

    table = Table(file_path)
    title = table.name
    gds_info = local_files.info(file_name)
    table_annotations = {TableAnnotation.tax_id: gds_info['taxid']}

    if callback:
        callback()

    if samples is not None:
        # NOTE(review): presumably keeps rows whose value in column `sample`
        # is one of `sample_types` - confirm against table_filter.
        filters = [
            table_filter.FilterStringList(sample, sample_types)
            for sample, sample_types in samples.items()
        ]
        table = table_filter.Values(filters)(table)

        column_values = [
            table.get_column_view(table.domain[meta_var])[0]
            for meta_var in samples
        ]
        # One '|'-joined label per row, built from the selected columns
        class_values = ['|'.join(row) for row in zip(*column_values)]

        # Sort the distinct labels: iterating a set of strings has no stable
        # order between interpreter runs, which would make the order of the
        # class variable's values non-reproducible.
        _class_values = sorted(set(class_values))
        map_class_values = {
            value: index for index, value in enumerate(_class_values)
        }
        class_var = DiscreteVariable(name='class', values=_class_values)
        _domain = Domain(table.domain.attributes,
                         table.domain.class_vars + (class_var, ),
                         table.domain.metas)

        table = table.transform(_domain)
        # Fill the new class column in place through the column view
        col, _ = table.get_column_view(class_var)
        col[:] = [map_class_values[class_val] for class_val in class_values]

    if transpose:
        table = Table.transpose(table, feature_names_column='sample_id',
                                meta_attr_name='genes')

        # When transposing a table, variable.attributes get picked up as
        # numerical values instead of strings.
        # We need to convert from Continuous to StringVariable
        _genes = [
            [str(int(gene)) if not np.isnan(gene) else '?']
            for gene in table.get_column_view('Entrez ID')[0].astype(np.float64)
        ]
        new_var = StringVariable('Entrez ID')
        metas = [
            var for var in table.domain.metas if var.name != 'Entrez ID'
        ] + [new_var]
        new_domain = Domain(table.domain.attributes, table.domain.class_vars,
                            metas)
        table = table.transform(new_domain)
        table[:, new_var] = _genes

        # table name is lost after transpose
        table.name = title

        # After transposing, the gene-as-attribute-name flag is inverted and
        # the gene id is recorded under the column key.
        table_annotations[TableAnnotation.gene_as_attr_name] = \
            not gds_info[TableAnnotation.gene_as_attr_name]
        table_annotations[TableAnnotation.gene_id_column] = \
            gds_info[TableAnnotation.gene_id_attribute]
    else:
        table_annotations[TableAnnotation.gene_as_attr_name] = \
            gds_info[TableAnnotation.gene_as_attr_name]
        table_annotations[TableAnnotation.gene_id_attribute] = \
            gds_info[TableAnnotation.gene_id_attribute]

    if callback:
        callback()

    table.attributes = table_annotations
    return table
def dataset_download(gds_id, samples=None, transpose=False, callback=None):
    """
    Update and load the data set '<gds_id>.tab' and attach its annotations.

    :param gds_id: identifier of the data set; the file name is
        '<gds_id>.tab'
    :param samples: optional dict mapping a meta column name to the sample
        types to keep; when given, the table is filtered and a discrete
        'class' variable is appended whose values are the '|'-joined
        values of those columns
    :param transpose: when true, the table is transposed with 'sample_id'
        as the feature names column
    :param callback: optional callable; forwarded to the update helper and
        also called without arguments after loading and before returning
    :return: Table whose ``attributes`` hold the table annotations
    """
    file_name = '{}.tab'.format(gds_id)
    local_files.update(file_name, extract=True, callback=callback)

    table = Table(local_files.localpath_download(file_name))
    title = table.name
    gds_info = local_files.info(file_name)
    table_annotations = {TableAnnotation.tax_id: gds_info['taxid']}

    if callback:
        callback()

    if samples is not None:
        # NOTE(review): presumably keeps rows whose value in column `sample`
        # is one of `sample_types` - confirm against table_filter.
        filters = [
            table_filter.FilterStringList(sample, sample_types)
            for sample, sample_types in samples.items()
        ]
        table = table_filter.Values(filters)(table)

        column_values = [
            table.get_column_view(table.domain[meta_var])[0]
            for meta_var in samples
        ]
        # One '|'-joined label per row, built from the selected columns
        class_values = ['|'.join(row) for row in zip(*column_values)]

        # Sort the distinct labels: iterating a set of strings has no stable
        # order between interpreter runs, which would make the order of the
        # class variable's values non-reproducible.
        _class_values = sorted(set(class_values))
        map_class_values = {
            value: index for index, value in enumerate(_class_values)
        }
        class_var = DiscreteVariable(name='class', values=_class_values)
        _domain = Domain(table.domain.attributes,
                         table.domain.class_vars + (class_var, ),
                         table.domain.metas)

        table = table.transform(_domain)
        # Fill the new class column in place through the column view
        col, _ = table.get_column_view(class_var)
        col[:] = [map_class_values[class_val] for class_val in class_values]

    if transpose:
        table = Table.transpose(table, feature_names_column='sample_id',
                                meta_attr_name='genes')
        table.name = title  # table name is lost after transpose
        # After transposing, the gene-as-attribute-name flag is inverted and
        # the gene id is recorded under the column key.
        table_annotations[TableAnnotation.gene_as_attr_name] = \
            not gds_info[TableAnnotation.gene_as_attr_name]
        table_annotations[TableAnnotation.gene_id_column] = \
            gds_info[TableAnnotation.gene_id_attribute]
    else:
        table_annotations[TableAnnotation.gene_as_attr_name] = \
            gds_info[TableAnnotation.gene_as_attr_name]
        table_annotations[TableAnnotation.gene_id_attribute] = \
            gds_info[TableAnnotation.gene_id_attribute]

    if callback:
        callback()

    table.attributes = table_annotations
    return table
def test_no_metadata(self):
    """No '.metadata' file is written for a table with empty attributes."""
    tempdir = tempfile.mkdtemp()
    try:
        table = Table("titanic")
        table.attributes = OrderedDict()
        fname = path.join(tempdir, "out.tab")
        TabReader.write_table_metadata(fname, table)
        self.assertFalse(path.isfile(fname + ".metadata"))
    finally:
        # Remove the scratch directory even if loading, writing or the
        # assertion above fails, so failed runs do not leave it behind.
        shutil.rmtree(tempdir)
def test_no_metadata(self):
    """Writing metadata for a table without attributes creates no file."""
    scratch_dir = tempfile.mkdtemp()
    try:
        target = path.join(scratch_dir, "out.tab")
        titanic = Table("titanic")
        titanic.attributes = OrderedDict()

        TabReader.write_table_metadata(target, titanic)

        metadata_written = path.isfile(target + ".metadata")
        self.assertFalse(metadata_written)
    finally:
        # The scratch directory is removed whether or not the check passed
        shutil.rmtree(scratch_dir)
def _data_with_similarity(self, indices):
    """
    Return the rows of self.data at indices with a distance meta column.

    The new continuous meta variable gets a unique name based on
    "distance" and holds self.distances[indices]; it is placed after any
    existing metas.

    :param indices: index into the rows of self.data and self.distances
    :return: Table sharing the attributes of self.data
    """
    source = self.data
    source_domain = source.domain

    distance_var = ContinuousVariable(
        get_unique_names(source_domain, "distance"))
    extended_domain = Domain(source_domain.attributes,
                             source_domain.class_vars,
                             source_domain.metas + (distance_var, ))

    # Distances as a single column, one row per selected instance
    distance_column = self.distances[indices].reshape((-1, 1))
    if source_domain.metas:
        meta_block = np.hstack((source.metas[indices], distance_column))
    else:
        meta_block = distance_column

    result = Table(extended_domain, source.X[indices], source.Y[indices],
                   meta_block)
    result.attributes = self.data.attributes
    return result
def test_metadata(self):
    """Writing metadata for a table with attributes creates the file."""
    scratch_dir = tempfile.mkdtemp()
    try:
        target = path.join(scratch_dir, "out.tab")
        titanic = Table("titanic")
        titanic.attributes = OrderedDict([("a", "aa"), ("b", "bb")])

        TabReader.write_table_metadata(target, titanic)

        metadata_written = path.isfile(target + ".metadata")
        self.assertTrue(metadata_written)
    finally:
        # The scratch directory is removed whether or not the check passed
        shutil.rmtree(scratch_dir)
def test_attributes_saving_as_txt(self):
    """Table attributes survive a save/load round trip through a .tab file."""
    tempdir = tempfile.mkdtemp()
    try:
        table = Table("titanic")
        table.attributes = OrderedDict()
        table.attributes["a"] = "aa"
        table.attributes["b"] = "bb"
        table.save(path.join(tempdir, "out.tab"))
        table = Table(path.join(tempdir, "out.tab"))
        self.assertIsInstance(table.attributes, OrderedDict)
        self.assertEqual(table.attributes["a"], "aa")
        self.assertEqual(table.attributes["b"], "bb")
    finally:
        # Remove the scratch directory even if saving, loading or one of
        # the assertions fails, so failed runs do not leave it behind.
        shutil.rmtree(tempdir)
def create_groups_table(data, selection):
    """
    Return data with an added meta column holding each row's selection group.

    The new discrete meta variable has the values "Unselected", "G1", ...,
    "G<max>", where <max> is the largest entry of selection. The entries of
    selection are stored as that column's values.

    :param data: Table
    :param selection: ndarray with one entry per data instance
    :return: Table, or None when data is None
    """
    if data is None:
        return None
    names = [var.name for var in data.domain.variables + data.domain.metas]
    name = get_next_name(names, "Selection group")

    # np.max raises ValueError on an empty array; an empty selection simply
    # has no groups. The int() cast lets a float-typed selection through
    # range(), which only accepts integers.
    group_count = int(np.max(selection)) if np.size(selection) else 0
    values = ["Unselected"] + [
        "G{}".format(i + 1) for i in range(group_count)
    ]

    metas = data.domain.metas + (DiscreteVariable(name, values), )
    domain = Domain(data.domain.attributes, data.domain.class_vars, metas)
    table = Table(
        domain, data.X, data.Y,
        metas=np.hstack((data.metas, selection.reshape(len(data), 1))))
    table.attributes = data.attributes
    table.ids = data.ids
    return table
def create_annotated_table(data, selected_indices):
    """
    Returns data with concatenated flag column. Flag column represents
    whether data instance has been selected (Yes) or not (No), which is
    determined in selected_indices parameter.

    The returned table shares the attributes of data and keeps its
    instance ids.

    :param data: Table
    :param selected_indices: list or ndarray; None marks every row as "No"
    :return: Table, or None when data is None
    """
    if data is None:
        return None
    names = [var.name for var in data.domain.variables + data.domain.metas]
    # Ask for a name that does not clash with any existing variable name
    name = _get_next_name(names, ANNOTATED_DATA_FEATURE_NAME)
    metas = data.domain.metas + (DiscreteVariable(name, ("No", "Yes")),)
    domain = Domain(data.domain.attributes, data.domain.class_vars, metas)
    # One column, initially all zeros ("No"); selected rows become 1 ("Yes")
    annotated = np.zeros((len(data), 1))
    if selected_indices is not None:
        annotated[selected_indices] = 1
    table = Table(domain, data.X, data.Y,
                  metas=np.hstack((data.metas, annotated)))
    table.attributes = data.attributes
    # Rows are the same instances as in data, so carry their ids over
    # instead of leaving the new table with ids of its own.
    table.ids = data.ids
    return table
def test_match_attr_name(self):
    """Choosing a data attribute updates the extra-data combo as expected."""
    widget = self.widget
    first_row = widget.attr_boxes.rows[0]
    data_combo = first_row.left_combo
    extra_combo = first_row.right_combo

    def choose(combo, index):
        # Set the index and emit `activated`, as the test does for a user pick
        combo.setCurrentIndex(index)
        combo.activated.emit(index)

    def assert_match(extra_index, data_index, expected_extra_index):
        # Preset the extra combo, pick in the data combo, then check where
        # the extra combo ended up
        choose(extra_combo, extra_index)
        choose(data_combo, data_index)
        self.assertEqual(extra_combo.currentIndex(), expected_extra_index)

    domainA = Domain(
        [
            DiscreteVariable("dA1", ("a", "b", "c", "d")),
            DiscreteVariable("dA2", ("aa", "bb")),
            DiscreteVariable("dA3", ("aa", "bb")),
        ],
        DiscreteVariable("cls", ("aaa", "bbb", "ccc")),
        [DiscreteVariable("mA1", ("cc", "dd")), StringVariable("mA2")])
    XA = np.array([[0, 0, 0], [1, 1, 0], [2, 0, 0], [3, 1, 0]])
    yA = np.array([0, 1, 2, np.nan])
    metasA = np.array(
        [[0.0, "m1"], [1.0, "m2"], [np.nan, "m3"], [0.0, "m4"]]
    ).astype(object)

    domainB = Domain(
        [
            DiscreteVariable("dB1", values=("a", "b", "c")),
            ContinuousVariable("dA2"),
        ],
        None,
        [StringVariable("cls"), DiscreteVariable("dA1", ("m4", "m5"))])
    XB = np.array([[0, 0], [1, 1], [2, np.nan]])
    yB = np.empty((3, 0))
    metasB = np.array([[np.nan, np.nan], [1, 1], [0, 0]]).astype(object)

    dataA = Table(domainA, XA, yA, metasA)
    dataA.name = 'dataA'
    dataA.attributes = 'dataA attributes'
    dataB = Table(domainB, XB, yB, metasB)
    dataB.name = 'dataB'
    dataB.attributes = 'dataB attributes'

    self.send_signal(widget.Inputs.data, dataA)
    self.send_signal(widget.Inputs.extra_data, dataB)

    # match variable if available and the other combo is Row Index
    assert_match(0, 2, 5)

    # match variable if available and the other combo is ID
    assert_match(1, 2, 5)

    # don't match variable if other combo is set
    assert_match(4, 2, 4)

    # don't match if nothing to match to
    assert_match(0, 4, 0)

    # don't match numeric with non-numeric
    assert_match(0, 3, 0)

    # allow matching string with discrete
    assert_match(0, 5, 4)
def test_data_attributes(self):
    """Sending data whose attributes have mixed value types must not crash."""
    mixed_attributes = {"att 1": 1, "att 2": True, "att 3": 3}
    iris = Table("iris")
    iris.attributes = mixed_attributes
    self.send_signal(self.widget.Inputs.data, iris)