Beispiel #1
0
def create_annotated_table(data, selected_indices):
    """
    Append a selection flag to ``data`` as an additional meta column.

    The new discrete meta has the values "No" and "Yes"; rows listed in
    ``selected_indices`` are flagged, all other rows are not.

    :param data: Table to annotate, or ``None``
    :param selected_indices: list or ndarray of row indices, or ``None``
        when no row is selected
    :return: annotated Table, or ``None`` when ``data`` is ``None``
    """
    if data is None:
        return None

    source_domain = data.domain
    taken_names = [
        var.name for var in source_domain.variables + source_domain.metas
    ]
    flag_name = get_next_name(taken_names, ANNOTATED_DATA_FEATURE_NAME)
    flag_var = DiscreteVariable(flag_name, ("No", "Yes"))
    annotated_domain = Domain(
        source_domain.attributes,
        source_domain.class_vars,
        source_domain.metas + (flag_var, ),
    )

    # One-column array: 0 everywhere, 1 on the selected rows.
    flags = np.zeros((len(data), 1))
    if selected_indices is not None:
        flags[selected_indices] = 1

    result = Table(annotated_domain, data.X, data.Y,
                   metas=np.hstack((data.metas, flags)))
    result.attributes = data.attributes
    result.ids = data.ids
    return result
Beispiel #2
0
def dataset_download(gds_id, samples=None, transpose=False, callback=None):
    """
    Load the table stored as ``<gds_id>.tab`` and attach annotations to it.

    :param gds_id: identifier used to build the file name ``<gds_id>.tab``
    :param samples: optional dict; every ``(key, value)`` pair becomes a
        ``FilterStringList`` and every key is looked up in the table domain.
        When given, a discrete ``class`` variable is appended whose values
        are the ``|``-joined entries of the key columns
    :param transpose: if true, the table is transposed and its
        ``Entrez ID`` column is converted to a string meta
    :param callback: optional callable; forwarded to the download and
        called without arguments twice in this function
    :return: Table whose ``attributes`` is the annotation dict built here
    """
    file_name = '{}.tab'.format(gds_id)
    file_path = local_files.localpath_download(file_name, extract=True, callback=callback)

    table = Table(file_path)
    title = table.name
    gds_info = local_files.info(file_name)
    table_annotations = {TableAnnotation.tax_id: gds_info['taxid']}

    if callback:
        callback()

    if samples is not None:
        filters = [table_filter.FilterStringList(sample, sample_types) for sample, sample_types in samples.items()]
        table = table_filter.Values(filters)(table)

        column_values = [table.get_column_view(table.domain[meta_var])[0] for meta_var in samples]

        # One label per row, built from the selected columns.
        class_values = ['|'.join(row_values) for row_values in zip(*column_values)]

        # Sort the distinct labels: iterating a bare set of strings yields an
        # order that may change between interpreter runs (hash randomisation),
        # which would make the value order of the class variable unstable.
        unique_class_values = sorted(set(class_values))
        value_to_index = {value: index for index, value in enumerate(unique_class_values)}
        class_var = DiscreteVariable(name='class', values=unique_class_values)
        _domain = Domain(table.domain.attributes, table.domain.class_vars + (class_var,), table.domain.metas)

        table = table.transform(_domain)
        col, _ = table.get_column_view(class_var)
        col[:] = [value_to_index[class_val] for class_val in class_values]

    if transpose:
        table = Table.transpose(table, feature_names_column='sample_id', meta_attr_name='genes')

        # When transposing a table, variable.attributes get picked up as numerical values instead of strings.
        # We need to convert from Continuous to StringVariable
        _genes = [
            [str(int(gene)) if not np.isnan(gene) else '?']
            for gene in table.get_column_view('Entrez ID')[0].astype(np.float64)
        ]
        new_var = StringVariable('Entrez ID')
        metas = [var for var in table.domain.metas if var.name != 'Entrez ID'] + [new_var]
        new_domain = Domain(table.domain.attributes, table.domain.class_vars, metas)
        table = table.transform(new_domain)
        table[:, new_var] = _genes

        # table name is lost after transpose
        table.name = title

        # After transposing, the "genes as attribute names" flag is inverted.
        table_annotations[TableAnnotation.gene_as_attr_name] = not gds_info[TableAnnotation.gene_as_attr_name]
        table_annotations[TableAnnotation.gene_id_column] = gds_info[TableAnnotation.gene_id_attribute]
    else:
        table_annotations[TableAnnotation.gene_as_attr_name] = gds_info[TableAnnotation.gene_as_attr_name]
        table_annotations[TableAnnotation.gene_id_attribute] = gds_info[TableAnnotation.gene_id_attribute]

    if callback:
        callback()

    table.attributes = table_annotations
    return table
Beispiel #3
0
def dataset_download(gds_id, samples=None, transpose=False, callback=None):
    """
    Load the table stored as ``<gds_id>.tab`` and attach annotations to it.

    :param gds_id: identifier used to build the file name ``<gds_id>.tab``
    :param samples: optional dict; every ``(key, value)`` pair becomes a
        ``FilterStringList`` and every key is looked up in the table domain.
        When given, a discrete ``class`` variable is appended whose values
        are the ``|``-joined entries of the key columns
    :param transpose: if true, the transposed table is returned
    :param callback: optional callable; forwarded to ``local_files.update``
        and called without arguments twice in this function
    :return: Table whose ``attributes`` is the annotation dict built here
    """
    file_name = '{}.tab'.format(gds_id)
    local_files.update(file_name, extract=True, callback=callback)

    table = Table(local_files.localpath_download(file_name))
    title = table.name
    gds_info = local_files.info(file_name)
    table_annotations = {TableAnnotation.tax_id: gds_info['taxid']}

    if callback:
        callback()

    if samples is not None:
        filters = [
            table_filter.FilterStringList(sample, sample_types)
            for sample, sample_types in samples.items()
        ]
        table = table_filter.Values(filters)(table)

        column_values = [
            table.get_column_view(table.domain[meta_var])[0]
            for meta_var in samples
        ]

        # One label per row, built from the selected columns.
        class_values = [
            '|'.join(row_values) for row_values in zip(*column_values)
        ]

        # Sort the distinct labels: iterating a bare set of strings yields
        # an order that may change between interpreter runs (hash
        # randomisation), which would make the value order of the class
        # variable unstable.
        unique_class_values = sorted(set(class_values))
        value_to_index = {
            value: index
            for index, value in enumerate(unique_class_values)
        }
        class_var = DiscreteVariable(name='class',
                                     values=unique_class_values)
        _domain = Domain(table.domain.attributes,
                         table.domain.class_vars + (class_var, ),
                         table.domain.metas)

        table = table.transform(_domain)
        col, _ = table.get_column_view(class_var)
        col[:] = [value_to_index[class_val] for class_val in class_values]

    if transpose:
        table = Table.transpose(table,
                                feature_names_column='sample_id',
                                meta_attr_name='genes')
        table.name = title  # table name is lost after transpose
        # After transposing, the "genes as attribute names" flag is inverted.
        table_annotations[TableAnnotation.gene_as_attr_name] = not gds_info[
            TableAnnotation.gene_as_attr_name]
        table_annotations[TableAnnotation.gene_id_column] = gds_info[
            TableAnnotation.gene_id_attribute]
    else:
        table_annotations[TableAnnotation.gene_as_attr_name] = gds_info[
            TableAnnotation.gene_as_attr_name]
        table_annotations[TableAnnotation.gene_id_attribute] = gds_info[
            TableAnnotation.gene_id_attribute]

    if callback:
        callback()

    table.attributes = table_annotations
    return table
Beispiel #4
0
 def test_no_metadata(self):
     """No ``.metadata`` file is written when table attributes are empty."""
     # The context manager removes the directory even when the write or
     # the assertion fails, so a failing run leaves nothing behind.
     with tempfile.TemporaryDirectory() as tempdir:
         table = Table("titanic")
         table.attributes = OrderedDict()
         fname = path.join(tempdir, "out.tab")
         TabReader.write_table_metadata(fname, table)
         self.assertFalse(path.isfile(fname + ".metadata"))
Beispiel #5
0
 def test_no_metadata(self):
     """Writing metadata for empty table attributes creates no file."""
     # The temporary directory is removed when the block is left,
     # whether or not the assertion passes.
     with tempfile.TemporaryDirectory() as workdir:
         titanic = Table("titanic")
         titanic.attributes = OrderedDict()
         target = path.join(workdir, "out.tab")
         TabReader.write_table_metadata(target, titanic)
         metadata_written = path.isfile(target + ".metadata")
         self.assertFalse(metadata_written)
Beispiel #6
0
 def _data_with_similarity(self, indices):
     """
     Build a table from the rows of ``self.data`` at ``indices`` with
     the matching entries of ``self.distances`` appended as a meta column.

     :param indices: index applied to ``self.distances`` and to the
         ``X``, ``Y`` and ``metas`` arrays of ``self.data``
     :return: Table whose ``attributes`` are those of ``self.data``
     """
     source = self.data
     source_domain = source.domain
     distance_var = ContinuousVariable(
         get_unique_names(source_domain, "distance"))
     extended_domain = Domain(
         source_domain.attributes,
         source_domain.class_vars,
         source_domain.metas + (distance_var, ),
     )

     # Distances as a single column; existing metas (if any) go in front.
     distance_column = self.distances[indices].reshape((-1, 1))
     if source_domain.metas:
         meta_block = np.hstack((source.metas[indices], distance_column))
     else:
         meta_block = distance_column

     result = Table(extended_domain, source.X[indices], source.Y[indices],
                    meta_block)
     result.attributes = self.data.attributes
     return result
Beispiel #7
0
 def _data_with_similarity(self, indices):
     """
     Return the rows of ``self.data`` selected by ``indices``, extended
     with a continuous meta column holding ``self.distances[indices]``.

     :param indices: index applied to ``self.distances`` and to the
         ``X``, ``Y`` and ``metas`` arrays of ``self.data``
     :return: Table whose ``attributes`` are those of ``self.data``
     """
     table = self.data
     domain_in = table.domain
     new_meta = ContinuousVariable(get_unique_names(domain_in, "distance"))
     domain_out = Domain(domain_in.attributes,
                         domain_in.class_vars,
                         domain_in.metas + (new_meta, ))

     # Reshape to one column so it can be stacked next to existing metas.
     distances = self.distances[indices].reshape((-1, 1))
     if domain_in.metas:
         all_metas = np.hstack((table.metas[indices], distances))
     else:
         all_metas = distances

     selected = Table(domain_out, table.X[indices], table.Y[indices],
                      all_metas)
     selected.attributes = self.data.attributes
     return selected
 def test_metadata(self):
     """Writing metadata for non-empty attributes creates the file."""
     # The temporary directory is removed when the block is left,
     # whether or not the assertion passes.
     with tempfile.TemporaryDirectory() as workdir:
         titanic = Table("titanic")
         titanic.attributes = OrderedDict()
         for key, value in (("a", "aa"), ("b", "bb")):
             titanic.attributes[key] = value
         target = path.join(workdir, "out.tab")
         TabReader.write_table_metadata(target, titanic)
         metadata_written = path.isfile(target + ".metadata")
         self.assertTrue(metadata_written)
Beispiel #9
0
 def test_attributes_saving_as_txt(self):
     """Attributes survive saving to and reloading from a .tab file."""
     # The context manager removes the directory even when saving,
     # loading or an assertion fails.
     with tempfile.TemporaryDirectory() as tempdir:
         fname = path.join(tempdir, "out.tab")
         table = Table("titanic")
         table.attributes = OrderedDict()
         table.attributes["a"] = "aa"
         table.attributes["b"] = "bb"
         table.save(fname)
         # Rebind to the reloaded table; the checks run against it.
         table = Table(fname)
         self.assertIsInstance(table.attributes, OrderedDict)
         self.assertEqual(table.attributes["a"], "aa")
         self.assertEqual(table.attributes["b"], "bb")
Beispiel #10
0
 def test_attributes_saving_as_txt(self):
     """Attributes survive saving to and reloading from a .tab file."""
     # The context manager removes the directory even when saving,
     # loading or an assertion fails.
     with tempfile.TemporaryDirectory() as tempdir:
         fname = path.join(tempdir, "out.tab")
         table = Table("titanic")
         table.attributes = OrderedDict()
         table.attributes["a"] = "aa"
         table.attributes["b"] = "bb"
         table.save(fname)
         # Rebind to the reloaded table; the checks run against it.
         table = Table(fname)
         self.assertIsInstance(table.attributes, OrderedDict)
         self.assertEqual(table.attributes["a"], "aa")
         self.assertEqual(table.attributes["b"], "bb")
Beispiel #11
0
 def create_groups_table(data, selection):
     """
     Append a discrete meta column built from ``selection`` to ``data``.

     The new variable has the values "Unselected", "G1", "G2", ... up to
     the maximum found in ``selection``.

     :param data: Table, or ``None``
     :param selection: ndarray with one entry per row of ``data``; it is
         reshaped to a single column and stored as the new meta
     :return: Table, or ``None`` when ``data`` is ``None``
     """
     if data is None:
         return None

     source_domain = data.domain
     taken_names = [
         var.name for var in source_domain.variables + source_domain.metas
     ]
     group_name = get_next_name(taken_names, "Selection group")
     group_labels = ["Unselected"] + [
         "G{}".format(i + 1) for i in range(np.max(selection))
     ]
     group_var = DiscreteVariable(group_name, group_labels)
     grouped_domain = Domain(
         source_domain.attributes,
         source_domain.class_vars,
         source_domain.metas + (group_var, ),
     )

     selection_column = selection.reshape(len(data), 1)
     result = Table(grouped_domain, data.X, data.Y,
                    metas=np.hstack((data.metas, selection_column)))
     result.attributes = data.attributes
     result.ids = data.ids
     return result
Beispiel #12
0
def create_annotated_table(data, selected_indices):
    """
    Returns data with concatenated flag column. Flag column represents
    whether data instance has been selected (Yes) or not (No), which is
    determined in selected_indices parameter.

    The returned table reuses the attributes and the row ids of ``data``.

    :param data: Table, or ``None``
    :param selected_indices: list or ndarray of row indices, or ``None``
        when no row is selected
    :return: Table, or ``None`` when ``data`` is ``None``
    """
    if data is None:
        return None
    names = [var.name for var in data.domain.variables + data.domain.metas]
    name = _get_next_name(names, ANNOTATED_DATA_FEATURE_NAME)
    metas = data.domain.metas + (DiscreteVariable(name, ("No", "Yes")),)
    domain = Domain(data.domain.attributes, data.domain.class_vars, metas)
    # One-column array: 0 everywhere, 1 on the selected rows.
    annotated = np.zeros((len(data), 1))
    if selected_indices is not None:
        annotated[selected_indices] = 1
    table = Table(domain, data.X, data.Y,
                  metas=np.hstack((data.metas, annotated)))
    table.attributes = data.attributes
    # Carry the row ids over; without this the annotated rows get ids that
    # no longer correspond to the rows of the input table.
    table.ids = data.ids
    return table
Beispiel #13
0
    def test_match_attr_name(self):
        """Selecting a data attribute pre-selects a matching extra attribute.

        Each scenario first sets the extra-data combo, then the data combo,
        and checks which index the extra-data combo ends up on.
        """
        widget = self.widget
        first_row = widget.attr_boxes.rows[0]
        data_combo = first_row.left_combo
        extra_combo = first_row.right_combo

        attributes_a = [
            DiscreteVariable("dA1", ("a", "b", "c", "d")),
            DiscreteVariable("dA2", ("aa", "bb")),
            DiscreteVariable("dA3", ("aa", "bb")),
        ]
        class_a = DiscreteVariable("cls", ("aaa", "bbb", "ccc"))
        meta_vars_a = [
            DiscreteVariable("mA1", ("cc", "dd")),
            StringVariable("mA2"),
        ]
        domainA = Domain(attributes_a, class_a, meta_vars_a)
        XA = np.array([[0, 0, 0], [1, 1, 0], [2, 0, 0], [3, 1, 0]])
        yA = np.array([0, 1, 2, np.nan])
        metasA = np.array([[0.0, "m1"], [1.0, "m2"], [np.nan, "m3"],
                           [0.0, "m4"]]).astype(object)

        attributes_b = [
            DiscreteVariable("dB1", values=("a", "b", "c")),
            ContinuousVariable("dA2"),
        ]
        meta_vars_b = [
            StringVariable("cls"),
            DiscreteVariable("dA1", ("m4", "m5")),
        ]
        domainB = Domain(attributes_b, None, meta_vars_b)
        XB = np.array([[0, 0], [1, 1], [2, np.nan]])
        yB = np.empty((3, 0))
        metasB = np.array([[np.nan, np.nan], [1, 1], [0, 0]]).astype(object)

        dataA = Table(domainA, XA, yA, metasA)
        dataA.name = 'dataA'
        dataA.attributes = 'dataA attributes'
        dataB = Table(domainB, XB, yB, metasB)
        dataB.name = 'dataB'
        dataB.attributes = 'dataB attributes'

        self.send_signal(widget.Inputs.data, dataA)
        self.send_signal(widget.Inputs.extra_data, dataB)

        def choose(combo, index):
            # Mimic a user pick: change the index and fire `activated`.
            combo.setCurrentIndex(index)
            combo.activated.emit(index)

        # (extra combo index, data combo index, expected extra combo index)
        scenarios = (
            # match variable if available and the other combo is Row Index
            (0, 2, 5),
            # match variable if available and the other combo is ID
            (1, 2, 5),
            # don't match variable if other combo is set
            (4, 2, 4),
            # don't match if nothing to match to
            (0, 4, 0),
            # don't match numeric with non-numeric
            (0, 3, 0),
            # allow matching string with discrete
            (0, 5, 4),
        )
        for extra_index, data_index, expected_index in scenarios:
            choose(extra_combo, extra_index)
            choose(data_combo, data_index)
            self.assertEqual(extra_combo.currentIndex(), expected_index)
Beispiel #14
0
 def test_data_attributes(self):
     """Sending data whose attributes mix int and bool values works."""
     iris = Table("iris")
     mixed_attributes = {"att 1": 1, "att 2": True, "att 3": 3}
     iris.attributes = mixed_attributes
     data_input = self.widget.Inputs.data
     self.send_signal(data_input, iris)
Beispiel #15
0
 def test_data_attributes(self):
     """The widget takes a table with int and bool attribute values."""
     table = Table("iris")
     attribute_values = (1, True, 3)
     table.attributes = {
         "att 1": attribute_values[0],
         "att 2": attribute_values[1],
         "att 3": attribute_values[2],
     }
     self.send_signal(self.widget.Inputs.data, table)