Exemple #1
0
    def set_data(self, data):
        """
        Store the input data and rebuild the list of selectable attributes.

        An SqlTable whose approximate length exceeds LARGE_TABLE is replaced
        by `data.sample_time(DEFAULT_SAMPLE_TIME)`. When the domain contains
        a continuous attribute, the data is discretized with
        `EqualFreq(n=4)`; otherwise it is used as is. `self.attrs` is filled
        with the variables of the (discretized) domain followed by the
        discrete metas of the original domain, and the first two of them are
        chosen as `attrX` / `attrY` before the context is reopened.

        Args:
            data: input table, or None to clear the attribute list
        """
        # isinstance (not an exact type comparison) so that subclasses of
        # SqlTable are sampled as well.
        if isinstance(data, SqlTable) and data.approx_len() > LARGE_TABLE:
            data = data.sample_time(DEFAULT_SAMPLE_TIME)

        self.closeContext()
        self.data = data
        self.areas = []
        if self.data is None:
            self.attrs[:] = []
        else:
            if any(attr.is_continuous for attr in data.domain):
                self.discrete_data = Discretize(method=EqualFreq(n=4))(data)
            else:
                self.discrete_data = self.data
            discrete_metas = (
                var for var in self.data.domain.metas if var.is_discrete)
            # Slice assignment updates the existing container in place.
            self.attrs[:] = list(
                chain(self.discrete_data.domain, discrete_metas))
        if self.attrs:
            self.attrX = self.attrs[0].name
            # Fall back to the first attribute when there is only one.
            second = 1 if len(self.attrs) > 1 else 0
            self.attrY = self.attrs[second].name
        else:
            self.attrX = self.attrY = None
            self.areas = self.selection = None
        self.openContext(self.data)
        self.resolve_shown_attributes()
        self.update_selection()
Exemple #2
0
    def set_data(self, data):
        """
        Store the input data, discretize it and update the coloring controls.

        An SqlTable whose approximate length exceeds LARGE_TABLE is replaced
        by `data.sample_time(DEFAULT_SAMPLE_TIME)`. Falsy data (None or an
        empty table) clears `discrete_data` and returns early, before the
        context is reopened. Otherwise data with a continuous attribute is
        discretized with `EqualFreq(n=4)`, the color radio buttons and the
        bar button are enabled according to the class variable, and a
        subset received earlier is processed.

        Args:
            data: input table or None
        """
        # isinstance (not an exact type comparison) so that subclasses of
        # SqlTable are sampled as well.
        if isinstance(data, SqlTable) and data.approx_len() > LARGE_TABLE:
            data = data.sample_time(DEFAULT_SAMPLE_TIME)

        self.closeContext()
        self.data = data
        self.init_combos(self.data)
        # NOTE(review): presumably clears information messages 0-2 - confirm.
        self.information([0, 1, 2])
        if not self.data:
            self.discrete_data = None
            return
        # TODO: check
        # if data.has_missing_class():
        #     self.information(1, "Examples with missing classes were removed.")
        if any(attr.is_continuous for attr in data.domain):
            self.discrete_data = Discretize(method=EqualFreq(n=4))(data)
        else:
            self.discrete_data = self.data

        if self.data.domain.class_var is None:
            self.rb_colors.setDisabled(True)
            disc_class = False
        else:
            self.rb_colors.setDisabled(False)
            disc_class = self.data.domain.has_discrete_class
            self.rb_colors.group.button(2).setDisabled(not disc_class)
            self.bar_button.setDisabled(not disc_class)
        self.interior_coloring = bool(disc_class)
        self.openContext(self.data)

        # If the subset arrived before the data, process it now.
        if self.unprocessed_subset_data:
            self.set_subset_data(self.unprocessed_subset_data)
            self.unprocessed_subset_data = None
Exemple #3
0
 def discretizer(data):
     """
     Return `data` with its continuous columns discretized.

     Variables and metas are both inspected. When none of them is
     continuous the input is returned unchanged; otherwise the result of
     `Discretize` with `EqualFreq(n=4)` (class and metas included, constant
     columns kept) is returned after `to_dense()`.
     """
     columns = chain(data.domain.variables, data.domain.metas)
     if not any(column.is_continuous for column in columns):
         return data
     binning = Discretize(
         method=EqualFreq(n=4), remove_const=False,
         discretize_classes=True, discretize_metas=True)
     return binning(data).to_dense()
Exemple #4
0
    def set_data(self, data):
        """
        Store the input data, discretize it and reset ranking and coloring.

        An SqlTable whose approximate length exceeds LARGE_TABLE is replaced
        by `data.sample_time(DEFAULT_SAMPLE_TIME)`. Data with a continuous
        attribute is discretized with `EqualFreq(n=4)`, class included. The
        vizrank button is enabled only for data with more than one row and
        at least one attribute. With no data the method returns before the
        context is reopened.

        Args:
            data: input table or None
        """
        # isinstance (not an exact type comparison) so that subclasses of
        # SqlTable are sampled as well.
        if isinstance(data, SqlTable) and data.approx_len() > LARGE_TABLE:
            data = data.sample_time(DEFAULT_SAMPLE_TIME)

        self.closeContext()
        self.data = data
        self.init_combos(self.data)
        if self.data is None:
            self.discrete_data = None
        elif any(attr.is_continuous for attr in data.domain):
            self.discrete_data = Discretize(
                method=EqualFreq(n=4), discretize_classes=True)(data)
        else:
            self.discrete_data = self.data

        self.vizrank.stop_and_reset()
        self.vizrank_button.setEnabled(
            self.data is not None
            and len(self.data) > 1
            and len(self.data.domain.attributes) >= 1)

        if self.data is None:
            return

        has_class = self.data.domain.class_var is not None
        self.rb_colors.setDisabled(not has_class)
        if has_class:
            self.interior_coloring = self.CLASS_DISTRIBUTION
        else:
            self.interior_coloring = self.PEARSON

        self.openContext(self.data)

        # If the subset arrived before the data, process it now.
        if self.unprocessed_subset_data:
            self.set_subset_data(self.unprocessed_subset_data)
            self.unprocessed_subset_data = None
	def set_data(self, data):
		"""
		Store the input data and prepare its discretized counterpart.

		`self.disc_data` stays None, and a warning is raised, when the data
		has fewer than two rows, has an empty `Y`, or is left with fewer
		than two attributes after constant features are removed and the
		rest is discretized with `EqualFreq()`. An information message is
		shown when the remover reports removed features.

		Args:
			data: input table or None
		"""
		self.closeContext()
		self.clear_messages()
		self.data = data
		self.disc_data = None
		self.selection = []
		if data is not None:
			if len(data) < 2:
				self.Warning.not_enough_inst()
			elif data.Y.size == 0:
				self.Warning.no_class_var()
			else:
				const_remover = Remove(Remove.RemoveConstant)
				reduced = const_remover(data)
				binned = Discretize(method=EqualFreq())(reduced)
				if const_remover.attr_results["removed"]:
					self.Information.removed_cons_feat()
				if len(binned.domain.attributes) >= 2:
					self.disc_data = binned
				else:
					self.Warning.not_enough_vars()
		# Passes the falsy disc_data itself (e.g. None) when there is nothing
		# to show, otherwise its domain.
		domain = self.disc_data and self.disc_data.domain
		self.feature_model.set_domain(domain)
		self.openContext(self.disc_data)
		self.apply()
		self.vizrank.button.setEnabled(self.disc_data is not None)
Exemple #6
0
def create_contingencies(X, callback=None):
    """
    Build contingency tables over neighbouring columns of discretized X.

    X is discretized with `EqualFreq(n=10)`. For every discretized variable
    a mapping from bin index to a numeric position is built from the bin
    labels: a label with a single number maps to that number, a label of
    the form "low - high" maps to the midpoint of the two, and if any label
    of the variable cannot be parsed as numbers the bin index itself is
    used for all bins of that variable.

    For an SqlTable the contingencies are obtained from
    `create_sql_contingency` for the column groups [0, 1], every
    consecutive triple, and finally the last pair. Otherwise rows without
    NaN values are counted per column in a window of `window_size` columns
    on either side.

    Args:
        X: input table
        callback: optional callable invoked as `callback(done, total)`
            after each SQL contingency is computed

    Returns:
        A list of contingencies. In the non-SQL case each item is a pair
        `(np.array(keys), np.array(counts))`.
    """
    window_size = 1
    dim = len(X.domain)

    X_ = Discretize(method=EqualFreq(n=10))(X)
    m = []
    for var in X_.domain:
        cleaned_values = [
            tuple(map(str.strip,
                      v.strip('[]()<>=≥').split('-'))) for v in var.values
        ]
        try:
            float_values = [[float(v) for v in vals]
                            for vals in cleaned_values]
            # Midpoint of a closed bin. Using the upper edge here would make
            # the last closed bin coincide with the open-ended bin after it.
            bin_centers = {
                idx: v[0] if len(v) == 1 else v[0] + (v[1] - v[0]) / 2
                for idx, v in enumerate(float_values)
            }
        except ValueError:
            # Labels are not numeric: fall back to the bin indices.
            bin_centers = {idx: idx for idx in range(len(cleaned_values))}
        m.append(bin_centers)

    from Orange.data.sql.table import SqlTable
    if isinstance(X, SqlTable):
        conts = []
        al = len(X.domain)
        if al > 1:
            conts.append(create_sql_contingency(X_, [0, 1], m))
            if callback:
                callback(1, al)
            for a1, a2, a3 in zip(range(al), range(1, al), range(2, al)):
                conts.append(create_sql_contingency(X_, [a1, a2, a3], m))
                if callback:
                    callback(a3, al)
            if al > 2:
                conts.append(create_sql_contingency(X_, [al - 2, al - 1], m))
                if callback:
                    callback(al, al)
    else:
        conts = [defaultdict(float) for _ in range(len(X_.domain))]
        for r in X_:
            if any(np.isnan(r)):
                continue
            row = tuple(m[vi].get(v) for vi, v in enumerate(r))
            for l in range(len(X_.domain)):
                # None leaves the slice open at the table's edge.
                lower = l - window_size if l - window_size >= 0 else None
                upper = l + window_size + 1 if l + window_size + 1 <= dim else None
                dims = slice(lower, upper)

                conts[l][row[dims]] += 1
        conts = [zip(*x.items()) for x in conts]
        conts = [(np.array(c), np.array(cw)) for c, cw in conts]

    return conts
Exemple #7
0
    def set_data(self, data):
        """
        Accept new input data and reset the widget state around it.

        An SqlTable whose approximate length exceeds LARGE_TABLE is replaced
        by `data.sample_time(DEFAULT_SAMPLE_TIME)`. The domain model is set
        from the data, and data with a continuous variable or meta is
        discretized with `EqualFreq(n=4)` (class and metas included,
        constant columns kept). `self.attrs` is taken from the Variable
        items of the domain model and the first two become `attr_x` /
        `attr_y`; the context is opened afterwards and may override them.
        Areas and selection are reset, the graph and selection are updated,
        and vizrank is initialized.

        Args:
            data (Table): input data
        """
        if isinstance(data, SqlTable) and data.approx_len() > LARGE_TABLE:
            data = data.sample_time(DEFAULT_SAMPLE_TIME)

        self.closeContext()
        self.data = data
        self.areas = []
        self.selection = set()
        if self.data is not None:
            self.domain_model.set_domain(data.domain)
            all_columns = chain(data.domain, data.domain.metas)
            if any(column.is_continuous for column in all_columns):
                binning = Discretize(method=EqualFreq(n=4),
                                     remove_const=False,
                                     discretize_classes=True,
                                     discretize_metas=True)
                self.discrete_data = binning(data)
            else:
                self.discrete_data = data
        else:
            self.attrs[:] = []
            self.domain_model.set_domain(None)
        self.attrs = [item for item in self.domain_model
                      if isinstance(item, Variable)]
        if not self.attrs:
            self.attr_x = self.attr_y = None
            self.areas = []
            self.selection = set()
        else:
            self.attr_x = self.attrs[0]
            # Reuse the first attribute when there is no second one.
            second = 1 if len(self.attrs) > 1 else 0
            self.attr_y = self.attrs[second]
        self.openContext(self.data)
        self.resolve_shown_attributes()
        self.update_graph()
        self.update_selection()

        self.vizrank.initialize()
        can_rank = (self.data is not None
                    and len(self.data) > 1
                    and len(self.data.domain.attributes) > 1)
        self.vizrank_button.setEnabled(can_rank)
Exemple #8
0
 def _get_discrete_data(self, data):
     """
     Discretize continuous attributes.
     Return None when there is no data, no rows, or no primitive attributes.
     """
     if (data is None or not len(data) or not any(
             attr.is_discrete or attr.is_continuous
             for attr in chain(data.domain.variables, data.domain.metas))):
         return None
     elif any(attr.is_continuous for attr in data.domain.variables):
         return Discretize(method=EqualFreq(n=4),
                           remove_const=False,
                           discretize_classes=True,
                           discretize_metas=True)(data)
     else:
         return data
def create_contingencies(X, callback=None):
    """
    Build contingency tables over neighbouring columns of discretized X.

    X is discretized with `EqualFreq(n=10)` and `get_bin_centers` supplies,
    per column, the mapping applied to each row value. For an SqlTable the
    contingencies come from `create_sql_contingency` for the column groups
    [0, 1], every consecutive triple, and finally the last pair; `callback`
    (if given) is called as `callback(done, total)` after each one.
    Otherwise rows without NaN values are counted per column in a window of
    one column on either side, and each item of the result is a pair
    `(np.array(keys), np.array(counts))`.
    """
    window_size = 1
    dim = len(X.domain)

    X_ = Discretize(method=EqualFreq(n=10))(X)
    m = get_bin_centers(X_)

    from Orange.data.sql.table import SqlTable

    if not isinstance(X, SqlTable):
        n_columns = len(X_.domain)
        # The window around each column does not depend on the row, so the
        # slices are prepared once. None leaves a slice open at the edge.
        windows = []
        for col in range(n_columns):
            start = col - window_size
            stop = col + window_size + 1
            windows.append(slice(start if start >= 0 else None,
                                 stop if stop <= dim else None))

        counters = [defaultdict(float) for _ in range(n_columns)]
        for instance in X_:
            if any(np.isnan(instance)):
                continue
            mapped = tuple(
                m[pos].get(value) for pos, value in enumerate(instance))
            for counter, window in zip(counters, windows):
                counter[mapped[window]] += 1
        unzipped = [zip(*counter.items()) for counter in counters]
        return [(np.array(keys), np.array(weights))
                for keys, weights in unzipped]

    conts = []
    n_attrs = len(X.domain)
    if n_attrs > 1:
        conts.append(create_sql_contingency(X_, [0, 1], m))
        if callback:
            callback(1, n_attrs)
        for first in range(n_attrs - 2):
            triple = [first, first + 1, first + 2]
            conts.append(create_sql_contingency(X_, triple, m))
            if callback:
                callback(first + 2, n_attrs)
        if n_attrs > 2:
            last_pair = [n_attrs - 2, n_attrs - 1]
            conts.append(create_sql_contingency(X_, last_pair, m))
            if callback:
                callback(n_attrs, n_attrs)
    return conts
Exemple #10
0
 def setUp(self):
     """Load the "iris" and "adult" tables and build an EqualFreq(n=3) discretizer."""
     self.iris = Table("iris")
     self.adult = Table("adult")
     binning = EqualFreq(n=3)
     self.discretizer = Discretize(binning)
Exemple #11
0
 def test_discretization(self):
     """Smoke test: EqualFreq(n=4) can be applied to a column of an SqlTable."""
     table = SqlTable(self.conn, self.iris, inspect_values=True)
     column = table.domain["sepal length"]
     binning = EqualFreq(n=4)
     # The result is not inspected; the call only has to complete.
     binning(table, column)
Exemple #12
0
 def setUp(self):
     """Prepare the 'iris' and 'adult' tables and an EqualFreq(n=3) discretizer."""
     self.iris = Table('iris')
     self.adult = Table('adult')
     method = EqualFreq(n=3)
     self.discretizer = Discretize(method)