Example #1
    def test_method(self):
        dom = discretize.DomainDiscretizer(self.table_class)
        self.assertEqual(len(dom[1].values), 4)

        dom = discretize.DomainDiscretizer(self.table_class,
                                           method=discretize.EqualWidth(n=2))
        self.assertEqual(len(dom[1].values), 2)
Example #2
    def test_equalwidth_100_to_4(self):
        X = np.arange(101).reshape((101, 1))
        table = data.Table(X)
        disc = discretize.EqualWidth(n=4)
        dvar = disc(table, table.domain[0])
        self.assertEqual(len(dvar.values), 4)
        self.assertEqual(dvar.compute_value.points, [25, 50, 75])
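The expected cut points follow directly from equal-width binning: the observed range [0, 100] is split into four bins of width 25, so the three interior thresholds are 25, 50 and 75. A short plain-NumPy sketch (not Orange's implementation) reproduces them:
import numpy as np

# Equal-width thresholds are the interior points of an evenly spaced grid
# over [min, max]; this mirrors the test's expectation, not Orange internals.
values = np.arange(101)
n = 4
points = np.linspace(values.min(), values.max(), n + 1)[1:-1]
print(points)   # [25. 50. 75.]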
Example #3
    def test_fixed(self):
        dom = discretize.DomainDiscretizer(self.table_no_class,
                                           method=discretize.EqualWidth(n=2),
                                           fixed={"Feature 2": [1, 11]})
        self.assertEqual(len(dom.attributes), 2)
        self.assertEqual(dom[0].compute_value.points, [0.5])
        self.assertEqual(dom[1].compute_value.points, [6])
Example #4
def random_data(nrows, ncols):
    np.random.seed(42)
    x = np.random.randint(0, 2, (nrows, ncols))
    col = np.random.randint(ncols)
    y = x[:nrows, col].reshape(nrows, 1)
    table = Table.from_numpy(None, x, y)
    table = preprocess.Discretize(discretize.EqualWidth(n=3))(table)
    return table
Example #5
def random_data(nrows, ncols):
    np.random.seed(42)
    # np.random.random_integers was removed from NumPy; randint(0, 2) draws the same {0, 1} values
    x = np.random.randint(0, 2, (nrows, ncols))
    col = np.random.randint(ncols)
    y = x[:nrows, col].reshape(nrows, 1)
    table = Table(x, y)
    table = preprocess.Discretize(discretize.EqualWidth(n=3))(table)
    return table
Example #6
    def test_equalwidth_on_two_values(self):
        s = [0] * 50 + [1] * 50
        random.shuffle(s)
        X = np.array(s).reshape((100, 1))
        table = data.Table(X)
        disc = discretize.EqualWidth(n=4)
        dvar = disc(table, table.domain[0])
        self.assertEqual(len(dvar.values), 4)
        self.assertEqual(dvar.compute_value.points, [0.25, 0.5, 0.75])
Example #7
def random_data(nrows, ncols):
    np.random.seed(42)
    x = np.random.randint(1, 4, (nrows, ncols))  # replaces removed np.random.random_integers(1, 3, ...)
    col = np.random.randint(ncols)
    y = x[:nrows, col].reshape(nrows, 1)
    table = Table(x, y)
    table = discretize.DiscretizeTable(
        table, method=discretize.EqualWidth(n=3))
    return table
Example #8
    def test_bayes(self):
        x = np.random.randint(0, 2, (100, 5))  # replaces removed np.random.random_integers(0, 1, ...)
        col = np.random.randint(5)
        y = x[:, col].copy().reshape(100, 1)
        t = Orange.data.Table(x, y)
        t = Orange.preprocess.Discretize(method=discretize.EqualWidth(n=3))(t)
        nb = Orange.classification.NaiveBayesLearner()
        res = Orange.evaluation.TestOnTrainingData(t, [nb])
        np.testing.assert_almost_equal(CA(res), [1])

        t.Y[-20:] = 1 - t.Y[-20:]
        res = Orange.evaluation.TestOnTrainingData(t, [nb])
        self.assertGreaterEqual(CA(res)[0], 0.75)
        self.assertLess(CA(res)[0], 1)
Example #9
    def test_bayes(self):
        x = np.random.randint(2, size=(100, 5))
        col = np.random.randint(5)
        y = x[:, col].copy().reshape(100, 1)
        t = Table(x, y)
        t = Discretize(method=discretize.EqualWidth(n=3))(t)
        nb = NaiveBayesLearner()
        res = TestOnTrainingData()(t, [nb])
        np.testing.assert_almost_equal(CA(res), [1])

        t.Y[-20:] = 1 - t.Y[-20:]
        res = TestOnTrainingData()(t, [nb])
        self.assertGreaterEqual(CA(res)[0], 0.75)
        self.assertLess(CA(res)[0], 1)
Example #10
def predict_wine_quality(table, n):
    # Make the continuous variables discrete
    disc = Discretize()
    disc.method = discretize.EqualWidth(n=n)
    table = disc(table)
    # Define domain
    feature_vars = list(table.domain[1:])
    class_label_var = table.domain[0]
    wine_domain = Domain(feature_vars, class_label_var)
    table = Table.from_table(domain=wine_domain, source=table)
    # Construct learner and print results
    learner = NNClassificationLearner(hidden_layer_sizes=(10, ),
                                      max_iter=4000)
    eval_results = CrossValidation(table, [learner], k=10)
    print("Accuracy of cross validation: {:.3f}".format(
        scoring.CA(eval_results)[0]))
    print("AUC: {:.3f}".format(scoring.AUC(eval_results)[0]))
Example #11
class TestCA(unittest.TestCase):
    def test_init(self):
        res = Results(nmethods=2, nrows=100)
        res.actual[:50] = 0
        res.actual[50:] = 1
        res.predicted = np.vstack((res.actual, res.actual))
        np.testing.assert_almost_equal(CA(res), [1, 1])

        res.predicted[0][0] = 1
        np.testing.assert_almost_equal(CA(res), [0.99, 1])

        res.predicted[1] = 1 - res.predicted[1]
        np.testing.assert_almost_equal(CA(res), [0.99, 0])

    def test_call(self):
        res = Results(nmethods=2, nrows=100)
        res.actual[:50] = 0
        res.actual[50:] = 1
        res.predicted = np.vstack((res.actual, res.actual))
        ca = CA()
        np.testing.assert_almost_equal(ca(res), [1, 1])

        res.predicted[0][0] = 1
        np.testing.assert_almost_equal(ca(res), [0.99, 1])

        res.predicted[1] = 1 - res.predicted[1]
        np.testing.assert_almost_equal(ca(res), [0.99, 0])

    def test_bayes(self):
        x = np.random.randint(2, size=(100, 5))
        col = np.random.randint(5)
        y = x[:, col].copy().reshape(100, 1)
        t = Table(x, y)
        t = Discretize(
            method=discretize.EqualWidth(n=3))(t)
        nb = NaiveBayesLearner()
        res = TestOnTrainingData(t, [nb])
        np.testing.assert_almost_equal(CA(res), [1])

        t.Y[-20:] = 1 - t.Y[-20:]
        res = TestOnTrainingData(t, [nb])
        self.assertGreaterEqual(CA(res)[0], 0.75)
        self.assertLess(CA(res)[0], 1)
Example #12
def formatTable(tble):
    '''
    Bins the data and one-hot encodes it.
    :param tble: input Orange data table
    :return: data: tble with binned data,
             X: one-hot-encoded representation of data,
             mapping: description of what each one-hot column encodes
    '''
    # Discretization (binning)
    # https://docs.orange.biolab.si/3/data-mining-library/reference/preprocess.html
    print("Discretizing data")
    disc = Discretize()
    disc.method = discretize.EqualWidth(n=4)
    data = disc(tble)
    # print("Discretized table:\n{}\n\n".format(data))

    print("One hot encoding data")
    X, mapping = OneHot.encode(data, include_class=True)
    sorted(mapping.items())  # note: sorted() returns a new list, so this statement has no effect

    return data, X, mapping
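A hypothetical usage sketch for the helper above, assuming OneHot is the encoder from orangecontrib.associate.fpgrowth (its import is not shown in the snippet) and using Orange's built-in iris data:
# Hypothetical usage; OneHot is assumed to come from
# orangecontrib.associate.fpgrowth, and the imports below mirror what
# formatTable itself relies on.
from Orange.data import Table
from Orange.preprocess import Discretize, discretize
from orangecontrib.associate.fpgrowth import OneHot

iris = Table("iris")
binned, X, mapping = formatTable(iris)
print(X.shape)                      # boolean matrix, one column per (variable, value) pair
print(sorted(mapping.items())[:5])  # a few entries of the encoding map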
Example #13
Custom = namedtuple("Custom", ["points"])

METHODS = [(Default, ), (Leave, ), (MDL, ), (EqualFreq, ), (EqualWidth, ),
           (Remove, ), (Custom, )]

_dispatch = {
    Default:
    lambda m, data, var: _dispatch[type(m.method)](m.method, data, var),
    Leave:
    lambda m, data, var: var,
    MDL:
    lambda m, data, var: disc.EntropyMDL()(data, var),
    EqualFreq:
    lambda m, data, var: disc.EqualFreq(m.k)(data, var),
    EqualWidth:
    lambda m, data, var: disc.EqualWidth(m.k)(data, var),
    Remove:
    lambda m, data, var: None,
    Custom:
    lambda m, data, var: disc.Discretizer.create_discretized_var(
        var, m.points)
}

# Variable discretization state
DState = namedtuple(
    "DState",
    ["method",    # discretization method
     "points",    # induced cut points
     "disc_var"]  # induced discretized variable
)
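For context, _dispatch maps each method-descriptor type to a callable taking (method, data, var). A minimal sketch of what the EqualWidth entry does, assuming the descriptor is a namedtuple with a single field k, as the m.k access in the table above implies:
# Minimal sketch; the EqualWidth *descriptor* defined here is an assumption
# mirroring the m.k access in the dispatch table, and is distinct from
# Orange's disc.EqualWidth discretization class.
from collections import namedtuple
import Orange.preprocess.discretize as disc
from Orange.data import Table

EqualWidth = namedtuple("EqualWidth", ["k"])

data = Table("iris")
var = data.domain["sepal length"]
method = EqualWidth(k=3)

# Equivalent to _dispatch[type(method)](method, data, var) in the snippet above.
disc_var = disc.EqualWidth(method.k)(data, var)
print(disc_var.values)   # three interval labels covering the variable's range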
Example #14
            ()),
 MethodDesc(Methods.MDL,
            "Entropy vs. MDL", "entropy",
            "Split values until MDL exceeds the entropy (Fayyad-Irani)\n"
            "(requires discrete class variable)",
            _mdl_discretization,
            ()),
 MethodDesc(Methods.EqualFreq,
            "Equal frequency, intervals: ", "equal freq, k={}",
            "Create bins with same number of instances",
            lambda data, var, k: disc.EqualFreq(k)(data, var),
            ("freq_spin", )),
 MethodDesc(Methods.EqualWidth,
            "Equal width, intervals: ", "equal width, k={}",
            "Create bins of the same width",
            lambda data, var, k: disc.EqualWidth(k)(data, var),
            ("width_spin", )),
 MethodDesc(Methods.Remove,
            "Remove", "remove",
            "Remove variable",
            lambda *_: None,
            ()),
 MethodDesc(Methods.Binning,
            "Natural binning, desired bins: ", "binning, desired={}",
            "Create bins with nice thresholds; "
            "try matching desired number of bins",
            lambda data, var, nbins: disc.Binning(nbins)(data, var),
            ("binning_spin", )),
 MethodDesc(Methods.FixedWidth,
            "Fixed width: ", "fixed width {}",
            "Create bins with the given width (not for time variables)",