Beispiel #1
0
    def test_contingencies(self):
        # Rebuild the table's domain from its first two variables plus an
        # EqualWidth discretization of 'sepal width'; 'iris' is passed as the
        # second Domain argument.
        table = SqlTable(self.conn, self.iris, inspect_values=True)
        leading_vars = table.domain[:2]
        binned_width = EqualWidth()(table, table.domain['sepal width'])
        target_var = table.domain['iris']
        table.domain = Domain(leading_vars + (binned_width,), target_var)

        contingencies = get_contingencies(table)
        self.assertEqual(len(contingencies), 3)

        # One contingency per attribute: the two untouched variables are
        # expected to yield Continuous, the discretized one Discrete.
        expected_kinds = (Continuous, Continuous, Discrete)
        for position, kind in enumerate(expected_kinds):
            self.assertIsInstance(contingencies[position], kind)
Beispiel #2
0
    def get_root(self, data, xvar, yvar, zvar=None):
        """Compute the root density map item.

        ``xvar`` and ``yvar`` are each discretized with
        ``EqualWidth(n=self.n_bins)``. The cut points, padded with one extra
        edge a bin width below the first and above the last cut point, are
        stored as ``xbins``/``ybins`` on the value returned by ``grid_bin``.
        """
        assert self.n_bins > 2

        def padded_edges(disc_var):
            # Cut points of the discretized variable plus one extra edge on
            # each side, spaced by the distance between the first two points.
            cuts = list(disc_var.compute_value.points)
            lowest, second = cuts[0], cuts[1]
            assert lowest <= second
            step = second - lowest
            edges = [lowest - step]
            edges.extend(cuts)
            edges.append(cuts[-1] + step)
            return np.array(edges)

        def open_ended(edges):
            # Swap the outermost edges for -inf/+inf so the outer bins are
            # unbounded (the original note says grid_bin has an
            # optimization for this case).
            return np.r_[-np.inf, edges[1:-1], np.inf]

        x_disc, y_disc = (
            EqualWidth(n=self.n_bins)(data, var) for var in (xvar, yvar)
        )
        xbins, ybins = (padded_edges(disc) for disc in (x_disc, y_disc))

        binned = grid_bin(
            data, xvar, yvar, open_ended(xbins), open_ended(ybins), zvar=zvar
        )
        # Report the finite, padded edges rather than the infinite ones used
        # for the binning itself.
        return binned._replace(xbins=xbins, ybins=ybins)
Beispiel #3
0
"""
import pandas as pd
import datetime
from Orange.data import Table, Domain
from Orange.classification import NNClassificationLearner
from Orange.evaluation import CrossValidation, scoring
from Orange.preprocess import Normalize, Scale
from Orange.preprocess import DomainDiscretizer
from Orange.preprocess.discretize import EqualWidth
 
# Load the raw records from the CSV file.
raw_data_table = Table.from_file("white wine.csv")

# Variables at indices 1 through 10 of the raw domain are the features.
feature_vars = list(raw_data_table.domain.variables[1:11])

# Discretize the raw table with EqualWidth(n=9); the first entry of the
# discretizer's result is used as the class label variable.
discretizer = DomainDiscretizer()
discretizer.method = EqualWidth(n=9)
discretizer_domain = discretizer(raw_data_table)
class_label_var = discretizer_domain[0]
print(class_label_var.values)

# Re-express the raw data in the (features, class label) domain and save it.
wine_domain = Domain(feature_vars, class_label_var)
data_table = Table.from_table(wine_domain, raw_data_table)
data_table.save("data_table normal2.csv")
def normalize_table(table_to_process):
    """Return a normalized and shuffled table built from ``table_to_process``.

    The input is passed through a ``Normalize`` preprocessor configured with
    ``Normalize.NormalizeBySpan`` and ``transform_class = False``. The
    resulting table's ``shuffle()`` method is called before it is returned.
    """
    span_scaler = Normalize(norm_type=Normalize.NormalizeBySpan)
    span_scaler.transform_class = False  # do not rescale the class variable
    scaled_table = span_scaler(table_to_process)
    scaled_table.shuffle()
    return scaled_table
# Normalise the feature values, then report how many records are available.
norm_data_table = normalize_table(data_table)
record_total = len(norm_data_table)
print("Applying learner on total data records {}".format(record_total))