Example #1
0
def discretize_ent(infilename, outfilename):
    """
    Discretize a data set with the entropy/MDL method of [#fayyad1993]_ and
    write the result to a file. Requires the Orange Python module.

    The table is loaded from ``infilename``, run through a discretizer whose
    method is ``EntropyMDL``, and saved to ``outfilename``. Afterwards, every
    attribute whose name carries the ``D_`` prefix has the prefix stripped
    and the commas in its value labels replaced by semicolons.

    :param infilename: name of the input file (expecting an arff file)
    :type infilename: string
    :param outfilename: name of the output file
    :type outfilename: string
    """
    source_table = OTable(infilename)

    discretizer = Disc()
    discretizer.method = EntropyMDL()
    discretized = discretizer(source_table)

    # Attributes renamed by the discretization are recognised by this prefix.
    prefix = "D_"
    for attribute in discretized.domain.attributes:
        if not attribute.name.startswith(prefix):
            continue
        # Restore the original attribute name.
        attribute.name = attribute.name[len(prefix):]
        # Presumably commas would clash with the output format's separator;
        # verify against the consumer of the saved file.
        attribute.values = [
            label.replace(',', ";") for label in attribute.values
        ]

    # Persist the discretized table.
    discretized.save(outfilename)
Example #2
0
def discretize_ent(infilename, outfilename):
    """
    Run MDL-based discretization ([#fayyad1993]_) over the features of a
    data set and save the outcome. The Orange Python module is required.

    Attributes that come back with a ``D_`` prefix are given their prefix-free
    name again, and commas inside their value labels become semicolons,
    before the table is written out.

    :param infilename: name of the input file (expecting an arff file)
    :type infilename: string
    :param outfilename: name of the output file
    :type outfilename: string
    """

    raw = OTable(infilename)

    mdl = Disc()
    mdl.method = EntropyMDL()
    binned = mdl(raw)

    # Post-process the discretized domain in place.
    for variable in binned.domain.attributes:
        current_name = variable.name
        if current_name[:2] == "D_":
            # Drop the two-character prefix to recover the original name.
            variable.name = current_name[2:]
            cleaned = []
            for value in variable.values:
                cleaned.append(value.replace(',', ";"))
            variable.values = cleaned

    # Write the discretized data to the requested location.
    binned.save(outfilename)
Example #3
0
def predict_wine_quality(table, n):
    """
    Cross-validate a classifier on an equal-width discretized table and
    print its scores.

    The table is discretized with ``EqualWidth(n=n)``, then converted to a
    new domain in which the first variable of the discretized domain is the
    class label and the remaining variables are the features. A
    ``NNClassificationLearner`` (``hidden_layer_sizes=(10, )``,
    ``max_iter=4000``) is evaluated with ``CrossValidation(..., k=10)``.
    The first CA and AUC scores are printed; nothing is returned.

    :param table: data table to evaluate
    :param n: value forwarded to ``EqualWidth`` as the number of bins
    """
    # Turn the continuous variables into discrete ones.
    binner = Discretize()
    binner.method = discretize.EqualWidth(n=n)
    table = binner(table)

    # Rebuild the domain: variable 0 is the target, the rest are features.
    binned_domain = table.domain
    target = binned_domain[0]
    features = list(binned_domain[1:])
    table = Table.from_table(domain=Domain(features, target), source=table)

    # Build the learner, cross-validate it and report the scores.
    nn_learner = NNClassificationLearner(
        hidden_layer_sizes=(10, ),
        max_iter=4000,
    )
    results = CrossValidation(table, [nn_learner], k=10)

    accuracy = scoring.CA(results)[0]
    print("Accuracy of cross validation: {:.3f}".format(accuracy))
    auc = scoring.AUC(results)[0]
    print("AUC: {:.3f}".format(auc))
def formatTable(tble, n_bins=4):
    '''
    Bins the data, then one-hot encodes the binned data.

    :param tble: table to discretize and encode
    :param n_bins: value passed to ``EqualWidth`` as the number of bins;
                   defaults to 4, the value that was previously hard-coded
    :return: data: tble with binned data,
             X: representation of data with one-hot-encoding,
             mapping: representation of what our one-hot-encoding is
    '''
    # Discretization (binning)
    # https://docs.orange.biolab.si/3/data-mining-library/reference/preprocess.html
    print("Discretizing data")
    disc = Discretize()
    disc.method = discretize.EqualWidth(n=n_bins)
    data = disc(tble)

    print("One hot encoding data")
    X, mapping = OneHot.encode(data, include_class=True)
    # NOTE: a bare ``sorted(mapping.items())`` used to follow here. sorted()
    # returns a new list and leaves its argument untouched, so the discarded
    # result had no effect; the mapping is returned exactly as produced by
    # OneHot.encode.

    return data, X, mapping