Beispiel #1
0
def column_imputer_as_value(variable, table):
    """
    Build a `ColumnImputerAsValue` for a single variable.

    For a discrete `variable` the codomain is one new discrete variable
    with the same name whose value list is the original one extended by
    "N/A"; its `get_value_from` is a `Lookup` over an identity table with
    `unknown` set to the index of the appended value.

    For a continuous `variable` the codomain is the original variable plus
    a new two-valued ("undef", "def") discrete variable named
    "<name>_def" computed by `IsDefined`; the original column is paired
    with `ReplaceUnknowns(variable, mean)`, where the mean is taken from
    `BasicStats(table, variable)`.

    :param variable: an Orange discrete or continuous variable.
    :param table: the data table; supplies the source domain and, for
        continuous variables, the statistics.
    :raises TypeError: if `variable` is neither discrete nor continuous.
    """
    if isinstance(variable, Orange.data.DiscreteVariable):
        with_missing = Orange.data.DiscreteVariable(
            "{var.name}".format(var=variable),
            values=variable.values + ["N/A"],
            base_value=variable.base_value
        )
        # The appended "N/A" value sits right after the original values.
        n_original = len(variable.values)
        with_missing.get_value_from = Lookup(
            variable,
            numpy.arange(n_original, dtype=int),
            unknown=n_original
        )
        codomain = [with_missing]
        transformers = [with_missing.get_value_from]
    elif isinstance(variable, Orange.data.ContinuousVariable):
        indicator = Orange.data.DiscreteVariable(
            "{var.name}_def".format(var=variable),
            values=("undef", "def"),
        )
        indicator.get_value_from = IsDefined(variable)
        codomain = [variable, indicator]
        column_stats = basic_stats.BasicStats(table, variable)
        fill_with_mean = ReplaceUnknowns(variable, column_stats.mean)
        transformers = [fill_with_mean, indicator.get_value_from]
    else:
        raise TypeError(type(variable))

    target_domain = Orange.data.Domain(codomain)
    return ColumnImputerAsValue(table.domain, target_domain, transformers)
Beispiel #2
0
def remove_unused_values(var, data):
    """
    Return a version of discrete `var` without values unused in `data`.

    The column for `var` is extracted from `data`; the distinct finite
    entries of that column are taken as the indices of the values in use.
    If every value of `var` is in use, `var` itself is returned.
    Otherwise a new discrete variable named "R_<name>" is created with
    only the used values, and its `get_value_from` is a `Lookup` mapping
    old value indices to new ones (NaN for the dropped values).

    The base value is carried over when `var.base_value` is a valid index
    whose value is still present in the new variable.

    :param var: the discrete variable to reduce.
    :param data: the table from which the used values are determined.
    :return: `var` unchanged, or the new reduced variable.
    """
    column_data = Orange.data.Table.from_table(
        Orange.data.Domain([var]),
        data
    )
    array = column_data.X.ravel()
    # Non-finite entries (missing values) do not count as a used value.
    mask = numpy.isfinite(array)
    unique = numpy.array(numpy.unique(array[mask]), dtype=int)

    if len(unique) == len(var.values):
        return var

    used_values = [var.values[i] for i in unique]
    new_var = Orange.data.DiscreteVariable(
        "R_{}".format(var.name),
        values=used_values
    )
    # Old index -> new index; NaN marks values that were dropped.
    translation_table = numpy.full(len(var.values), numpy.nan)
    translation_table[unique] = numpy.arange(len(new_var.values))

    # Only a non-negative, in-range base value is a real index. A negative
    # one (presumably the "no base value" marker) must not be used to
    # index the table from the end.
    if 0 <= var.base_value < len(var.values):
        base = translation_table[var.base_value]
        if numpy.isfinite(base):
            new_var.base_value = int(base)

    new_var.get_value_from = Lookup(var, translation_table)
    return new_var
Beispiel #3
0
def merge_lookup(A, B):
    """
    Merge two consecutive Lookup transforms into one.

    A copy of `A.lookup_table` is taken; every finite entry in it is
    treated as an index into `B.lookup_table` and replaced by the entry
    found there. Non-finite entries are left as they are.

    :param A: the first transform; supplies `variable` and `lookup_table`.
    :param B: the second transform; supplies `lookup_table`.
    :return: a new `Lookup` on `A.variable` with the combined table.
    """
    combined = numpy.array(A.lookup_table)
    is_mapped = numpy.isfinite(combined)
    # Finite outputs of A are the positions to read from B's table.
    positions_in_b = combined[is_mapped].astype(int)
    combined[is_mapped] = B.lookup_table[positions_in_b]
    return Lookup(A.variable, combined)
Beispiel #4
0
def sort_var_values(var):
    """
    Return a variable whose values are those of `var` in sorted order.

    If the values of `var` are already sorted, `var` itself is returned.
    Otherwise a new discrete variable with the same name and the sorted
    values is created; its `compute_value` is a `Lookup` on `var` whose
    table gives, for each original value index, the index of that value
    in the sorted list (stored as floats).

    :param var: a variable with a `values` sequence.
    :return: `var` unchanged, or the new variable with sorted values.
    """
    current = list(var.values)
    ordered = sorted(current)
    if ordered == current:
        return var

    # Entry i holds the position of the i-th original value after sorting.
    positions = numpy.array(
        [ordered.index(value) for value in current], dtype=float)

    sorted_var = Orange.data.DiscreteVariable(var.name, values=ordered)
    sorted_var.compute_value = Lookup(var, positions)
    return sorted_var