Example #1
0
    def __call__(self, data, variable):
        """Build replacement variable(s) that expose missing values explicitly.

        ``variable`` is first resolved through ``data.domain``.

        * Discrete variable: returns one new ``DiscreteVariable`` with the
          same name whose values are the original ones followed by ``"N/A"``.
          Its ``compute_value`` is a ``Lookup`` over an identity table, with
          ``unknown`` set to the index of the appended ``"N/A"`` value.
        * Continuous variable: returns a 2-tuple ``(imputed, indicator)``.
          ``imputed`` is a copy of the variable whose ``compute_value`` is
          ``ReplaceUnknowns(variable, mean)``; ``indicator`` is a
          ``DiscreteVariable`` named ``"<name>_def"`` with values
          ``("undef", "def")`` computed by ``IsDefined``.

        Raises:
            TypeError: if the variable is neither discrete nor continuous;
                the exception argument is the variable's type.
        """
        variable = data.domain[variable]

        if variable.is_discrete:
            name = "{var.name}".format(var=variable)
            extended_values = variable.values + ("N/A", )
            # The appended "N/A" sits right after the original values, so its
            # index equals the original number of values.
            n_known = len(variable.values)
            lookup = Lookup(variable,
                            np.arange(n_known, dtype=int),
                            unknown=n_known)
            return Orange.data.DiscreteVariable(
                name,
                values=extended_values,
                compute_value=lookup,
                sparse=variable.sparse,
            )

        if variable.is_continuous:
            indicator = Orange.data.DiscreteVariable(
                "{var.name}_def".format(var=variable),
                values=("undef", "def"),
                compute_value=IsDefined(variable),
                sparse=variable.sparse,
            )
            mean = basic_stats.BasicStats(data, variable).mean
            imputed = variable.copy(
                compute_value=ReplaceUnknowns(variable, mean))
            return imputed, indicator

        raise TypeError(type(variable))
Example #2
0
def column_imputer_as_value(variable, table):
    """Create a ``ColumnImputerAsValue`` for a single column of ``table``.

    * Discrete ``variable``: the codomain is one new ``DiscreteVariable``
      with the same name and ``base_value``, whose values are the original
      ones plus a trailing ``"N/A"``.  Its ``get_value_from`` is a ``Lookup``
      over an identity table with ``unknown`` set to the index of ``"N/A"``.
    * Continuous ``variable``: the codomain is the original variable plus a
      ``"<name>_def"`` indicator with values ``("undef", "def")``.  The
      transformers are ``ReplaceUnknowns(variable, mean)`` and the
      indicator's ``IsDefined`` instance.

    Raises:
        TypeError: if ``variable`` is neither a ``DiscreteVariable`` nor a
            ``ContinuousVariable``; the argument is the variable's type.
    """
    if isinstance(variable, Orange.data.DiscreteVariable):
        as_value = Orange.data.DiscreteVariable(
            "{var.name}".format(var=variable),
            values=variable.values + ["N/A"],
            base_value=variable.base_value
        )
        # "N/A" is appended last, so its index is the original value count.
        n_known = len(variable.values)
        as_value.get_value_from = Lookup(
            variable, numpy.arange(n_known, dtype=int), unknown=n_known
        )
        codomain = [as_value]
        transformers = [as_value.get_value_from]
    elif isinstance(variable, Orange.data.ContinuousVariable):
        indicator = Orange.data.DiscreteVariable(
            "{var.name}_def".format(var=variable),
            values=("undef", "def"),
        )
        indicator.get_value_from = IsDefined(variable)
        codomain = [variable, indicator]
        mean = basic_stats.BasicStats(table, variable).mean
        transformers = [
            ReplaceUnknowns(variable, mean),
            indicator.get_value_from,
        ]
    else:
        raise TypeError(type(variable))

    target_domain = Orange.data.Domain(codomain)
    return ColumnImputerAsValue(table.domain, target_domain, transformers)
Example #3
0
    def __call__(self, data, variable, value=None):
        """Return a copy of a variable that replaces unknowns with ``value``.

        ``variable`` is resolved through ``data.domain``.  When ``value`` is
        ``None`` a default is derived from ``data``: ``BasicStats(...).mean``
        for a continuous variable, or the ``modus()`` of its distribution for
        a discrete one.

        Raises:
            TypeError: if ``value`` is ``None`` and the variable is neither
                continuous nor discrete.
        """
        column = data.domain[variable]
        fill = value

        if fill is None:
            if column.is_continuous:
                fill = basic_stats.BasicStats(data, column).mean
            elif column.is_discrete:
                fill = distribution.get_distribution(data, column).modus()
            else:
                raise TypeError("Variable must be continuous or discrete")

        replacer = ReplaceUnknowns(column, fill)
        return column.copy(compute_value=replacer)
Example #4
0
    def __call__(self, data, variable):
        """Return a shallow copy of ``variable`` that replaces unknowns.

        The replacement value is derived from ``data``:
        ``BasicStats(data, variable).mean`` when ``is_continuous(variable)``
        holds, or the ``modus()`` of the variable's distribution when
        ``is_discrete(variable)`` holds.  The copy's ``compute_value`` is set
        to ``ReplaceUnknowns(variable, value)``; ``variable`` itself is not
        modified by this method.

        Raises:
            TypeError: if ``variable`` is neither continuous nor discrete.
        """
        if is_continuous(variable):
            value = basic_stats.BasicStats(data, variable).mean
        elif is_discrete(variable):
            value = distribution.get_distribution(data, variable).modus()
        else:
            # Say which kinds are accepted instead of raising a bare,
            # message-less TypeError.
            raise TypeError("Variable must be continuous or discrete")

        var = copy.copy(variable)
        var.compute_value = ReplaceUnknowns(variable, value)
        return var
Example #5
0
def column_imputer_average(variable, table):
    """Build a column imputer whose default is the column's mean.

    The mean is read from ``basic_stats.BasicStats(table, variable)`` and
    passed to ``column_imputer_defaults`` as the default value.
    """
    mean = basic_stats.BasicStats(table, variable).mean
    return column_imputer_defaults(variable, table, mean)
Example #6
0
def column_imputer_minimal(variable, table):
    """Build a column imputer whose default is the column's minimum.

    The minimum is read from ``basic_stats.BasicStats(table, variable)`` and
    passed to ``column_imputer_defaults`` as the default value.
    """
    minimum = basic_stats.BasicStats(table, variable).min
    return column_imputer_defaults(variable, table, minimum)