Example #1
0
def continuize_domain(data_or_domain,
                      multinomial_treatment=Continuize.Indicators,
                      continuous_treatment=Continuize.Leave,
                      class_treatment=Continuize.Leave,
                      zero_based=True):
    """Build a new domain by running every variable through `continuize_var`.

    Parameters
    ----------
    data_or_domain
        Either an `Orange.data.Domain`, or an object exposing a `.domain`
        attribute; in the latter case the object is also used as the data
        from which distributions are computed.
    multinomial_treatment
        Treatment flag applied to discrete attributes.
    continuous_treatment
        Treatment flag applied to continuous attributes.
    class_treatment
        Treatment flag applied to discrete class variables.
    zero_based
        Forwarded unchanged to `continuize_var`.

    Returns
    -------
    Orange.data.Domain
        Domain made of the concatenated `continuize_var` results for the
        attributes and for the class variables; metas are carried over
        unchanged.

    Raises
    ------
    TypeError
        If some variable needs a distribution but only a domain was given.
    ValueError
        If a variable is neither discrete nor continuous.
    """
    if isinstance(data_or_domain, Orange.data.Domain):
        data = None
        domain = data_or_domain
    else:
        data = data_or_domain
        domain = data_or_domain.domain

    def requires_distribution(variable, discrete_flag, continuous_flag):
        """Tell whether `variable` needs a distribution under the flags."""
        if variable.is_discrete:
            return discrete_flag == Continuize.FrequentAsBase
        if variable.is_continuous:
            return continuous_flag != Continuize.Leave
        raise ValueError

    attr_flags = [
        requires_distribution(variable, multinomial_treatment,
                              continuous_treatment)
        for variable in domain.attributes
    ]
    # Class variables are checked with `Continuize.Leave` as the continuous
    # flag, so a continuous class variable never asks for a distribution.
    class_flags = [
        requires_distribution(variable, class_treatment, Continuize.Leave)
        for variable in domain.class_vars
    ]

    # Positions are counted over attributes followed by class variables.
    all_flags = attr_flags + class_flags
    columns = [index for index, flagged in enumerate(all_flags) if flagged]

    if not columns:
        dist = []
    elif data is None:
        raise TypeError("continuizer requires data")
    else:
        dist = distribution.get_distributions_for_columns(data, columns)

    # Distributions are consumed in the same order the columns were listed:
    # flagged attributes first, then flagged class variables.
    pending = iter(dist)

    newattrs = []
    for variable, flagged in zip(domain.attributes, attr_flags):
        var_dist = next(pending) if flagged else None
        newattrs.extend(
            continuize_var(variable, var_dist, multinomial_treatment,
                           continuous_treatment, zero_based)
        )

    newclass = []
    for variable, flagged in zip(domain.class_vars, class_flags):
        var_dist = next(pending) if flagged else None
        newclass.extend(
            continuize_var(variable, var_dist, class_treatment,
                           Continuize.Remove, zero_based)
        )

    return Orange.data.Domain(newattrs, newclass, domain.metas)
Example #2
0
def continuize_domain(
    data_or_domain,
    multinomial_treatment=DomainContinuizer.NValues,
    continuous_treatment=DomainContinuizer.Leave,
    class_treatment=DomainContinuizer.Leave,
    zero_based=True,
):
    """Build a new domain by running every variable through `continuize_var`.

    `data_or_domain` is either an `Orange.data.Domain` or an object exposing
    a `.domain` attribute; in the latter case the object also serves as the
    data from which distributions are computed.

    `multinomial_treatment` and `continuous_treatment` are the flags applied
    to discrete and continuous attributes, `class_treatment` is the flag
    applied to discrete class variables, and `zero_based` is forwarded
    unchanged to `continuize_var`.

    Returns an `Orange.data.Domain` made of the concatenated
    `continuize_var` results for attributes and class variables, keeping the
    original metas.

    Raises `TypeError` when a distribution is needed but only a domain was
    given, and `ValueError` when a variable is neither a
    `DiscreteVariable` nor a `ContinuousVariable`.
    """
    if isinstance(data_or_domain, Orange.data.Domain):
        data = None
        domain = data_or_domain
    else:
        data = data_or_domain
        domain = data_or_domain.domain

    def requires_distribution(variable, discrete_flag, continuous_flag):
        """Tell whether `variable` needs a distribution under the flags."""
        if isinstance(variable, Orange.data.DiscreteVariable):
            return discrete_flag == DomainContinuizer.FrequentIsBase
        if isinstance(variable, Orange.data.ContinuousVariable):
            return continuous_flag != DomainContinuizer.Leave
        raise ValueError

    attr_flags = [
        requires_distribution(variable, multinomial_treatment, continuous_treatment)
        for variable in domain.attributes
    ]
    # Class variables are checked with `DomainContinuizer.Leave` as the
    # continuous flag, so a continuous class never asks for a distribution.
    class_flags = [
        requires_distribution(variable, class_treatment, DomainContinuizer.Leave)
        for variable in domain.class_vars
    ]

    # Positions are counted over attributes followed by class variables.
    all_flags = attr_flags + class_flags
    columns = [index for index, flagged in enumerate(all_flags) if flagged]

    if not columns:
        dist = []
    elif data is None:
        raise TypeError("continuizer requires data")
    else:
        dist = distribution.get_distributions_for_columns(data, columns)

    # Distributions are consumed in the same order the columns were listed:
    # flagged attributes first, then flagged class variables.
    pending = iter(dist)

    newattrs = []
    for variable, flagged in zip(domain.attributes, attr_flags):
        var_dist = next(pending) if flagged else None
        newattrs.extend(
            continuize_var(variable, var_dist, multinomial_treatment, continuous_treatment, zero_based)
        )

    newclass = []
    for variable, flagged in zip(domain.class_vars, class_flags):
        var_dist = next(pending) if flagged else None
        newclass.extend(
            continuize_var(variable, var_dist, class_treatment, DomainContinuizer.Ignore, zero_based)
        )

    return Orange.data.Domain(newattrs, newclass, domain.metas)