def continuize_domain(
    data_or_domain,
    multinomial_treatment=Continuize.Indicators,
    continuous_treatment=Continuize.Leave,
    class_treatment=Continuize.Leave,
    zero_based=True,
):
    """Build a new domain whose variables went through ``continuize_var``.

    ``data_or_domain`` is either an ``Orange.data.Domain`` or an object
    exposing a ``.domain`` attribute (presumably a data table -- confirm
    against callers).  Every attribute and class variable is handed to
    ``continuize_var``; the variables it returns are concatenated, in
    order, into the attributes and class variables of the result.  Metas
    are carried over unchanged.

    Distributions are fetched only for the columns that need one:
    discrete variables treated with ``Continuize.FrequentAsBase`` and
    continuous variables whose treatment is not ``Continuize.Leave``.

    Parameters:
        data_or_domain: a domain, or an object with a ``.domain``.
        multinomial_treatment: flag applied to discrete attributes.
        continuous_treatment: flag applied to continuous attributes.
        class_treatment: flag applied to discrete class variables.
        zero_based: forwarded unchanged to ``continuize_var``.

    Returns:
        A new ``Orange.data.Domain``.

    Raises:
        TypeError: a distribution is required but only a domain was given.
        ValueError: a variable is neither discrete nor continuous.
    """
    if isinstance(data_or_domain, Orange.data.Domain):
        data = None
        domain = data_or_domain
    else:
        data = data_or_domain
        domain = data_or_domain.domain

    def wants_distribution(var, discrete_flag, continuous_flag):
        """Tell whether `var` needs a distribution under the given flags."""
        if var.is_discrete:
            return discrete_flag == Continuize.FrequentAsBase
        if var.is_continuous:
            return continuous_flag != Continuize.Leave
        raise ValueError

    # One boolean per variable: attributes first, class variables after,
    # so that positions line up with the column indices used below.
    attr_flags = []
    for var in domain.attributes:
        attr_flags.append(
            wants_distribution(var, multinomial_treatment, continuous_treatment)
        )
    class_flags = []
    for var in domain.class_vars:
        # For the distribution check, class variables are tested with the
        # continuous flag fixed at Leave.
        class_flags.append(
            wants_distribution(var, class_treatment, Continuize.Leave)
        )

    columns = []
    for index, flag in enumerate(attr_flags + class_flags):
        if flag:
            columns.append(index)

    if not columns:
        dists = []
    elif data is None:
        raise TypeError("continuizer requires data")
    else:
        dists = distribution.get_distributions_for_columns(data, columns)

    # Distributions come back in column order, so they are consumed in the
    # same order the flagged variables are visited: attributes, then classes.
    remaining = iter(dists)

    attr_groups = []
    for var, flag in zip(domain.attributes, attr_flags):
        var_dist = next(remaining) if flag else None
        attr_groups.append(
            continuize_var(
                var,
                var_dist,
                multinomial_treatment,
                continuous_treatment,
                zero_based,
            )
        )

    class_groups = []
    for var, flag in zip(domain.class_vars, class_flags):
        var_dist = next(remaining) if flag else None
        class_groups.append(
            continuize_var(
                var,
                var_dist,
                class_treatment,
                Continuize.Remove,
                zero_based,
            )
        )

    # Flatten the per-variable results into single lists.
    new_attrs = []
    for group in attr_groups:
        new_attrs.extend(group)
    new_class = []
    for group in class_groups:
        new_class.extend(group)

    return Orange.data.Domain(new_attrs, new_class, domain.metas)
def continuize_domain(
    data_or_domain,
    multinomial_treatment=DomainContinuizer.NValues,
    continuous_treatment=DomainContinuizer.Leave,
    class_treatment=DomainContinuizer.Leave,
    zero_based=True,
):
    """Return a domain built by running every variable through ``continuize_var``.

    ``data_or_domain`` may be an ``Orange.data.Domain`` or any object with
    a ``.domain`` attribute (presumably a data table -- confirm against
    callers).  Attributes and class variables are converted one by one
    with ``continuize_var`` and the resulting variables are concatenated
    in order; metas are passed through untouched.

    A distribution is requested only for columns that need it: a
    ``DiscreteVariable`` treated with ``DomainContinuizer.FrequentIsBase``
    or a ``ContinuousVariable`` whose treatment differs from
    ``DomainContinuizer.Leave``.

    Parameters:
        data_or_domain: a domain, or an object with a ``.domain``.
        multinomial_treatment: flag applied to discrete attributes.
        continuous_treatment: flag applied to continuous attributes.
        class_treatment: flag applied to discrete class variables.
        zero_based: forwarded unchanged to ``continuize_var``.

    Returns:
        A new ``Orange.data.Domain``.

    Raises:
        TypeError: a distribution is required but only a domain was given.
        ValueError: a variable is neither a ``DiscreteVariable`` nor a
            ``ContinuousVariable``.
    """
    got_domain = isinstance(data_or_domain, Orange.data.Domain)
    data = None if got_domain else data_or_domain
    domain = data_or_domain if got_domain else data_or_domain.domain

    def requires_distribution(var, discrete_flag, continuous_flag):
        """Tell whether `var` needs a distribution under the given flags."""
        if isinstance(var, Orange.data.DiscreteVariable):
            return discrete_flag == DomainContinuizer.FrequentIsBase
        if isinstance(var, Orange.data.ContinuousVariable):
            return continuous_flag != DomainContinuizer.Leave
        raise ValueError

    attr_flags = [
        requires_distribution(v, multinomial_treatment, continuous_treatment)
        for v in domain.attributes
    ]
    # For the distribution check, class variables are tested with the
    # continuous flag fixed at Leave.
    class_flags = [
        requires_distribution(v, class_treatment, DomainContinuizer.Leave)
        for v in domain.class_vars
    ]

    # Column indices run over attributes first and class variables after.
    all_flags = attr_flags + class_flags
    columns = [position for position, flag in enumerate(all_flags) if flag]

    dists = []
    if columns:
        if data is None:
            raise TypeError("continuizer requires data")
        dists = distribution.get_distributions_for_columns(data, columns)

    # Shared cursor: distributions are handed out in column order, first
    # to flagged attributes and then to flagged class variables.
    cursor = iter(dists)

    def convert_each(variables, flags, discrete_flag, continuous_flag):
        """Run ``continuize_var`` on each variable; return the raw results."""
        results = []
        for var, flag in zip(variables, flags):
            var_dist = next(cursor) if flag else None
            results.append(
                continuize_var(
                    var, var_dist, discrete_flag, continuous_flag, zero_based
                )
            )
        return results

    attr_groups = convert_each(
        domain.attributes, attr_flags, multinomial_treatment, continuous_treatment
    )
    class_groups = convert_each(
        domain.class_vars, class_flags, class_treatment, DomainContinuizer.Ignore
    )

    # Flatten the per-variable results into single lists.
    flat_attrs = [new_var for group in attr_groups for new_var in group]
    flat_class = [new_var for group in class_groups for new_var in group]

    return Orange.data.Domain(flat_attrs, flat_class, domain.metas)