Exemple #1
0
    def fill_in_rule(self, table, ref_bounds):
        """Add a catch-all condition for every column the rule leaves unconstrained.

        Columns that already have a condition in ``self.filter.conditions``
        are left untouched. New conditions are appended to
        ``self.filter.conditions`` in place; nothing is returned.

        table      -- object whose ``domain`` resolves columns to attributes
        ref_bounds -- mapping of column -> ``(min, max)`` bounds, or ``None``
                      for a column that should get a discrete condition
        """
        domain = table.domain

        # Names of the attributes the rule already constrains.
        constrained = set(domain[c.position].name
                          for c in self.filter.conditions)

        # .items() (rather than .iteritems()) works on Python 2 and 3 alike.
        for col, bounds in ref_bounds.items():
            if col in constrained:
                continue

            attr = domain[col]
            pos = domain.index(attr)

            if bounds is None:
                # No numeric bounds: accept every value the attribute declares.
                vals = [orange.Value(attr, value) for value in attr.values]
                cond = orange.ValueFilter_discrete(position=pos, values=vals)
            else:
                minv, maxv = bounds

                # Widen the reference bounds by one on each side.
                cond = orange.ValueFilter_continuous(
                    position=pos,
                    oper=orange.ValueFilter.Between,
                    min=minv - 1,
                    max=maxv + 1)
            self.filter.conditions.append(cond)
Exemple #2
0
    def getUndiscretized(self, original_data):
        """Return a copy of this rule expressed over ``original_data``.

        Conditions whose attribute also exists in ``original_data.domain`` are
        reused with their position re-pointed at that domain. Conditions on a
        discretized attribute are replaced by a continuous condition on the
        attribute of the same name in ``original_data``, using the cut points
        from ``getValueFrom.transformer.points``. Only the first value of such
        a condition (``c.values[0]``) is considered, and a bin index outside
        ``0..len(points)`` yields no condition.

        The quality, score and stats_* fields of this rule are copied onto
        the returned SDRule.
        """
        cond = []
        for c in self.filter.conditions:
            d_attribute = self.data.domain[c.position]
            if d_attribute in original_data.domain:
                # NOTE(review): this re-points the existing condition object,
                # so self.filter.conditions is modified as well -- confirm
                # that callers do not use this rule afterwards.
                c.position = original_data.domain.index(d_attribute)
                cond.append(c)
            else:
                position = original_data.domain.index(
                    original_data.domain[d_attribute.name])

                points = d_attribute.getValueFrom.transformer.points
                value_idx = int(c.values[0])

                if value_idx == 0:  # '<='
                    cond.append(
                        orange.ValueFilter_continuous(position=position,
                                                      max=points[0],
                                                      min=float(-infinity),
                                                      outside=False))
                elif 0 < value_idx < len(points):  # (x,y]
                    cond.append(
                        orange.ValueFilter_continuous(
                            position=position,
                            max=points[value_idx],
                            # closed interval '[' instead of the open '('
                            min=points[value_idx - 1],
                            outside=False))
                elif value_idx == len(points):  # '>'
                    # "Outside [-inf, last cut point]" keeps the values above
                    # the last cut point, the same encoding getFixed uses for
                    # '>'. The previous min=points[-1], max=+inf with
                    # outside=True selected the values *below* the cut point.
                    cond.append(
                        orange.ValueFilter_continuous(position=position,
                                                      max=points[-1],
                                                      min=float(-infinity),
                                                      outside=True))

        rule = SDRule(original_data, self.targetClass, cond, self.g)
        rule.quality = self.quality
        # NOTE(review): written to ``fix`` but read from ``fixed`` -- confirm
        # the attribute name SDRule expects.
        rule.fix = self.fixed
        rule.score = self.score
        rule.stats_mean = self.stats_mean
        rule.stats_std = self.stats_std
        rule.stats_nmean = self.stats_nmean
        rule.stats_nstd = self.stats_nstd
        rule.stats_max = self.stats_max
        return rule
Exemple #3
0
    def to_rule(self, table, cont_dists=None, disc_dists=None):
        """Return the SDRule for this object, building and caching it on first use.

        When ``self.rule`` is not set yet, a rule is assembled over ``table``:

        * one continuous condition per entry of ``self.cols``, using the
          matching bounds from ``self.bbox``; a column is skipped when its
          range, intersected with the table's range, spans more than 99% of
          the table's range;
        * one discrete condition per entry of ``self.discretes``; an entry is
          skipped when it selects no valid value or as many values as the
          attribute's distribution has.

        ``cont_dists`` / ``disc_dists`` are per-attribute-name statistics;
        when falsy they are computed from ``table``.

        The rule's quality, score, inf_state and c_range are refreshed from
        this object on every call.
        """
        if not self.rule:
            domain = table.domain
            names = [a.name for a in domain]
            cont_dists = cont_dists or dict(
                zip(names, Orange.statistics.basic.Domain(table)))
            disc_dists = disc_dists or dict(
                zip(names, Orange.statistics.distribution.Domain(table)))
            filters = []

            # Continuous columns.
            for col, box_range in zip(self.cols, zip(*self.bbox)):
                attr = domain[col]
                position = domain.index(attr)
                stats = cont_dists[attr.name]
                low, high = r_intersect(box_range, [stats.min, stats.max])
                if high - low > 0.99 * (stats.max - stats.min):
                    # Covers (almost) the whole column: not worth a condition.
                    continue

                # The condition keeps the raw box bounds, not the intersection.
                filters.append(
                    orange.ValueFilter_continuous(position=position,
                                                  max=box_range[1],
                                                  min=box_range[0]))

            # Discrete columns.
            for name, indexes in self.discretes.iteritems():
                attr = domain[name]
                position = domain.index(attr)

                selected = []
                for vidx in indexes:
                    idx = int(vidx)
                    # Ignore indexes beyond the attribute's declared values.
                    if idx < len(attr.values):
                        selected.append(orange.Value(attr, attr.values[idx]))

                if selected and len(selected) != len(disc_dists[attr.name]):
                    filters.append(
                        orange.ValueFilter_discrete(position=position,
                                                    values=selected))

            self.rule = SDRule(table, None, conditions=filters)

        rule = self.rule
        error = self.error
        rule.quality = error
        rule.score = error
        rule.inf_state = self.inf_state
        rule.c_range = self.c_range
        return rule
Exemple #4
0
    def getFixed(self, original_data):
        """Return an SDRule over ``original_data`` equivalent to this rule.

        Each condition of this rule points at a feature in
        ``self.data.domain.attributes``; the feature's ``attribute``, ``cond``
        and ``value`` are turned into a filter on that attribute's position
        in ``original_data``:

        * ``'=='`` -> discrete filter accepting only ``value``
        * ``'!='`` -> discrete filter accepting every other value
        * ``'<='`` -> continuous filter inside ``[-infinity, value]``
        * ``'>'``  -> continuous filter outside ``[-infinity, value]``

        Features with any other ``cond`` contribute no condition.
        """
        rebuilt = []
        for c in self.filter.conditions:
            feature = self.data.domain.attributes[c.position]
            position = original_data.domain.attributes.index(feature.attribute)
            op = feature.cond

            if op in ('==', '!='):
                source = feature.attribute
                if op == '==':
                    accepted = [orange.Value(source, feature.value)]
                else:
                    accepted = [
                        orange.Value(source, value)
                        for value in source.values
                        if value != feature.value
                    ]
                rebuilt.append(
                    orange.ValueFilter_discrete(position=position,
                                                values=accepted))
            elif op in ('<=', '>'):
                # Same interval for both operators; '>' takes its complement.
                rebuilt.append(
                    orange.ValueFilter_continuous(position=position,
                                                  max=feature.value,
                                                  min=float(-infinity),
                                                  outside=(op == '>')))

        return SDRule(original_data, self.targetClass, rebuilt, self.g)
Exemple #5
0
    def dictToCond(d, data):
        """Build an Orange value filter from the condition dictionary ``d``.

        ``d['type'] == 'num'`` yields a continuous Between filter at position
        ``d['pos']`` with bounds ``d['vals'][0]`` and ``d['vals'][1]``.
        Any other type yields a discrete filter at ``d['pos']`` over
        ``d['vals']``, with values created for the attribute ``d['col']`` of
        ``data.domain``.
        """
        if d['type'] == 'num':
            return orange.ValueFilter_continuous(
                position=d['pos'],
                oper=orange.ValueFilter.Between,
                min=d['vals'][0],
                max=d['vals'][1])

        attr = data.domain[d['col']]

        def _resolve(raw):
            # XXX: NULL hack -- a None entry is replaced by the attribute's
            # 'NULL' (preferred) or 'None' value when it declares one;
            # otherwise None is passed through unchanged.
            if raw is None:
                for placeholder in ('NULL', 'None'):
                    if placeholder in attr.values:
                        return placeholder
            return raw

        vals = [orange.Value(attr, _resolve(v)) for v in d['vals']]
        return orange.ValueFilter_discrete(position=d['pos'], values=vals)
    if pd0 != data[0][1]:
        raise Exception("reference when there shouldn't be")


def testref(mid):
    pd0 = data[0][1]
    mid[0][1] += 1
    if pd0 == data[0][1]:
        raise Exception("not reference when there should be")


# Filter over data's domain with a single condition: LENGTH between -9999 and
# 9999, with the 'acceptSpecial' flag set.
filterany = orange.Filter_values()
filterany.domain = data.domain
filterany.conditions.append(
    orange.ValueFilter_continuous(position=data.domain.index("LENGTH"),
                                  min=-9999,
                                  max=9999,
                                  acceptSpecial=True))

# We sometimes use LENGTH=... and sometimes filterany.
# The former cannot be given the 'acceptSpecial' flag, but we would
# still like to test that form of the call when we can.
# testref raises when modifying the result leaves data unchanged;
# testnonref is defined outside this view -- presumably the opposite check.
testnonref(data.filter(LENGTH=(-9999, 9999)))
testref(data.filterref(filterany))
testref(data.filterlist(filterany))

# Selector list with one 1 per example in data.
ll = [1] * len(data)
testnonref(data.select(ll))
testref(data.selectref(ll))
testref(data.selectlist(ll))

testnonref(data.getitems(range(10)))