def extract_rules(self, rules, colname, rule_list=None):
    """Copy a decision-tree dict and record statistics for each leaf.

    A node that has a ``'children'`` key is copied recursively; every child
    is visited with this node's name appended to the rule path.  A node
    without ``'children'`` is treated as a leaf: each rule on the path is
    applied to a ``DataFrameFilterer`` built from ``self._data_frame1``,
    ``colname`` is bucketized with ``self._splits`` and the aggregated counts
    are appended to ``self._new_rules``, ``self._total``, ``self._success``
    and ``self._probability`` under the leaf's group label.

    Args:
        rules: Node dict with a ``'name'`` entry and, for internal nodes, a
            ``'children'`` list of node dicts.
        colname: Column handed to ``DataFrameFilterer.bucketize``.
        rule_list: Names of the ancestor nodes (the path to this node).
            ``None`` means "start at the root".

    Returns:
        A new dict mirroring ``rules``.  Leaf names are rewritten to
        ``'Predict: <group>'`` using ``self._map``.

    Side effects:
        ``self._splits`` is replaced by its sorted, de-duplicated values.
    """
    if rule_list is None:
        rule_list = []
    new_tree = {'name': rules['name']}

    # ``dict.has_key`` was removed in Python 3; ``in`` works on 2 and 3.
    if 'children' in rules:
        path = rule_list + [rules['name']]
        new_tree['children'] = [
            self.extract_rules(rules=child, colname=colname, rule_list=path)
            for child in rules['children']
        ]
        return new_tree

    DFF = DataFrameFilterer(self._data_frame1)

    # Order matters: ' not in ' has to be tried before ' in '.
    filters = (
        (' <= ', DFF.values_below),
        (' > ', DFF.values_above),
        (' not in ', DFF.values_not_in),
        (' in ', DFF.values_in),
    )
    for rule in rule_list:
        for token, apply_filter in filters:
            if token in rule:
                # Split once so an operand that itself contains the operator
                # text no longer breaks the two-name unpacking.
                var, operand = rule.split(token, 1)
                apply_filter(var, operand)
                break
        # Rules matching none of the operators are ignored, as before.

    # The old code sorted first and then rebuilt the list from a set, which
    # throws the ordering away again.  De-duplicate first, then sort.
    self._splits = sorted(set(self._splits))
    binned_colname = DFF.bucketize(self._splits, colname)

    # The text after the 9-character prefix (this method writes 'Predict: ')
    # is parsed as a float key into self._map.
    target = self._map[float(rules['name'][9:])]['group']

    success = 0
    total = 0
    for row in DFF.get_aggregated_result(binned_colname, target):
        if self._label_code[row[0]] == target:
            success = row[1]
        total += row[1]

    if total > 0:
        if target not in self._new_rules:
            self._new_rules[target] = []
            self._total[target] = []
            self._success[target] = []
            self._probability[target] = []
        self._new_rules[target].append(','.join(rule_list))
        self._total[target].append(total)
        self._success[target].append(success)
        self._probability[target].append(success * 100.0 / total)

    new_tree['name'] = 'Predict: ' + target
    return new_tree
def extract_rules(self, rules, colname, rule_list=None):
    """Mirror a decision-tree dict and collect per-leaf hit statistics.

    Nodes carrying a ``'children'`` key are copied recursively, with the
    node's own name appended to the path handed to each child.  Any other
    node is a leaf: the path rules are applied to a ``DataFrameFilterer``
    over ``self._data_frame1`` and the aggregated counts for ``colname`` are
    appended to ``self._new_rules``, ``self._total``, ``self._success`` and
    ``self._probability``, keyed by the leaf name minus its first nine
    characters.

    Args:
        rules: Node dict with ``'name'`` and, optionally, ``'children'``.
        colname: Column passed to ``get_aggregated_result``.
        rule_list: Names of the ancestor nodes; ``None`` for the root call.

    Returns:
        A new dict with the same ``'name'`` / ``'children'`` structure.
    """
    path = [] if rule_list is None else rule_list
    node = {'name': rules['name']}

    if 'children' in rules:
        node['children'] = [
            self.extract_rules(rules=child,
                               colname=colname,
                               rule_list=path + [rules['name']])
            for child in rules['children']
        ]
        return node

    frame_filter = DataFrameFilterer(self._data_frame1)
    target = rules['name'][9:]

    # (operator text, filter method) pairs; ' not in ' must precede ' in '.
    operators = (
        (' <= ', 'values_below'),
        (' > ', 'values_above'),
        (' not in ', 'values_not_in'),
        (' in ', 'values_in'),
    )
    for rule in path:
        for token, method_name in operators:
            if token in rule:
                column, operand = rule.split(token)
                getattr(frame_filter, method_name)(column, operand)
                break

    hits = 0
    seen = 0
    for row in frame_filter.get_aggregated_result(colname, target):
        if row[0] == target:
            hits = row[1]
        seen = seen + row[1]

    if seen > 0:
        if target not in self._new_rules:
            self._new_rules[target] = []
            self._total[target] = []
            self._success[target] = []
            self._probability[target] = []
        self._new_rules[target].append(','.join(path))
        self._total[target].append(seen)
        self._success[target].append(hits)
        # hits * 100.0 is already a float, so true division gives the same
        # value the old_div shim produced here.
        self._probability[target].append(hits * 100.0 / seen)

    return node
def extract_rules(self, rule_list, target):
    """Apply a path of rules and record how often ``target`` is hit.

    Each rule in ``rule_list`` is turned into a filter on a
    ``DataFrameFilterer`` built from ``self._data_frame1``.  The filtered
    frame is then aggregated on ``self._target_dimension`` and the counts
    are appended to ``self._new_rules``, ``self._total``, ``self._success``
    and ``self._probability``.  The variables used by the rules are merged
    into ``self._important_vars``.

    Args:
        rule_list: Rule strings of the form ``'<var> <= <limit>'``,
            ``'<var> > <limit>'``, ``'<var> in (<a>,<b>)'`` or
            ``'<var> not in (<a>,<b>)'``.
        target: Label of the predicted class; translated through
            ``self._reverse_map`` before querying and through
            ``self._mapping_dict`` before storing.

    Returns:
        The count reported for the row matching ``target`` (0 if none).
    """
    # ``dict.has_key`` was removed in Python 3; ``in`` works on 2 and 3.
    if target not in self._important_vars:
        self._important_vars[target] = []
    target = self._reverse_map[target]

    DFF = DataFrameFilterer(self._data_frame1)
    colname = self._target_dimension

    def resolve_levels(text):
        # Drop the first and last character (the surrounding brackets in the
        # rule formats above), then translate each level via the alias table.
        return [self._alias_dict[level] for level in text[1:-1].split(",")]

    important_vars = []
    for rule in rule_list:
        # Split once so an operand containing the operator text again does
        # not break the two-name unpacking.
        if ' <= ' in rule:
            var, limit = rule.split(' <= ', 1)
            DFF.values_below(var, limit)
        elif ' > ' in rule:
            var, limit = rule.split(' > ', 1)
            DFF.values_above(var, limit)
        elif ' not in ' in rule:
            var, levels = rule.split(' not in ', 1)
            DFF.values_not_in(var, resolve_levels(levels))
        elif ' in ' in rule:
            var, levels = rule.split(' in ', 1)
            DFF.values_in(var, resolve_levels(levels))
        else:
            # Unrecognised rule: previously this fell through to the append
            # below with ``var`` unbound (NameError) or left over from the
            # preceding rule.  Skip it instead.
            continue
        important_vars.append(var)

    success = 0
    total = 0
    for row in DFF.get_aggregated_result(colname, target):
        if row[0] == target:
            success = row[1]
        total += row[1]

    target = self._mapping_dict[self._target_dimension][target]
    self._important_vars[target] = list(
        set(self._important_vars[target] + important_vars))

    if total > 0:
        if target not in self._new_rules:
            self._new_rules[target] = []
            self._total[target] = []
            self._success[target] = []
            self._probability[target] = []
        self._new_rules[target].append(','.join(rule_list))
        self._total[target].append(total)
        self._success[target].append(success)
        self._probability[target].append(success * 100.0 / total)
    return success
def extract_rules(self, rule_list, target):
    """Apply a path of rules, snapshotting class counts after every step.

    A ``DataFrameFilterer`` is built from ``self._data_frame1`` and
    ``self._pandas_flag``.  The counts returned by ``get_count_result`` for
    ``self._target_dimension`` are captured once before any rule is applied
    and once more after each recognised rule.  Finally the filtered frame is
    aggregated and the totals are appended to ``self._new_rules``,
    ``self._total``, ``self._success`` and ``self._probability``; the
    variables used by the rules are merged into ``self._important_vars``.

    Args:
        rule_list: Rule strings using one of ``' <= '``, ``' > '``,
            ``' not in '`` or ``' in '``.
        target: Class label; translated through ``self._reverse_map`` before
            querying and through ``self._mapping_dict`` before storing.

    Returns:
        ``(success, total, dict_tree)`` where ``dict_tree`` is the list of
        count snapshots (one dict per step, first entry is the unfiltered
        frame).
    """
    if target not in self._important_vars:
        self._important_vars[target] = []
    target = self._reverse_map[target]

    frame_filter = DataFrameFilterer(self._data_frame1, self._pandas_flag)
    colname = self._target_dimension

    def count_snapshot():
        # Current value -> count mapping for the target column.
        return {
            row[0]: row[1]
            for row in frame_filter.get_count_result(colname)
            if row is not None
        }

    def expand_levels(text):
        # NOTE(review): reproduced exactly from the original.  For every
        # level this yields one entry per alias key (the level itself where
        # it equals the key, otherwise its alias), so the result contains
        # repeats and is empty when the alias table is empty -- confirm that
        # this is intended.
        names = text[1:-1].split(",")
        return [
            key if name == key else self._alias_dict[name]
            for name in names
            for key in list(self._alias_dict.keys())
        ]

    dict_tree = [count_snapshot()]
    important_vars = []

    for rule in rule_list:
        if ' <= ' in rule:
            var, limit = rule.split(' <= ')
            frame_filter.values_below(var, float(limit))
        elif ' > ' in rule:
            var, limit = rule.split(' > ')
            frame_filter.values_above(var, float(limit))
        elif ' not in ' in rule:
            var, levels = rule.split(' not in ')
            expanded = expand_levels(levels)
            frame_filter.values_not_in(var, expanded, self._measure_columns)
        elif ' in ' in rule:
            var, levels = rule.split(' in ')
            expanded = expand_levels(levels)
            frame_filter.values_in(var, expanded, self._measure_columns)
        else:
            # NOTE(review): an unrecognised rule adds no filter and no
            # snapshot, yet ``var`` is still recorded -- it is whatever the
            # previous rule left behind, or unbound on the first rule.
            important_vars.append(var)
            continue
        dict_tree.append(count_snapshot())
        important_vars.append(var)

    success = 0
    total = 0
    for row in frame_filter.get_aggregated_result(colname, target):
        if row[0] == target:
            success = row[1]
        total = total + row[1]

    target = self._mapping_dict[self._target_dimension][target]
    self._important_vars[target] = list(
        set(self._important_vars[target] + important_vars))

    if total > 0:
        if target not in self._new_rules:
            self._new_rules[target] = []
            self._total[target] = []
            self._success[target] = []
            self._probability[target] = []
        self._new_rules[target].append(','.join(rule_list))
        self._total[target].append(total)
        self._success[target].append(success)
        # success * 100.0 is already a float, so true division gives the
        # same value the old_div shim produced here.
        self._probability[target].append(success * 100.0 / total)

    return success, total, dict_tree