def test_results_to_table(tr, keep_metas=True, keep_attrs=False):
    """Build a table holding one new feature per classifier in ``tr``.

    For every name in ``tr.classifier_names`` a feature called
    ``'cls_' + name`` is created (continuous when ``tr.class_values`` is
    ``None``, discrete over ``tr.class_values`` otherwise) and mapped to the
    list of ``r.classes[i]`` collected from each entry of ``tr.results``.
    The mapping is handed to ``data_utils.cast_table`` together with
    ``tr.examples``.

    Args:
        tr: test-results object exposing ``class_values``,
            ``classifier_names``, ``results`` and ``examples``.
        keep_metas: forwarded unchanged to ``data_utils.cast_table``.
        keep_attrs: when false, a selector that rejects every attribute is
            passed to ``cast_table``; when true, ``None`` is passed as the
            selector.

    Returns:
        Whatever ``data_utils.cast_table`` returns.

    Raises:
        AttributeError: if ``tr`` has no ``examples`` attribute.
    """
    # class_values is None for continuous classes, non-None for discrete.
    is_cont = tr.class_values is None

    new_attrs = {}
    for i, cn in enumerate(tr.classifier_names):
        feat_name = 'cls_' + cn
        if is_cont:
            feat = data_utils.make_c_feature(feat_name)
        else:
            # TODO: untested
            feat = data_utils.make_d_feature(feat_name, tr.class_values)
        new_attrs[feat] = [r.classes[i] for r in tr.results]

    # If save_examples was not used on the test results there is no
    # ``examples`` attribute and the AttributeError propagates to the caller.
    # TODO: when neither metas nor attrs are kept, construct the table from
    # scratch instead of failing.
    orig_table = tr.examples

    # Reject every original attribute unless the caller asked to keep them.
    attr_selector = None if keep_attrs else (lambda x: False)

    return data_utils.cast_table(
        orig_table,
        new_attrs=new_attrs,
        attr_selector=attr_selector,
        keep_metas=keep_metas,
    )
def get_norm_dist_features(data, prefix="dist_", norm_data=None):
    """Return one continuous distance feature per class centroid of ``data``.

    Each ``(class value, centroid)`` pair produced by
    ``get_class_centroids(data)`` yields a feature named
    ``prefix + class value``. Its ``get_value_from`` hook is set to the
    function from ``get_norm_distance_func(centroid, norm_data)`` after it
    has been passed through ``_value_from_wrapper``.

    Args:
        data: table the class centroids are computed from.
        prefix: string prepended to each class value to form the feature name.
        norm_data: table handed to ``get_norm_distance_func``; defaults to
            ``data`` when ``None``.

    Returns:
        List of the created features, in centroid order.
    """
    reference = data if norm_data is None else norm_data

    def _make_feature(class_value, centroid):
        # Create the feature first, then attach its value-computing hook.
        feature = data_utils.make_c_feature(prefix + class_value)
        distance_func = get_norm_distance_func(centroid, reference)
        feature.get_value_from = _value_from_wrapper(distance_func)
        return feature

    return [
        _make_feature(class_value, centroid)
        for class_value, centroid in get_class_centroids(data)
    ]
# Scoring pipeline: narrow the attribute set, add centroid-distance features,
# derive a per-example score boost and finally a total score class variable.

# Rank the entries returned by get_relief_scores by their second element,
# highest first, and keep the names of the first 40 as the attribute subset.
r_scores = d_utils.get_relief_scores(in_data)
r_scores.sort(key=lambda x: x[1], reverse=True)
narrow_data = cast_table(in_data, attr_selector=[x[0].name for x in r_scores[:40]])

# Distance features computed from narrow_data itself (default 'dist_' prefix).
dist_feats = d_utils.get_norm_dist_features(narrow_data)

# Distance features computed from the 'FA'/'GA' exemplary table, with
# narrow_data passed as the third (normalisation) argument. With the
# 'dist_E' prefix these are presumably named 'dist_EFA' and 'dist_EGA',
# the keys read by get_score_boost below -- TODO confirm.
exemplary_table = d_utils.get_exemplary_table(narrow_data, ['FA', 'GA'])
exem_dist_feats = d_utils.get_norm_dist_features(exemplary_table, 'dist_E', narrow_data)
out_data = cast_table(narrow_data, new_attrs=dist_feats+exem_dist_feats)

# Raw boost: (dist_EGA - dist_EFA) / (dist_EGA + dist_EFA).
# NOTE(review): no guard for both distances summing to zero -- confirm that
# cannot happen for this data.
get_score_boost = lambda x, rw=None: (x['dist_EGA'] - x['dist_EFA']) / (x['dist_EGA'] + x['dist_EFA'])

# One sigmoid-wrapped boost function per R_ah_current bucket, each built from
# the examples of that bucket only.
get_score_boost_ga = d_utils.get_sigmoid_func(get_score_boost, out_data.filter_ref(R_ah_current='GA'), SIGMA_WIDTH)
get_score_boost_fa = d_utils.get_sigmoid_func(get_score_boost, out_data.filter_ref(R_ah_current='FA'), SIGMA_WIDTH)
score_boost_feat = make_c_feature('score_boost')
def bucketed_score_boost(ex, rw=None):
    """Return the boost for ``ex`` using the function of its bucket.

    Examples whose ``R_ah_current`` equals ``'FA'`` use the FA function;
    every other example uses the GA function. ``rw`` is accepted but unused.
    """
    if ex['R_ah_current'] == 'FA':
        return get_score_boost_fa(ex)
    else:
        return get_score_boost_ga(ex)
score_boost_feat.get_value_from = bucketed_score_boost
boosted_data = cast_table(out_data, new_attrs=[score_boost_feat])

total_score_feat = make_c_feature('total_score')
def total_score(ex, rw=None, base_scores=BASE_SCORES):
    """Return the base score of ``ex``'s ``R_ah_current`` plus its boost.

    ``base_scores`` is indexed by ``ex['R_ah_current'].value``; its default
    is bound to ``BASE_SCORES`` when this function is defined. ``rw`` is
    accepted but unused.
    """
    base_score = base_scores[ex['R_ah_current'].value]
    return base_score + ex['score_boost']
total_score_feat.get_value_from = total_score

# Final table: boosted_data recast with total_score as the new class variable.
scored_data = cast_table(boosted_data, new_class_var=total_score_feat)