예제 #1
0
def test_results_to_table(tr, keep_metas=True, keep_attrs=False):
    """Build a table that carries each classifier's predictions as new features.

    For every name in ``tr.classifier_names`` a feature called
    ``'cls_' + name`` is created: via ``data_utils.make_c_feature`` when
    ``tr.class_values`` is ``None`` (continuous class), otherwise via
    ``data_utils.make_d_feature`` over ``tr.class_values``.  The values of
    the i-th feature are ``r.classes[i]`` for every ``r`` in ``tr.results``.
    The features are then handed to ``data_utils.cast_table`` together with
    ``tr.examples``.

    Args:
        tr: test-results object exposing ``class_values``,
            ``classifier_names``, ``results`` and ``examples``.
        keep_metas: forwarded unchanged to ``data_utils.cast_table``.
        keep_attrs: when false, an attribute selector that rejects every
            attribute is passed to ``cast_table``; when true, ``None`` is
            passed as the selector.

    Returns:
        Whatever ``data_utils.cast_table`` returns.

    Raises:
        AttributeError: if ``tr`` has no ``examples`` attribute.
    """
    is_cont = tr.class_values is None  # None if continuous, non-None if discrete

    new_attrs = {}
    for i, cn in enumerate(tr.classifier_names):
        feat_name = 'cls_' + cn
        if is_cont:
            feat = data_utils.make_c_feature(feat_name)
        else:
            feat = data_utils.make_d_feature(feat_name, tr.class_values)  # TODO: untested
        new_attrs[feat] = [r.classes[i] for r in tr.results]

    # A missing `examples` attribute (save_examples was not used on the test
    # results) surfaces as AttributeError to the caller.
    # TODO: when neither metas nor attrs are kept, construct the table from
    # scratch instead of failing.
    orig_table = tr.examples

    if keep_attrs:
        attr_selector = None
    else:
        # Selector that rejects every original attribute.
        attr_selector = lambda x: False

    return data_utils.cast_table(orig_table,
                                 new_attrs=new_attrs,
                                 attr_selector=attr_selector,
                                 keep_metas=keep_metas)
예제 #2
0
def get_norm_dist_features(data, prefix="dist_", norm_data=None):
    """Return one distance feature per class centroid of ``data``.

    ``get_class_centroids(data)`` yields ``(class value, centroid)`` pairs.
    For each pair a feature named ``prefix + class value`` is created with
    ``data_utils.make_c_feature``; its ``get_value_from`` hook is set to the
    wrapped result of ``get_norm_distance_func(centroid, norm_data)``.

    Args:
        data: table the class centroids are computed from.
        prefix: string prepended to each class value to form the feature name.
        norm_data: table passed to ``get_norm_distance_func``; falls back to
            ``data`` when ``None``.

    Returns:
        List of the created features, in centroid order.
    """
    reference = data if norm_data is None else norm_data

    def _build_feature(class_label, centroid):
        # One feature whose value is computed lazily through get_value_from.
        feature = data_utils.make_c_feature(prefix + class_label)
        distance_func = get_norm_distance_func(centroid, reference)
        feature.get_value_from = _value_from_wrapper(distance_func)
        return feature

    return [_build_feature(class_label, centroid)
            for class_label, centroid in get_class_centroids(data)]
예제 #3
0
# Score the attributes of in_data and order them by the second element of
# each entry, highest first (entries are presumably (attribute, score) pairs
# -- confirm against d_utils.get_relief_scores).
r_scores = d_utils.get_relief_scores(in_data)
r_scores.sort(key=lambda x: x[1], reverse=True)

# Keep only the 40 top-ranked attributes, selected by name.
narrow_data = cast_table(in_data, attr_selector=[x[0].name for x in r_scores[:40]])
dist_feats = d_utils.get_norm_dist_features(narrow_data)
# Second feature set: built from the exemplary table for 'FA'/'GA', named with
# the 'dist_E' prefix, with narrow_data passed as the third argument.
exemplary_table = d_utils.get_exemplary_table(narrow_data, ['FA', 'GA'])
exem_dist_feats = d_utils.get_norm_dist_features(exemplary_table, 'dist_E', narrow_data)

out_data = cast_table(narrow_data, new_attrs=dist_feats+exem_dist_feats)


def get_score_boost(x, rw=None):
    """Return (dist_EGA - dist_EFA) / (dist_EGA + dist_EFA) for example ``x``.

    ``rw`` is accepted but not used.
    """
    return (x['dist_EGA'] - x['dist_EFA']) / (x['dist_EGA'] + x['dist_EFA'])


# One sigmoid-wrapped boost function per current class, each built from the
# examples of that class only.
get_score_boost_ga = d_utils.get_sigmoid_func(get_score_boost, out_data.filter_ref(R_ah_current='GA'), SIGMA_WIDTH)
get_score_boost_fa = d_utils.get_sigmoid_func(get_score_boost, out_data.filter_ref(R_ah_current='FA'), SIGMA_WIDTH)

def bucketed_score_boost(ex, rw=None):
    """Return the score boost of ``ex`` using the function for its class.

    Examples whose ``R_ah_current`` equals 'FA' use ``get_score_boost_fa``;
    every other example uses ``get_score_boost_ga``.  ``rw`` is not used.
    """
    boost_func = get_score_boost_fa if ex['R_ah_current'] == 'FA' else get_score_boost_ga
    return boost_func(ex)


# New feature 'score_boost' whose value is computed by bucketed_score_boost.
score_boost_feat = make_c_feature('score_boost')
score_boost_feat.get_value_from = bucketed_score_boost
boosted_data = cast_table(out_data, new_attrs=[score_boost_feat])

def total_score(ex, rw=None, base_scores=BASE_SCORES):
    """Return the base score of the example's current class plus its boost.

    The base score is looked up in ``base_scores`` under the ``.value`` of
    ``ex['R_ah_current']``; ``ex['score_boost']`` is added to it.  ``rw`` is
    not used.
    """
    current_class = ex['R_ah_current'].value
    return base_scores[current_class] + ex['score_boost']


# 'total_score' becomes the class variable of the resulting table.
total_score_feat = make_c_feature('total_score')
total_score_feat.get_value_from = total_score
scored_data = cast_table(boosted_data, new_class_var=total_score_feat)