def selection_del(dataset, result=None):
    """Greedy backward elimination of features.

    Starting from ``result`` (or, when it is ``None``, from the set of all
    feature indices), each pass evaluates every subset obtained by removing
    exactly one feature and keeps the candidate with the lowest error, as
    long as that error does not exceed the current one (ties are accepted,
    later candidates win ties). The search stops when a pass accepts no
    candidate or when only one feature is left.

    Args:
        dataset: Passed through to ``get_feature_count`` and ``count_error``.
        result: Optional starting point, a dict with at least ``"error"`` and
            ``"features"`` keys.

    Returns:
        A dict with ``"error"``, ``"features"`` and ``"log"`` keys. Every
        evaluation made by this call is appended to a log list created here;
        entries of a supplied ``result["log"]`` are not copied into it, and
        a supplied ``result`` is returned as-is when no removal is accepted.
    """
    total = get_feature_count(dataset)
    log = []

    if result is None:
        everything = list(range(total))
        baseline = count_error(dataset, everything)
        log.append({"error": baseline, "feature_count": total})
        result = {"error": baseline, "features": everything, "log": log}

    while len(result["features"]) > 1:
        best = result
        for dropped in result["features"]:
            remaining = list(result["features"])
            # list.remove drops the first occurrence of the value.
            remaining.remove(dropped)
            remaining_error = count_error(dataset, remaining)
            log.append({
                "error": remaining_error,
                "feature_count": len(remaining),
            })
            if remaining_error <= best["error"]:
                best = {
                    "error": remaining_error,
                    "features": remaining,
                    "log": log,
                }
        if best == result:
            # No single removal was acceptable in this pass.
            return result
        result = best

    return result
def make_plot(dataset, result):
    """Render the plot for a finished selection run.

    Nothing is drawn when ``result["log"]`` is empty. Otherwise the module's
    plotting helpers are invoked in a fixed order: ``__set_labels``,
    ``__plot_all``, ``__plot_best_in_col`` and ``__plot_best``.

    Args:
        dataset: Passed to ``get_feature_count`` and ``__plot_all``.
        result: Selection result dict; its ``"log"`` entry is read here and
            the whole dict is handed to ``__plot_best``.
    """
    history = result["log"]
    if history:
        n_features = get_feature_count(dataset)
        __set_labels()
        __plot_all(dataset, history, n_features)
        __plot_best_in_col(history, n_features)
        __plot_best(result)
def __sort_features(dataset):
    """Rank features by the error each one produces when used alone.

    Returns:
        A list of ``{"i": feature_index, "pos": rank}`` dicts ordered by
        ascending single-feature error; ``rank`` is the 0-based position in
        that ordering. Features with equal error keep their index order.
    """
    n_features = get_feature_count(dataset)
    scored = [
        (count_error(dataset, [idx]), idx) for idx in range(n_features)
    ]
    # Sort on the error only; the sort is stable, so ties stay in index order.
    scored.sort(key=lambda pair: pair[0])
    return [
        {"i": idx, "pos": rank} for rank, (_, idx) in enumerate(scored)
    ]
def selection_full_search(dataset):
    """Evaluate every candidate from ``__feat_gen`` and keep the best one.

    Each feature list yielded by ``__feat_gen(feature_count)`` (presumably
    all feature subsets; confirm against that helper) is scored with
    ``count_error``; the first candidate with the strictly lowest error wins.

    Args:
        dataset: Passed through to ``get_feature_count`` and ``count_error``.

    Returns:
        A dict with ``"error"``, ``"features"`` and ``"log"`` keys. ``"log"``
        holds one ``{"error", "feature_count"}`` entry per evaluated
        candidate. If no candidate is yielded, ``"features"`` is empty and
        ``"error"`` is ``float("inf")``.
    """
    feature_count = get_feature_count(dataset)
    log = []
    # Start from +inf rather than a finite sentinel so that the first
    # candidate is always accepted, however large its error is.
    result = {"error": float("inf"), "features": [], "log": log}
    for candidate in __feat_gen(feature_count):
        error = count_error(dataset, candidate)
        log.append({"error": error, "feature_count": len(candidate)})
        if error < result["error"]:
            result = {"error": error, "features": candidate, "log": log}
    return result
def make_compare_plot(dataset, result, draw_ticks=False, **kwargs):
    """Add one selection run to a comparison plot.

    Returns immediately when ``result["log"]`` is empty. Otherwise it sets
    the labels, optionally draws the x ticks, calls ``__plot_best_in_col``
    and ``__plot_best`` (both receive ``**kwargs``), and finally sets the
    y ticks to the values returned by ``__plot_best_in_col``, labelled via
    ``__make_quality_tick_labels``.

    Args:
        dataset: Passed to ``get_feature_count`` and
            ``__make_quality_tick_labels``.
        result: Selection result dict with a ``"log"`` entry.
        draw_ticks: When true, ``__draw_x_ticks`` is called with the feature
            count.
        **kwargs: Forwarded unchanged to the two plotting helpers.
    """
    history = result["log"]
    if not history:
        return

    n_features = get_feature_count(dataset)
    __set_labels()
    if draw_ticks:
        __draw_x_ticks(n_features)

    tick_positions = __plot_best_in_col(history, n_features, **kwargs)
    __plot_best(result, **kwargs)

    tick_labels = __make_quality_tick_labels(dataset, tick_positions)
    plot.yticks(tick_positions, tick_labels)
def selection_dfs(dataset):
    """Run the depth-first feature search implemented by ``__increase``.

    The features are first ranked with ``__sort_features``. A list of
    ``feature_count + 1`` placeholder results (error ``9999``, no features)
    is then handed to ``__increase`` together with an empty log; that helper
    is expected to fill both in place (its body is not visible here).

    Returns:
        The placeholder/result dict with the smallest ``"error"`` (the first
        one on ties), with ``"features"`` replaced by the sorted output of
        ``__flatten_features`` and ``"log"`` set to the collected log.
    """
    n_features = get_feature_count(dataset)
    ranked = __sort_features(dataset)
    history = []
    # Each slot is a separate dict with its own list, so in-place updates to
    # one slot never leak into another.
    slots = [{"error": 9999, "features": []} for _ in range(n_features + 1)]

    __increase([], ranked, slots, dataset, history)

    best = min(slots, key=lambda slot: slot["error"])
    best["features"] = sorted(__flatten_features(best["features"]))
    best["log"] = history
    return best
def selection_bfs(dataset, iter_limit=1):
    """Breadth-first (beam) forward search over feature subsets.

    Level 1 holds every single-feature candidate. At each level all
    candidates are scored with ``count_error``, sorted by error and trimmed
    to the ``iter_limit`` best ones. The search stops as soon as the best
    candidate of a level is strictly worse than the best result so far;
    otherwise the survivors are each extended by every feature they do not
    contain yet to form the next level.

    Args:
        dataset: Passed through to ``get_feature_count`` and ``count_error``.
        iter_limit: Number of candidates kept per level. Expected to be at
            least 1; with ``0`` the trimmed level is empty and indexing it
            raises ``IndexError``.

    Returns:
        The best candidate dict with ``"error"``, ``"features"`` (sorted
        ascending) and ``"log"`` keys. ``"log"`` has one
        ``{"error", "feature_count"}`` entry per evaluation. When the dataset
        has no features, ``"features"`` is empty and ``"error"`` is
        ``float("inf")``.
    """
    feature_count = get_feature_count(dataset)
    logs = []
    # +inf instead of a finite sentinel: a first level whose best error is
    # very large must still be accepted rather than ending the search with
    # an empty result.
    result_best = {"error": float("inf"), "features": [], "log": logs}
    result = [{"features": [i]} for i in range(feature_count)]

    # At most one level per feature: after that no candidate can grow.
    for _ in range(feature_count):
        for res in result:
            res["error"] = count_error(dataset, res["features"])
            logs.append({
                "error": res["error"],
                "feature_count": len(res["features"]),
            })

        result.sort(key=lambda r: r["error"])
        if len(result) > iter_limit:
            result = result[:iter_limit]

        level_best = result[0]
        if level_best["error"] > result_best["error"]:
            break
        if level_best["error"] < result_best["error"]:
            result_best = level_best

        # NOTE(review): with iter_limit > 1 the same feature set can be
        # produced in several orders (e.g. [a, b] and [b, a]) and is then
        # evaluated more than once; left unchanged to keep the log identical.
        result = [
            {"features": res["features"] + [i]}
            for res in result
            for i in range(feature_count)
            if i not in res["features"]
        ]

    result_best["features"].sort()
    result_best["log"] = logs
    return result_best
def selection_add(dataset, result=None):
    """Greedy forward selection of features.

    Starting from ``result`` (or from an empty feature set when it is
    ``None``), each pass evaluates every set obtained by adding exactly one
    missing feature and keeps the candidate with the lowest error, as long
    as that error does not exceed the current one (ties are accepted, later
    candidates win ties). The search stops when a pass accepts no candidate,
    which includes the case where every feature is already selected.

    Args:
        dataset: Passed through to ``get_feature_count`` and ``count_error``.
        result: Optional starting point, a dict with at least ``"error"`` and
            ``"features"`` keys.

    Returns:
        A dict with ``"error"``, ``"features"`` (sorted ascending for every
        result built here) and ``"log"`` keys. Every evaluation made by this
        call is appended to a log list created here; entries of a supplied
        ``result["log"]`` are not copied into it, and a supplied ``result``
        is returned as-is when no addition is accepted. With no starting
        point and no acceptable candidate, ``"error"`` is ``float("inf")``.
    """
    feature_count = get_feature_count(dataset)
    log = []
    if result is None:
        # +inf instead of a finite sentinel: the first pass must accept its
        # best candidate regardless of how large the error values are.
        result = {"error": float("inf"), "features": [], "log": log}

    while True:
        result_cur = result
        for feat in range(feature_count):
            if feat in result["features"]:
                continue
            features = sorted(result["features"] + [feat])
            error = count_error(dataset, features)
            log.append({"error": error, "feature_count": len(features)})
            if error <= result_cur["error"]:
                result_cur = {"error": error, "features": features, "log": log}

        # A replacement always has one more feature than `result`, so an
        # unchanged binding is exactly the "nothing accepted" case.
        if result_cur is result:
            break
        result = result_cur

    return result