def selection_del(dataset, result=None):
    """Greedy backward elimination (DEL) over feature indices.

    Starting from ``result`` (or, when it is ``None``, from the full feature
    set), every pass evaluates each subset obtained by dropping exactly one
    feature.  A candidate replaces the best of the pass whenever its error is
    less than or equal to it, so on ties the later candidate wins.  Passes
    repeat until no candidate qualifies or a single feature is left.

    Args:
        dataset: Opaque dataset forwarded to ``get_feature_count`` and
            ``count_error``.
        result: Optional starting point; a dict with ``"error"``,
            ``"features"`` and ``"log"`` keys.

    Returns:
        A dict with ``"error"``, ``"features"`` and ``"log"`` keys.  If no
        removal ever qualifies, the starting ``result`` object is returned
        unchanged.
    """
    total = get_feature_count(dataset)
    history = []

    if result is None:
        everything = list(range(total))
        baseline = count_error(dataset, everything)
        history.append({"error": baseline, "feature_count": total})
        result = {"error": baseline, "features": everything, "log": history}

    while len(result["features"]) > 1:
        best = result
        for dropped in result["features"]:
            remaining = result["features"][:]
            remaining.remove(dropped)
            remaining_error = count_error(dataset, remaining)
            history.append(
                {"error": remaining_error, "feature_count": len(remaining)}
            )
            if remaining_error <= best["error"]:
                best = {
                    "error": remaining_error,
                    "features": remaining,
                    "log": history,
                }

        # A replacement is always a freshly built dict, so identity is enough
        # to tell whether this pass found one.
        if best is result:
            return result
        result = best

    return result
def __sort_features(dataset):
    """Rank features by the error each one yields when used alone.

    Args:
        dataset: Opaque dataset forwarded to ``get_feature_count`` and
            ``count_error``.

    Returns:
        A list of ``{"i": feature_index, "pos": rank}`` dicts ordered by
        ascending single-feature error; ``rank`` starts at 0.  Features with
        equal error keep their original index order (the sort is stable).
    """
    feature_count = get_feature_count(dataset)

    solo_errors = [
        {"i": index, "error": count_error(dataset, [index])}
        for index in range(feature_count)
    ]
    solo_errors.sort(key=lambda entry: entry["error"])

    return [
        {"i": entry["i"], "pos": rank}
        for rank, entry in enumerate(solo_errors)
    ]
def selection_full_search(dataset):
    """Exhaustively evaluate every feature subset produced by ``__feat_gen``.

    Args:
        dataset: Opaque dataset forwarded to ``get_feature_count`` and
            ``count_error``.

    Returns:
        A dict with ``"error"``, ``"features"`` and ``"log"`` keys describing
        the first subset that reached the lowest error.  If no subset scores
        below the 9999 sentinel, ``"error"`` stays 9999 and ``"features"`` is
        an empty list.
    """
    feature_count = get_feature_count(dataset)
    history = []

    # 9999 acts as the "nothing found yet" sentinel.
    best_error = 9999
    best_features = []

    for candidate in __feat_gen(feature_count):
        candidate_error = count_error(dataset, candidate)
        history.append(
            {"error": candidate_error, "feature_count": len(candidate)}
        )
        # Strict comparison: on ties the earlier subset is kept.
        if candidate_error < best_error:
            best_error = candidate_error
            best_features = candidate

    return {"error": best_error, "features": best_features, "log": history}
def selection_bfs(dataset, iter_limit=1):
    """Breadth-first (beam) search over feature subsets of growing size.

    Level ``k`` holds candidate feature lists of length ``k``.  Every
    candidate of the level is scored with ``count_error``, the level is
    sorted by error and truncated to the ``iter_limit`` best candidates, and
    each survivor is then extended by every feature it does not contain yet
    to form the next level.  The search stops early as soon as the best
    candidate of a level is strictly worse than the best result found so far.

    Args:
        dataset: Opaque dataset forwarded to ``get_feature_count`` and
            ``count_error``.
        iter_limit: Beam width, i.e. how many candidates survive each level.
            Must be at least 1.

    Returns:
        A dict with ``"features"`` (sorted ascending), ``"error"`` and
        ``"log"`` keys.  If nothing scores below the 9999 sentinel, the
        error stays 9999 and the feature list is empty.

    Raises:
        ValueError: If ``iter_limit`` is smaller than 1.  Previously such a
            value emptied the beam (or silently dropped its worst candidate)
            and typically surfaced as an unexplained ``IndexError``.
    """
    if iter_limit < 1:
        raise ValueError("iter_limit must be at least 1")

    feature_count = get_feature_count(dataset)
    logs = []
    # 9999 acts as the "nothing found yet" sentinel.
    result_best = {"error": 9999, "features": [], "log": logs}

    frontier = [{"features": [index]} for index in range(feature_count)]

    # One level per possible subset size; the level number itself is unused.
    for _ in range(feature_count):
        for candidate in frontier:
            candidate["error"] = count_error(dataset, candidate["features"])
            logs.append({
                "error": candidate["error"],
                "feature_count": len(candidate["features"]),
            })

        frontier.sort(key=lambda candidate: candidate["error"])
        frontier = frontier[:iter_limit]

        leader = frontier[0]
        if leader["error"] > result_best["error"]:
            break
        if leader["error"] < result_best["error"]:
            result_best = leader

        # Extension lists are new objects, so sorting the winner's features
        # at the end never disturbs candidates derived from it.
        frontier = [
            {"features": candidate["features"] + [index]}
            for candidate in frontier
            for index in range(feature_count)
            if index not in candidate["features"]
        ]

    result_best["features"].sort()
    result_best["log"] = logs
    return result_best
def selection_add(dataset, result=None):
    """Greedy forward selection (ADD) over feature indices.

    Starting from ``result`` (or, when it is ``None``, from an empty feature
    set with the 9999 sentinel error), every pass evaluates each subset
    obtained by adding exactly one feature that is not selected yet.  A
    candidate replaces the best of the pass whenever its error is less than
    or equal to it, so on ties the later candidate wins.  Passes repeat until
    no candidate qualifies.

    Args:
        dataset: Opaque dataset forwarded to ``get_feature_count`` and
            ``count_error``.
        result: Optional starting point; a dict with ``"error"``,
            ``"features"`` and ``"log"`` keys.

    Returns:
        A dict with ``"error"``, ``"features"`` (sorted ascending for every
        accepted candidate) and ``"log"`` keys.  If no addition ever
        qualifies, the starting ``result`` object is returned unchanged.
    """
    feature_count = get_feature_count(dataset)
    history = []

    if result is None:
        result = {"error": 9999, "features": [], "log": history}

    while True:
        best = result
        for extra in range(feature_count):
            if extra not in result["features"]:
                extended = result["features"] + [extra]
                extended.sort()
                extended_error = count_error(dataset, extended)
                history.append(
                    {"error": extended_error, "feature_count": len(extended)}
                )
                if extended_error <= best["error"]:
                    best = {
                        "error": extended_error,
                        "features": extended,
                        "log": history,
                    }

        # A replacement is always a freshly built dict, so identity is enough
        # to tell whether this pass found one.
        if best is result:
            return result
        result = best
def __increase(features_cur, features, results, dataset, logs):
    """Recursive depth-first step that grows ``features_cur`` by one feature.

    The current subset is scored (an empty subset gets the 9999 sentinel
    instead of a ``count_error`` call).  The branch is abandoned when any
    smaller subset size already recorded a strictly lower error; otherwise
    the record for the current size is updated if improved, and the function
    recurses with every feature whose ``"pos"`` is greater than the value
    returned by ``__max_feature_pos(features_cur)``.

    Args:
        features_cur: List of feature dicts selected so far.
        features: All candidate feature dicts; each must carry a ``"pos"``
            key.
        results: Sequence indexed by subset size; each entry is a dict with
            ``"error"`` and ``"features"`` keys and is updated in place.
        dataset: Opaque dataset forwarded to ``count_error``.
        logs: List that receives one ``{"error", "feature_count"}`` entry per
            scored subset (entries equal to the 9999 sentinel are skipped).

    Returns:
        None.  Results are reported through ``results`` and ``logs``.
    """
    depth = len(features_cur)
    record = results[depth]

    if depth != 0:
        error = count_error(dataset, __flatten_features(features_cur))
    else:
        error = 9999

    if error != 9999:
        logs.append({"error": error, "feature_count": depth})

    # Prune: some strictly smaller subset size already does better.
    if any(smaller["error"] < error for smaller in results[:depth]):
        return

    if error < record["error"]:
        record["error"] = error
        record["features"] = features_cur

    # Only extend with features positioned after the current maximum, which
    # keeps the recursion from revisiting the same combination.
    last_pos = __max_feature_pos(features_cur)
    for candidate in features:
        if candidate["pos"] > last_pos:
            __increase(
                features_cur + [candidate], features, results, dataset, logs
            )
def log(message, func):
    """Run ``func`` through ``measure_time`` and print a report on its result.

    Args:
        message: Heading printed before the run.
        func: Zero-argument callable passed to ``measure_time``.  The value
            that ``measure_time`` returns is read as a dict with ``"error"``
            and ``"features"`` keys.
    """
    print(message)
    outcome = measure_time(func)
    print("Количество ошибок:", outcome["error"])
    log_quality(outcome)
    print("Количество признаков: ", len(outcome["features"]))
    log_features(outcome)
    print("-------------------------------------------")


# Baseline: score the complete feature set without any selection.
features_all = list(range(feature_count))
log(
    "Полный набор признаков",
    lambda: {
        "error": count_error(dataset, features_all),
        "features": features_all,
    },
)

# Exhaustive search is only attempted for at most 15 features.
if feature_count <= 15:
    log("Полный перебор", lambda: selection_full_search(dataset))

log("Алгоритм ADD", lambda: selection_add(dataset))
log("Алгоритм DEL", lambda: selection_del(dataset))
log("Алгоритм ADD-DEL", lambda: selection_add_del(dataset))
log("Поиск в глубину", lambda: selection_dfs(dataset))
log("Поиск в ширину", lambda: selection_bfs(dataset, iter_limit=10))