def Lasso(filepath, esti):
    """Select features by iterated L1-penalized multinomial logistic regression.

    Repeatedly fits a saga/L1 logistic model on the masked data `X * weight`,
    rebuilds the 0/1 mask from which coefficients survived, and stops once the
    number of dropped features stabilizes between consecutive epochs.

    NOTE(review): only `coef_[0]` (the first class row) is inspected when
    deciding whether a feature survives — confirm this is intended when
    n_classes > 2.

    Returns the final 0/1 feature mask as a numpy array.
    """
    X, y, (n_samples, n_features, n_classes) = load_data(filepath)

    weight = np.ones(n_features)
    last = 0
    for epoch in tqdm(range(100)):
        model = LogisticRegression(solver="saga",
                                   multi_class="multinomial",
                                   max_iter=10000,
                                   penalty="l1",
                                   n_jobs=-1)
        model.fit(X * weight, y.ravel())
        # A feature is kept iff its first-class coefficient is non-zero.
        weight = np.array([1 if np.abs(c) > 0 else 0 for c in model.coef_[0]])
        dropped = n_features - int(weight.sum())
        # Converged: the same number of features was dropped as last epoch.
        if last == dropped:
            break
        last = dropped
    return weight
def FSFOA(filepath, esti):
    # Feature Selection by Forest Optimization Algorithm: evolves a population
    # of Tree feature masks via local seeding (small perturbations of young
    # trees) and global seeding (large perturbations of discarded trees),
    # returning the best tree's 0/1 mask.
    # Input
    X, y, (n_samples, n_features, n_classes) = load_data(filepath)
    # Settings
    life_time = 15            # max age before a tree is moved to the candidate pool
    area_limit = 50           # max forest size after selection
    transfer_rate = 5. / 100  # fraction of candidates revived by global seeding
    # local seeding flips 1 bit per seed (lsc seeds); global seeding flips gsc bits
    lsc, gsc = int(n_features / 5), int(n_features * 2 / 5)
    # Init Forest
    forest = [Tree(n_features) for _ in range(int(area_limit / 10))]
    acc_pool = []  # recent best-accuracy history for early stopping
    # Run Forest
    pbar = tqdm(range(100))
    for epoch in pbar:
        # Local Seeding: only trees whose age just became 1 spawn lsc
        # single-bit-flipped copies (older trees are skipped).
        new_trees = []
        for tree in forest:
            tree.age = tree.age + 1
            if tree.age > 1:
                continue
            for _ in range(lsc):
                new_trees.append(deepcopy(tree))
                new_trees[-1].reverse(1)
                new_trees[-1].update(X, y, esti)
        forest += new_trees
        # Delete Trees: expired trees go to the candidate pool; the rest are
        # sorted by accuracy and truncated to area_limit (overflow also
        # becomes candidates).
        candidate, new_forest = [], []
        for tree in forest:
            if tree.age > life_time:
                candidate.append(tree)
            else:
                new_forest.append(tree)
        forest = []
        new_forest.sort(key=lambda x: x.acc, reverse=True)
        for i in range(len(new_forest)):
            if i >= area_limit:
                candidate.append(new_forest[i])
            else:
                forest.append(new_forest[i])
        # Global Seeding: revive a random transfer_rate fraction of the
        # candidates with a gsc-bit perturbation and age reset.
        candidate_len = len(candidate)
        idx = np.array([x for x in range(candidate_len)])
        np.random.shuffle(idx)
        for ii in range(int(candidate_len * transfer_rate)):
            i = idx[ii]
            tree = candidate[i]
            tree.age = 0
            tree.reverse(gsc)
            tree.update(X, y, esti)
            forest.append(tree)
        forest.sort(key=lambda x: x.acc, reverse=True)
        # Best tree is kept young so it keeps seeding locally.
        forest[0].age = 0
        # print(forest[0].acc)
        # Early stop: best accuracy no longer beats the recent-10 average.
        if len(acc_pool) > 10:
            acc_pool.pop(0)
        if len(acc_pool) >= 10 and np.mean(acc_pool) + EPS >= forest[0].acc:
            break
        acc_pool.append(forest[0].acc)
        # print(len(new_forest), candidate_len, np.mean(acc_pool), acc_pool[-1])
    print("Acc = {0} DR = {1}".format(forest[0].acc, forest[0].DR()))
    return forest[0].weight
def LGA(filepath, esti, epoch_limit=100):
    """Lasso-Genetic-Algorithm feature selection.

    Evolves a population of `Node` feature masks with mutation and crossover
    (each new individual is refined by `lasso_task`), selecting the fittest
    by `Node.fitness()`.

    Parameters
    ----------
    filepath : str
        Dataset path, consumed by `load_data`.
    esti : estimator
        Passed through to the lasso/fitness worker tasks.
    epoch_limit : int, optional
        Maximum number of GA generations (default 100).

    Returns
    -------
    The 0/1 weight mask of the fittest Node found.
    """
    # Input
    X, y, (n_samples, n_features, n_classes) = load_data(filepath)
    task_pool = Pool(12)
    # Init Group: for each bit position k, one node whose mask selects feature
    # i iff bit k of i is set ("black"), plus its complement ("white").
    group = []
    for k in range(1048576):
        if pow(2, k) > n_features:
            break
        white, black = Node(n_features), Node(n_features)
        for i in range(n_features):
            if ((i >> k) & 1) == 1:
                black.weight[i] = 1
            else:
                white.weight[i] = 1
        black.update_index()
        group.append(black)
        white.update_index()
        group.append(white)
    group = task_pool.map(partial(lasso_task, X=X, y=y, esti=esti), group)
    group.sort(key=lambda x: x.fitness(), reverse=True)
    group_siz = int(len(group) * 1.5)  # population cap after selection
    fitness_pool = []                  # recent best-fitness history for early stop
    epochs = tqdm(range(epoch_limit))
    for epoch in epochs:
        cur_group_siz = len(group)
        # Mutation: temperature decays with epoch (annealing-style schedule).
        mutation_T = max(1., n_features / 2. * np.log2(2. * (epoch_limit - epoch) / epoch_limit))
        mutation_group = task_pool.map(partial(mutation_task, T=mutation_T), group)
        mutation_group = task_pool.map(partial(lasso_task, X=X, y=y, esti=esti), mutation_group)
        # Cross: split population indices into 5 folds; cross each fold's
        # members with one of the current top-5 nodes as the common father.
        # FIX: scikit-learn >= 1.0 requires `shuffle` as a keyword argument;
        # the previous `KFold(5, True)` raises TypeError there.
        kf = KFold(n_splits=5, shuffle=True)
        cur_cross = 0
        cross_list, cross_group = [x for x in range(cur_group_siz)], []
        for _, cross_index in kf.split(cross_list):
            cross_obj = [group[x] for x in cross_index]
            cross_group += task_pool.map(partial(cross_task, father=group[cur_cross]), cross_obj)
            cur_cross = cur_cross + 1
        cross_group = task_pool.map(partial(lasso_task, X=X, y=y, esti=esti), cross_group)
        # Select: keep the fittest group_siz individuals.
        group += mutation_group
        group += cross_group
        group.sort(key=lambda x: x.fitness(), reverse=True)
        group = group[:group_siz]
        # Early Stop: best no longer beats the mean of the last 20 bests.
        if len(fitness_pool) > 20:
            fitness_pool.pop(0)
        if len(fitness_pool) >= 20 and np.mean(fitness_pool) + EPS >= group[0].fitness():
            break
        fitness_pool.append(group[0].fitness())
    task_pool.close()
    task_pool.join()  # FIX: reap worker processes instead of leaking them
    # Log
    print(group[0].fitness(), group[0].acc)
    return group[0].weight
def EFSFOA(filepath, esti):
    # Enhanced FSFOA: forest-optimization feature selection seeded with the
    # best information-gain-ratio (IGR) feature, with duplicate detection via
    # tree_cmp and a final feature-frequency refinement step.
    # NOTE(review): this body was reconstructed from a whitespace-mangled
    # source; statement nesting flagged with NOTE(review) below is the most
    # plausible reading — verify against the original repository.
    #init
    X, y, (n_samples, n_features, n_classes) = load_data(filepath)
    transferrate = 0.05   # fraction of candidate trees revived per iteration
    arealimit = 50        # max forest size
    lifetime = 15         # max tree age before removal to candidates
    lsc = 2               # local seeding count (bits flipped per young tree)
    best = 0              # best fitness observed so far
    forest = []
    candidate = []
    n = 5                 # initial forest size
    igr = np.random.randint(n_features)  # dead store: overwritten on the next line
    igr = get_igr(X, y, n_features)
    cmpfun = operator.attrgetter('fitness')  # ascending fitness sort key
    cnt = 0               # consecutive non-improving iterations (stop at 10)
    alpha = 100.0         # fitness weight on accuracy
    beta = 0.01           # fitness weight on feature-count penalty
    #init forest
    for i in range(n):
        tree = Tree(n_features)
        tree.set(igr, 1)
        forest.append(tree)
    # NOTE(review): this re-sets the already-set IGR bit on half the trees —
    # a no-op as written; possibly a random feature was intended.
    changefe = random.sample(range(0, n), int(n / 2))
    for i in range(int(n / 2)):
        forest[changefe[i]].set(igr, 1)
    for i in range(n):
        forest[i].fitness = cal_fitness(
            X, y, forest[i], alpha, beta * np.sum(forest[i].f) / n_features, esti)
    #loop
    while cnt < 10:
        #seeding near
        n = len(forest)
        i = 0
        while i < n:
            if forest[i].age >= lifetime:
                # Expired: move to the candidate pool and re-check this slot.
                candidate.append(forest[i])
                forest.pop(i)
                n -= 1
                i -= 1
            elif forest[i].age == 0:
                # Young tree: spawn lsc one-bit-flipped children; keep only
                # children that are non-empty and not duplicates (tree_cmp).
                seed = random.sample(range(0, n_features), lsc)
                for j in range(lsc):
                    tree = copy.deepcopy(forest[i])
                    tree.change(seed[j])
                    if np.sum(tree.f) != 0:
                        k = tree_cmp(tree, forest, candidate)
                        if k == -1:
                            # New mask: score and add it to the forest.
                            tree.fitness = cal_fitness(
                                X, y, tree, alpha,
                                beta * np.sum(tree.f) / n_features, esti)
                            forest.append(tree)
                        elif k < len(forest):
                            # Duplicate of an existing forest tree: age it.
                            forest[k].age = forest[k].age + 1
                # NOTE(review): aging placed inside the age==0 branch so a
                # tree seeds only once; confirm older trees age elsewhere
                # (via the duplicate path above).
                forest[i].age = forest[i].age + 1
            i += 1
        # Trim the forest to arealimit, discarding the lowest-fitness trees.
        n = len(forest)
        if n > arealimit:
            forest.sort(key=cmpfun)
            for i in range(n - arealimit):
                candidate.append(forest[0])
                forest.pop(0)
        # Track stagnation: forest is sorted ascending, so the last element
        # is the current best.
        if best >= forest[len(forest) - 1].fitness:
            cnt = cnt + 1
        else:
            cnt = 0
        best = max(best, forest[len(forest) - 1].fitness)
        #print(best, cnt)
        #print("########################")
        #seeding global
        n = len(candidate)
        num = int(transferrate * n)
        c_candidate = random.sample(range(0, n), num)
        for i in range(num):
            tree = candidate[c_candidate[i]]
            # Perturbation size shrinks with the tree's age.
            tree.gsc = int(
                min(2 + 2 * lifetime, n_features * 0.5) / (tree.age + 1))
            c_feature = random.sample(range(0, n_features), tree.gsc)
            for j in range(tree.gsc):
                tree.change(c_feature[j])
            tree.fitness = cal_fitness(X, y, tree, alpha,
                                       beta * np.sum(tree.f) / n_features, esti)
        if num > 0:
            # Promote the best reseeded candidate back into the forest.
            candidate.sort(key=cmpfun, reverse=True)
            candidate[0].age = 0
            forest.append(candidate[0])
            candidate.pop(0)
        forest.sort(key=cmpfun)
    # Final refinement: rank features by how often they appear across the
    # forest, then try masks built from the top-n and top-(n-1) features.
    su = []
    second = []
    third = []
    for i in range(n_features):
        su.append([0, i])  # [selection count, feature index]
        second.append(0)
        third.append(0)
    for tree in forest:
        for i in range(n_features):
            su[i][0] = su[i][0] + tree.f[i]
    su = sorted(su)
    su.reverse()
    # n = number of features used by the best tree.
    n = forest[len(forest) - 1].f.sum()
    tree2 = Tree(n_features)  # top-n most frequent features
    tree3 = Tree(n_features)  # top-(n-1) most frequent features
    for i in range(n):
        tree2.f[su[i][1]] = 1
        if i != n - 1:
            tree3.f[su[i][1]] = 1
    c1 = cal_fitness(X, y, forest[len(forest) - 1], alpha,
                     beta * np.sum(forest[len(forest) - 1].f) / n_features, esti)
    c2 = cal_fitness(X, y, tree2, alpha,
                     beta * np.sum(tree2.f) / n_features, esti)
    c3 = cal_fitness(X, y, tree3, alpha,
                     beta * np.sum(tree3.f) / n_features, esti)
    # Keep whichever of {best tree, top-n, top-(n-1)} scores highest.
    if c2 < c1:
        tree2 = copy.deepcopy(forest[len(forest) - 1])
        c2 = c1
    if c3 < c2:
        tree3 = copy.deepcopy(tree2)
        c3 = c2
    #print("DR: {0}".format(1.-np.sum(tree3.f) / n_features))
    return tree3.f
def GDFS(filepath, esti):
    # Gradient-Descent Feature Selection: alternates a TF1 gradient step on a
    # soft feature-weight vector with an L2 logistic-regression refit, then
    # binarizes the weights and keeps the best (accuracy-first) mask seen.
    # Input
    X, y, (n_samples, n_features, n_classes) = load_data(filepath)
    EPS = 1.0  # binarization threshold; deliberately shadows module-level EPS here
    weight_val = np.ones(n_features)
    # build_tf_graph returns the graph's placeholders and the train/weight/loss
    # ops; weight_val seeds the trainable weight variable.
    (X_input, y_input, coef, bias, lam, learning), (train, weight, loss) = \
        build_tf_graph(n_features, n_classes, weight_val)
    best_acc, best_dr, best_weight_val = 0, 0, np.ones(n_features)
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        pbar = tqdm(range(100))
        for epoch in pbar:
            # Logistic Regression on the currently masked features
            # (features zeroed in earlier epochs stay zeroed).
            X_trans = X * weight_val
            X_train, X_test, y_train, y_test = train_test_split(
                X_trans, y, test_size=0.3, random_state=19260817)
            logitModel = LogisticRegression(solver="lbfgs",
                                            multi_class="multinomial",
                                            max_iter=10000,
                                            penalty="l2",
                                            n_jobs=-1)
            logitModel.fit(X_train, y_train.ravel())
            # Lasso Features with Proba: one gradient step driven by the
            # fitted model's probabilities and coefficients.
            y_prob = logitModel.predict_proba(X)
            _, loss_val, weight_val = sess.run(
                [train, loss, weight],
                feed_dict={
                    X_input: X,
                    y_input: y_prob,
                    coef: logitModel.coef_.T,
                    bias: logitModel.intercept_,
                    learning: 0.1,  # learning rate
                    lam: 1          # regularization strength
                })
            # Delete Features: binarize in place; idx collects the survivors.
            idx = []
            for i in range(len(weight_val)):
                if weight_val[i] < EPS:
                    weight_val[i] = 0
                else:
                    weight_val[i] = 1
                    idx.append(i)
            if len(idx) == 0:
                break  # every feature was dropped; nothing left to evaluate
            # Calc Acc, Dr
            cur_acc = get_acc(X[:, idx], y, esti, False)
            cur_dr = 1.0 - 1.0 * len(idx) / n_features
            # print(" * Acc = {0} DR = {1}".format(cur_acc, cur_dr))
            # Update Best Weight (Acc First): higher accuracy wins; on a
            # near-tie (1e-8), higher deletion rate wins; dr must be > 0.
            if (cur_acc > best_acc or
                    (np.abs(cur_acc - best_acc) < 1e-8
                     and cur_dr > best_dr)) and cur_dr > 0:
                best_acc, best_dr = cur_acc, cur_dr
                best_weight_val = weight_val
                # print(" * Update Acc={0} Dr={1}".format(best_acc, best_dr))
    print("Best Acc = {0}, DR = {1}".format(best_acc, best_dr))
    return best_weight_val