def final_policy(self, qfunc):
    pi = dict()
    for state in self.states:
        if state in self.env.terminate_states:
            continue
        action = greedy(qfunc, state, self.actions)
        pi[state] = action
    return pi
def q_learning(self, num_iter, alpha, epsilon):
    # Define the action-value function as a dict and initialize it to 0.
    qfunc = dict()
    for s in self.states:
        for a in self.actions:
            qfunc['%d_%s' % (s, a)] = 0.0
    # Iteratively explore the environment.
    for _ in range(num_iter):
        # Randomly pick an initial state and action.
        state = self.states[int(random.random() * len(self.states))]
        action = self.actions[int(random.random() * len(self.actions))]
        is_terminal, count = False, 0
        while not is_terminal and count < 100:
            policy = "%d_%s" % (state, action)
            is_terminal, next_state, reward = self.env.transform1(state, action)
            # Best action at next_state, obtained via greedy.
            next_action = greedy(qfunc, next_state, self.actions)
            next_policy = "%d_%s" % (next_state, next_action)
            # Q-learning update: the target (evaluation) policy is greedy,
            # because the next action used in the update comes from greedy.
            qfunc[policy] = qfunc[policy] + alpha * (
                reward + self.gamma * qfunc[next_policy] - qfunc[policy])
            # Move to the next state; the behaviour (exploration) policy is epsilon_greedy.
            state, action = next_state, epsilon_greedy(
                qfunc, next_state, self.actions, epsilon)
            count += 1
    return qfunc
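# The helpers `greedy` and `epsilon_greedy` used above are not shown here; the
# following is a minimal sketch of what they are assumed to look like, matching
# the '%d_%s' % (state, action) key format of qfunc. The actual implementations
# in the original code may differ.
import random


def greedy(qfunc, state, actions):
    # Return the action with the highest estimated value in this state.
    best_action = actions[0]
    best_value = qfunc['%d_%s' % (state, best_action)]
    for action in actions:
        value = qfunc['%d_%s' % (state, action)]
        if value > best_value:
            best_action, best_value = action, value
    return best_action


def epsilon_greedy(qfunc, state, actions, epsilon):
    # Behaviour policy: explore with probability epsilon, otherwise exploit.
    if random.random() < epsilon:
        return actions[int(random.random() * len(actions))]
    return greedy(qfunc, state, actions)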
def targetPolicy(self, q_values):
    """
    Returns the chosen action and its probabilities.
    Currently set to the greedy policy.

    Params
    ======
        q_values (array): action values
    """
    return utils.greedy(q_values)
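# `utils.greedy` is not shown; below is a minimal sketch of what the docstring
# implies it returns (the argmax action plus a degenerate probability vector).
# The real utility may break ties differently.
import numpy as np


def greedy(q_values):
    # Put all probability mass on the highest-valued action.
    probs = np.zeros(len(q_values))
    best_action = int(np.argmax(q_values))
    probs[best_action] = 1.0
    return best_action, probs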
def exponentiated_gradient_asymmetric(P, preferences, k_d, k_a, num_iter,
                                      n_samples=100, step_size=0.01,
                                      verbose=False):
    '''
    Runs exponentiated gradient ascent from the perspective of the adversary,
    with greedy best responses for the defender. Returns the list of best
    responses played by the defender (the uniform distribution over which is
    their historical mixed strategy).

    P_ij should be the probability channel i reaches voter j.
    preferences is a 0-1 vector where 1 indicates that a voter prefers c_d.
    k_d/k_a are the defender and attacker budgets.
    '''
    # random initialization
    n = P.shape[0]
    x_a = np.random.rand(n_samples, n)
    for i in range(n_samples):
        x_a[i] = project_uniform_matroid_boundary(x_a[i], k_a)
    # historical mixed strategy for defender
    sigma_d = []
    for t in range(num_iter):
        # get current probability the attacker reaches each voter
        w = np.zeros((P.shape[1]))
        for i in range(n_samples):
            w += preferences[i] * get_p_reached(x_a[i], P)
        w /= n_samples
        # solve best response for the defender
        obj_defender = partial(objective_budget_set, P=P, w=w)
        S_d, _ = greedy(range(n), k_d, obj_defender)
        sigma_d.append(S_d)
        x_d = indicator(S_d, n)
        # exponentiated gradient step + renormalization of the attacker's mixed strategy
        p_defender_reach = get_p_reached(x_d, P)
        for i in range(n_samples):
            grad_attacker = gradient_budget(
                x_a[i], P, preferences[i] * (1 - p_defender_reach))
            # objective_value = objective_budget(x_a[i], P, preferences[i]*(1 - p_defender_reach))
            # values.append(objective_value)
            # if verbose:
            #     print(t, objective_value)
            # x_a = project_uniform_matroid_boundary(x_a + step_size*grad_attacker, k_a)
            x_a[i] = x_a[i] * np.exp(step_size * grad_attacker)
            x_a[i][x_a[i] > 1] = 1
            x_a[i] = k_a * x_a[i] / x_a[i].sum()
    return sigma_d, x_a
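# `indicator` is assumed to build the 0-1 membership vector of a set, which is
# how x_d is derived from the defender's chosen set S_d above; this sketch is
# an assumption about the helper, not the original implementation.
import numpy as np


def indicator(S, n):
    # Length-n vector with ones at the indices contained in S.
    x = np.zeros(n)
    x[list(S)] = 1.0
    return x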
def influence(graph, full_graph, samples=SAMPLES):
    for u, v in graph.edges():
        graph[u][v]['p'] = PROP_PROBAB

    def genoptfunction(graph, samples=1000):
        live_graphs = sample_live_icm(graph, samples)
        f_multi = make_multilinear_objective_samples(
            live_graphs, list(graph.nodes()), list(graph.nodes()),
            np.ones(len(graph)))
        f_set = multi_to_set(f_multi, graph)
        return f_set

    f_set = genoptfunction(graph, samples)
    S, obj = greedy(list(range(len(graph))), BUDGET, f_set)
    f_set1 = genoptfunction(full_graph, samples)
    opt_obj = f_set1(S)
    return opt_obj, obj, S
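# The shared `greedy(items, budget, f)` helper used in these snippets is
# assumed to be the standard greedy algorithm for monotone submodular
# maximization under a cardinality constraint; the original code may use a
# lazy/accelerated variant. A minimal sketch under that assumption:
def greedy(items, budget, f):
    # Repeatedly add the element with the largest marginal gain.
    S = set()
    current = f(S)
    for _ in range(budget):
        best_item, best_gain = None, 0.0
        for item in items:
            if item in S:
                continue
            gain = f(S | {item}) - current
            if gain > best_gain:
                best_item, best_gain = item, gain
        if best_item is None:
            break
        S.add(best_item)
        current += best_gain
    return S, current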
def solve_it(input_data, method="", oporation="", solo=False, default=True):
    # Modify this code to run your optimization algorithm

    # parse the input
    lines = input_data.split('\n')
    nodeCount = int(lines[0])

    points = []
    for i in range(1, nodeCount + 1):
        line = lines[i]
        parts = line.split()
        points.append(Point(float(parts[0]), float(parts[1])))

    if len(points) > 30000 and default:
        print("greedy")
        return greedy(points)

    NPproblem_tsp = tsp.tsp(points)

    if default:
        if len(points) > 200:
            approx = NPproblem_tsp.christofides()
            return print_solution(NPproblem_tsp, approx, "simulatedAnneling")
        else:
            approx = NPproblem_tsp.gurobi_method()
            return approx

    if method == 'approximation2':
        approx = NPproblem_tsp.approximation2()
    elif method == 'christofides':
        approx = NPproblem_tsp.christofides()
    elif method == 'antSystem':
        Hormigas = AntSystem.AntSystem(points, 1, 2, 0.02, 10, nodeCount, nodeCount)
        algo = Hormigas.calcule_route()
        return verbose(algo[1], algo[0])
    elif method == 'gurobi':
        approx = NPproblem_tsp.gurobi_method()
        return approx
    else:
        approx = NPproblem_tsp.christofides()

    if solo:
        return verbose(approx[1], approx[0])
    else:
        return print_solution(NPproblem_tsp, approx, oporation)
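# The `greedy(points)` fallback used above for instances with more than 30000
# points is not shown. Below is a plausible nearest-neighbour sketch, assuming
# `Point` exposes `x`/`y` fields and the usual "objective opt\n tour" output
# format; the original fallback may differ.
import math


def greedy(points):
    def dist(a, b):
        return math.sqrt((a.x - b.x) ** 2 + (a.y - b.y) ** 2)

    n = len(points)
    unvisited = set(range(1, n))
    tour, current = [0], 0
    while unvisited:
        # Always move to the closest unvisited point.
        nxt = min(unvisited, key=lambda j: dist(points[current], points[j]))
        unvisited.remove(nxt)
        tour.append(nxt)
        current = nxt
    length = sum(dist(points[tour[i]], points[tour[i + 1]]) for i in range(n - 1))
    length += dist(points[tour[-1]], points[tour[0]])
    return '%.2f %d\n%s' % (length, 0, ' '.join(map(str, tour)))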
def attacker_br_value(sigma_d, P, preferences, k_a):
    '''
    Find a greedy best response to a given defender mixed strategy, given as
    the uniform distribution over the list of sets sigma_d.
    '''
    n = P.shape[0]
    p_defender_reach = np.zeros((P.shape[1]))
    # average over sigma_d to get the probability each voter is reached by the
    # defender
    for S_d in sigma_d:
        x = indicator(S_d, n)
        p_defender_reach += get_p_reached(x, P)
    p_defender_reach /= len(sigma_d)
    # call greedy for best response
    obj_attacker = partial(objective_budget_set, P=P,
                           w=preferences * (1 - p_defender_reach))
    S_a, value = greedy(range(n), k_a, obj_attacker)
    return value
def oga(P, preferences, k_d, k_a, num_iter, step_size=0.01, verbose=False):
    '''
    Runs online gradient ascent from the perspective of the adversary, with
    greedy best responses for the defender. Returns the list of best responses
    played by the defender (the uniform distribution over which is their
    historical mixed strategy).

    P_ij should be the probability channel i reaches voter j.
    preferences is a 0-1 vector where 1 indicates that a voter prefers c_d.
    k_d/k_a are the defender and attacker budgets.
    '''
    # random initialization
    n = P.shape[0]
    x_a = project_uniform_matroid_boundary(np.random.rand(n), k_a)
    # historical mixed strategy for defender
    sigma_d = []
    values = []
    for t in range(num_iter):
        # get current probability the attacker reaches each voter
        p_attacker_reach = get_p_reached(x_a, P)
        # solve best response for the defender
        obj_defender = partial(objective_budget_set, P=P,
                               w=preferences * p_attacker_reach)
        S_d, _ = greedy(range(n), k_d, obj_defender)
        sigma_d.append(S_d)
        x_d = indicator(S_d, n)
        # gradient step + projection on the attacker's mixed strategy
        p_defender_reach = get_p_reached(x_d, P)
        grad_attacker = gradient_budget(x_a, P,
                                        preferences * (1 - p_defender_reach))
        objective_value = objective_budget(
            x_a, P, preferences * (1 - p_defender_reach))
        values.append(objective_value)
        if verbose:
            print(t, objective_value)
        x_a = project_uniform_matroid_boundary(x_a + step_size * grad_attacker,
                                               k_a)
    return sigma_d, x_a, values
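# `get_p_reached` is assumed to turn a (fractional) channel-selection vector x
# and the channel-to-voter reach matrix P into the probability that each voter
# is reached by at least one selected channel, under independence across
# channels. This is a sketch of that assumption, not the original helper.
import numpy as np


def get_p_reached(x, P):
    # p_j = 1 - prod_i (1 - x_i * P_ij)
    return 1 - np.prod(1 - x[:, None] * P, axis=0)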
#############################################################################################################
#
# GREEDY SOLUTIONS
#
#############################################################################################################
if greedy:
    if verbose:
        print('###\tStart compute greedy solutions')
        start = timer()

    n_epochs = n_epoch[2]
    data = []
    for i in range(n_epochs):
        # compute greedy solution
        opt_sol = utils.greedy(test, i)
        data.append(opt_sol)
    utils.write_sol(data, 0, path_plot, sol, '')

    if verbose:
        end = timer()
        print('###\tTime for compute greedy solutions: %.2f sec' % (end - start))

#############################################################################################################
#
# INVERSE GREEDY
#
#############################################################################################################
live_graphs = sample_live_icm(g, 1000)
group_indicator = np.ones((len(g.nodes()), 1))
val_oracle = make_multilinear_objective_samples_group(
    live_graphs, group_indicator, list(g.nodes()), list(g.nodes()),
    np.ones(len(g)))
grad_oracle = make_multilinear_gradient_group(
    live_graphs, group_indicator, list(g.nodes()), list(g.nodes()),
    np.ones(len(g)))

def f_multi(x):
    return val_oracle(x, 1000).sum()

#f_multi = make_multilinear_objective_samples(live_graphs, list(g.nodes()), list(g.nodes()), np.ones(len(g)))
f_set = multi_to_set(f_multi)

#find overall optimal solution
S, obj = greedy(list(range(len(g))), budget, f_set)

for attr_idx, attribute in enumerate(attributes):
    #all values taken by this attribute
    values = np.unique([g.node[v][attribute] for v in g.nodes()])
    nodes_attr = {}
    for vidx, val in enumerate(values):
        nodes_attr[val] = [v for v in g.nodes() if g.node[v][attribute] == val]
        group_size[graphname][attribute][run, vidx] = len(nodes_attr[val])
    opt_succession = {}
    if succession:
        for vidx, val in enumerate(values):
            h = nx.subgraph(g, nodes_attr[val])