Example #1
    def final_policy(self, qfunc):
        # Extract the final (deterministic) policy from the learned Q-function.
        pi = dict()
        for state in self.states:
            # Terminal states need no action.
            if state in self.env.terminate_states:
                continue
            # Pick the greedy action in each state.
            pi[state] = greedy(qfunc, state, self.actions)
        return pi
Example #2
    def q_learning(self, num_iter, alpha, epsilon):
        # Define the action-value function as a dict and initialize it to 0
        qfunc = dict()
        for s in self.states:
            for a in self.actions:
                qfunc['%d_%s' % (s, a)] = 0.0
        # Iteratively explore the environment
        for _ in range(num_iter):
            # Randomly initialize the starting state and action
            state = self.states[int(random.random() * len(self.states))]
            action = self.actions[int(random.random() * len(self.actions))]

            is_terminal, count = False, 0
            while not is_terminal and count < 100:
                policy = "%d_%s" % (state, action)
                is_terminal, next_state, reward = self.env.transform1(
                    state, action)
                # The best action at next_state, obtained via greedy.
                next_action = greedy(qfunc, next_state, self.actions)
                next_policy = "%d_%s" % (next_state, next_action)
                # Q-learning update: the evaluation (target) policy is greedy, since the next action used in the update comes from greedy.
                qfunc[policy] = qfunc[policy] + alpha * (
                    reward + self.gamma * qfunc[next_policy] - qfunc[policy])
                # Move to the next state; the behavior (exploration) policy is epsilon-greedy.
                state, action = next_state, epsilon_greedy(
                    qfunc, next_state, self.actions, epsilon)
                count += 1
        return qfunc
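
The greedy and epsilon_greedy helpers are not shown in this snippet; below is a minimal sketch of what they are assumed to do, reusing the '%d_%s' key format from the example (illustrative, not the original implementations):

import random

def greedy(qfunc, state, actions):
    # Exploit: return the action with the largest estimated value in this state.
    return max(actions, key=lambda a: qfunc['%d_%s' % (state, a)])

def epsilon_greedy(qfunc, state, actions, epsilon):
    # Explore with probability epsilon, otherwise act greedily.
    if random.random() < epsilon:
        return random.choice(actions)
    return greedy(qfunc, state, actions)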
Example #3
    def targetPolicy(self, q_values):
        """ Returns chosen action and probabilites. Currently set to greedy policy

        Params
        ======
            q_values(array): action values
        """
        return utils.greedy(q_values)
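
utils.greedy is not shown here; per the docstring it returns the chosen action together with the action probabilities, so a hypothetical stand-in could look like this (not the original utils implementation):

import numpy as np

def greedy(q_values):
    # Greedy policy: all probability mass on the highest-valued action.
    probs = np.zeros_like(q_values, dtype=float)
    best = int(np.argmax(q_values))
    probs[best] = 1.0
    return best, probs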
def exponentiated_gradient_asymmetric(P,
                                      preferences,
                                      k_d,
                                      k_a,
                                      num_iter,
                                      n_samples=100,
                                      step_size=0.01,
                                      verbose=False):
    '''
    Runs exponentiated gradient ascent (multiplicative updates) from the perspective of the adversary, with
    greedy best responses for the defender. Returns the list of best responses
    played by the defender (the uniform distribution over which is their historical
    mixed strategy)
    
    P_ij should be the probability channel i reaches voter j
    
    preferences is a 0-1 vector where 1 indicates that a voter prefers c_d
    
    k_d/k_a are the defender and attacker budgets
    '''
    #random initialization
    n = P.shape[0]
    x_a = np.random.rand(n_samples, n)
    for i in range(n_samples):
        x_a[i] = project_uniform_matroid_boundary(x_a[i], k_a)
    #historical mixed strategy for defender
    sigma_d = []
    for t in range(num_iter):
        #get current probability the attacker reaches each voter
        w = np.zeros((P.shape[1]))
        for i in range(n_samples):
            w += preferences[i] * get_p_reached(x_a[i], P)
        w /= n_samples
        #solve best response for the defender
        obj_defender = partial(objective_budget_set, P=P, w=w)
        S_d, _ = greedy(range(n), k_d, obj_defender)
        sigma_d.append(S_d)
        x_d = indicator(S_d, n)
        #gradient step + projection on the attacker's mixed strategy
        p_defender_reach = get_p_reached(x_d, P)
        for i in range(n_samples):
            grad_attacker = gradient_budget(
                x_a[i], P, preferences[i] * (1 - p_defender_reach))
            #            objective_value = objective_budget(x_a[i], P, preferences[i]*(1 - p_defender_reach))
            #            values.append(objective_value)
            #            if verbose:
            #                print(t, objective_value)
            #        x_a = project_uniform_matroid_boundary(x_a + step_size*grad_attacker, k_a)
            x_a[i] = x_a[i] * np.exp(step_size * grad_attacker)
            x_a[i][x_a[i] > 1] = 1
            x_a[i] = k_a * x_a[i] / x_a[i].sum()
    return sigma_d, x_a
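
project_uniform_matroid_boundary is used for the attacker's projection step but is not defined here. Judging by its name and the budget argument, it is assumed to project onto {x : 0 <= x_i <= 1, sum_i x_i = k}; a minimal bisection sketch under that assumption (hypothetical, not the original helper):

import numpy as np

def project_uniform_matroid_boundary(x, k, tol=1e-9):
    # Assumed behavior: Euclidean projection of x onto {y : 0 <= y <= 1, sum(y) = k}.
    # The projection has the form clip(x - tau, 0, 1); bisect on the shift tau.
    lo, hi = x.min() - 1.0, x.max()
    for _ in range(100):
        tau = (lo + hi) / 2.0
        y = np.clip(x - tau, 0.0, 1.0)
        if y.sum() > k:
            lo = tau
        else:
            hi = tau
        if abs(y.sum() - k) < tol:
            break
    return np.clip(x - (lo + hi) / 2.0, 0.0, 1.0)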
Example #5
def influence(graph, full_graph, samples=SAMPLES):
    # Assign the same propagation probability to every edge of the observed graph.
    for u, v in graph.edges():
        graph[u][v]['p'] = PROP_PROBAB

    def genoptfunction(graph, samples=1000):
        # Sample live-edge graphs for the independent cascade model and build a
        # set-function objective over the nodes.
        live_graphs = sample_live_icm(graph, samples)
        f_multi = make_multilinear_objective_samples(live_graphs, list(graph.nodes()), list(graph.nodes()), np.ones(len(graph)))
        f_set = multi_to_set(f_multi, graph)
        return f_set

    # Greedily pick a seed set of size BUDGET on the observed graph...
    f_set = genoptfunction(graph, samples)
    S, obj = greedy(list(range(len(graph))), BUDGET, f_set)

    # ...and evaluate that same seed set on the full (ground-truth) graph.
    f_set1 = genoptfunction(full_graph, samples)
    opt_obj = f_set1(S)

    return opt_obj, obj, S
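
A hypothetical call on toy data: SAMPLES, PROP_PROBAB and BUDGET are assumed module-level constants, nodes must be labelled 0..n-1 because the greedy call optimizes over list(range(len(graph))), and since influence() only sets the edge probability 'p' on its first argument, the full graph gets it here.

import networkx as nx

full_g = nx.erdos_renyi_graph(100, 0.05, seed=0)         # made-up ground-truth graph
observed = nx.Graph(full_g)                               # partially observed copy
observed.remove_edges_from(list(observed.edges())[::4])   # hide a quarter of the edges
for u, v in full_g.edges():
    full_g[u][v]['p'] = PROP_PROBAB
opt_obj, obj, S = influence(observed, full_g)
print(S, obj, opt_obj)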
def solve_it(input_data, method="", oporation="", solo=False, default=True):
    # Modify this code to run your optimization algorithm

    # parse the input
    lines = input_data.split('\n')

    nodeCount = int(lines[0])

    points = []
    for i in range(1, nodeCount + 1):
        line = lines[i]
        parts = line.split()
        points.append(Point(float(parts[0]), float(parts[1])))
    if len(points) > 30000 and default:
        print("greedy")
        return greedy(points)

    NPproblem_tsp = tsp.tsp(points)
    if default:
        if len(points) > 200:
            approx = NPproblem_tsp.christofides()
            return print_solution(NPproblem_tsp, approx, "simulatedAnneling")
        else:
            approx = NPproblem_tsp.gurobi_method()
            return approx

    if method == 'approximation2':
        approx = NPproblem_tsp.approximation2()
    elif method == 'christofides':
        approx = NPproblem_tsp.christofides()
    elif method == 'antSystem':
        Hormigas = AntSystem.AntSystem(points, 1, 2, 0.02, 10, nodeCount,
                                       nodeCount)
        algo = Hormigas.calcule_route()
        return verbose(algo[1], algo[0])
    elif method == 'gurobi':
        approx = NPproblem_tsp.gurobi_method()
        return approx
    else:
        approx = NPproblem_tsp.christofides()
    if solo:
        return verbose(approx[1], approx[0])
    else:
        return print_solution(NPproblem_tsp, approx, oporation)
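
The expected input format follows from the parsing above: the first line holds the node count, followed by one "x y" coordinate pair per line. A toy call (hypothetical data; the method string must be one of those handled above):

toy_input = "4\n0.0 0.0\n0.0 1.0\n1.0 1.0\n1.0 0.0\n"
# Skip the size-based defaults and force the Christofides approximation.
result = solve_it(toy_input, method="christofides", oporation="christofides",
                  solo=True, default=False)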
def attacker_br_value(sigma_d, P, preferences, k_a):
    '''
    Find a greedy best response to a given defender mixed strategy, given as
    the uniform distribution over the list of sets sigma_d
    '''
    n = P.shape[0]
    p_defender_reach = np.zeros((P.shape[1]))
    #average over sigma_d to get the probability each voter is reached by the
    #defender
    for S_d in sigma_d:
        x = indicator(S_d, n)
        p_defender_reach += get_p_reached(x, P)
    p_defender_reach /= len(sigma_d)
    #call greedy for best response
    obj_attacker = partial(objective_budget_set,
                           P=P,
                           w=preferences * (1 - p_defender_reach))
    S_a, value = greedy(range(n), k_a, obj_attacker)
    return value
def oga(P, preferences, k_d, k_a, num_iter, step_size=0.01, verbose=False):
    '''
    Runs online gradient ascent from the perspective of the adversary, with 
    greedy best responses for the defender. Returns the list of best responses
    played by the defender (the uniform distribution over which is their historical
    mixed strategy)
    
    P_ij should be the probability channel i reaches voter j
    
    preferences is a 0-1 vector where 1 indicates that a voter prefers c_d
    
    k_d/k_a are the defender and attacker budgets
    '''
    #random initialization
    n = P.shape[0]
    x_a = project_uniform_matroid_boundary(np.random.rand(n), k_a)
    #historical mixed strategy for defender
    sigma_d = []
    values = []
    for t in range(num_iter):
        #get current probability the attacker reaches each voter
        p_attacker_reach = get_p_reached(x_a, P)
        #solve best response for the defender
        obj_defender = partial(objective_budget_set,
                               P=P,
                               w=preferences * p_attacker_reach)
        S_d, _ = greedy(range(n), k_d, obj_defender)
        sigma_d.append(S_d)
        x_d = indicator(S_d, n)
        #gradient step + projection on the attacker's mixed strategy
        p_defender_reach = get_p_reached(x_d, P)
        grad_attacker = gradient_budget(x_a, P,
                                        preferences * (1 - p_defender_reach))
        objective_value = objective_budget(
            x_a, P, preferences * (1 - p_defender_reach))
        values.append(objective_value)
        if verbose:
            print(t, objective_value)
        x_a = project_uniform_matroid_boundary(x_a + step_size * grad_attacker,
                                               k_a)
    return sigma_d, x_a, values
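
A hypothetical end-to-end run on made-up data, assuming the helper functions used above (get_p_reached, greedy, indicator, etc.) are importable; the defender's historical mixed strategy returned by oga is then scored against the attacker's greedy best response:

import numpy as np

P = np.random.rand(5, 20)                    # toy data: 5 channels, 20 voters
preferences = np.random.randint(0, 2, 20)    # 1 where the voter prefers c_d
sigma_d, x_a, values = oga(P, preferences, k_d=2, k_a=2, num_iter=100)
# Value of the attacker's greedy best response against the defender's historical mix.
print(attacker_br_value(sigma_d, P, preferences, k_a=2))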
Example #9
#############################################################################################################
#
#   GREEDY SOLUTIONS
#
#############################################################################################################

if greedy:
    if verbose:
        print('###\tStart computing greedy solutions')
        start = timer()

    n_epochs = n_epoch[2]
    data = []
    for i in range(n_epochs):
        # compute greedy solution
        opt_sol = utils.greedy(test, i)
        data.append(opt_sol)

    utils.write_sol(data, 0, path_plot, sol, '')

    if verbose:
        end = timer()
        print('###\tTime to compute greedy solutions: %.2f sec' %
              (end - start))

#############################################################################################################
#
#   INVERSE GREEDY
#
#############################################################################################################
Example #10
     live_graphs = sample_live_icm(g, 1000)
 
     group_indicator = np.ones((len(g.nodes()), 1))
     
     val_oracle = make_multilinear_objective_samples_group(live_graphs, group_indicator,  list(g.nodes()), list(g.nodes()), np.ones(len(g)))
     grad_oracle = make_multilinear_gradient_group(live_graphs, group_indicator,  list(g.nodes()), list(g.nodes()), np.ones(len(g)))
     
     def f_multi(x):
         return val_oracle(x, 1000).sum()
     
     
     #f_multi = make_multilinear_objective_samples(live_graphs, list(g.nodes()), list(g.nodes()), np.ones(len(g)))
     f_set = multi_to_set(f_multi)
     
     #find overall optimal solution
     S, obj = greedy(list(range(len(g))), budget, f_set)
 
     for attr_idx, attribute in enumerate(attributes):
         #all values taken by this attribute
         values = np.unique([g.node[v][attribute] for v in g.nodes()])
         nodes_attr = {}
         for vidx, val in enumerate(values):
             nodes_attr[val] = [v for v in g.nodes() if g.node[v][attribute] == val]
             group_size[graphname][attribute][run, vidx] = len(nodes_attr[val])
         
         opt_succession = {}
         if succession:
             for vidx, val in enumerate(values):
                 h = nx.subgraph(g, nodes_attr[val])