Example #1
        # Every-visit Monte Carlo update: move each visited
        # (dealer card, player sum, action) estimate toward the episode return.
        for i in range(len(state_list)):
            # Combine the card's number and sign into a single table index.
            dealerFirstCard = state_list[i].dealerFirstCard().num * state_list[i].dealerFirstCard().sign
            sumOfmine = state_list[i].sumOfmine()
            action = action_list[i]

            # Incremental mean with step size alpha = 1/N:
            # Q <- Q + (1/N) * (reward - Q)
            self.N[dealerFirstCard, sumOfmine, action] += 1
            alpha = 1 / self.N[dealerFirstCard, sumOfmine, action]
            self.Q[dealerFirstCard, sumOfmine, action] += alpha * (reward - self.Q[dealerFirstCard, sumOfmine, action])
        return


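# A minimal standalone sketch (not part of the agent) of why the update above
# is an incremental mean: with alpha = 1/N, Q converges to the plain average
# of all returns observed for a (state, action) entry.
def incremental_mean_demo(returns):
    q, n = 0.0, 0
    for g in returns:
        n += 1
        q += (1.0 / n) * (g - q)
    return q

assert abs(incremental_mean_demo([1, -1, 1, 1]) - 0.5) < 1e-12
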
if __name__ == '__main__':
    env = env()  # instantiate the environment (note: this shadows the env class)
    agent = Egreedy_MC_agent()

    iteration = 50000
    reward_list = []

    for i in range(iteration):
        reward = 0
        state_list = []   # states visited during this episode
        action_list = []  # actions taken during this episode
        if i == 0:
            state = env.init_state()  # first episode: create the initial state
        else:
            state = env.Restart()     # later episodes: reset the environment
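
# The agent's name suggests epsilon-greedy exploration. A standalone sketch of
# such a policy over a Q table indexed as above; this helper and its
# parameters are assumptions, not Egreedy_MC_agent's actual API.
import numpy as np

def egreedy_action(Q, dealer_card, my_sum, epsilon, n_actions=2):
    """With probability epsilon act randomly, otherwise act greedily on Q."""
    if np.random.rand() < epsilon:
        return np.random.randint(n_actions)
    return int(np.argmax(Q[dealer_card, my_sum, :]))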
Example #2
                  epsilon=1,          # initial probability of acting randomly
                  epsilon_decay=1E-5, # per-step decay of that probability
                  batch_size=256,
                  lr=0.01,
                  T=3,
                  mem_size=50000)
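
    # A minimal sketch of the linear schedule the epsilon arguments above imply;
    # eps_min and the explicit step counter are assumptions (the agent most
    # likely decays epsilon internally after each learning step).
    def decayed_epsilon(step, eps_start=1.0, eps_decay=1e-5, eps_min=0.01):
        return max(eps_min, eps_start - eps_decay * step)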
    scores = [] 
    
    
    for i in range(num_eposides):
        # set up the environment
        # graph_size = np.random.randint(50, 100)
        # seed_size = np.random.randint(10, 30)
        graph_size = 100
        seed_size = 25
        Env = env(graph_size=graph_size, seed_size=seed_size, edge_weight=0.1,
                  random_edge_weight=False, network_model="BA")
        edge_index, edge_weight, x, mu, done = Env.reset()
        graph = Data(edge_index=torch.LongTensor(edge_index),
                     edge_weight=torch.Tensor(edge_weight),
                     mu=torch.Tensor(mu),
                     x=torch.Tensor(x))
        # per-step transition buffers, filled as the episode runs
        graph_former_steps = []
        graph_later_steps = []
        action_steps = []
        reward_steps = []
        done_steps = []
        steps_cntr = 0

        # run one episode
        while not done:
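
# A standalone sketch (not the repo's code) of how per-step buffers like the
# ones above are typically consumed for n-step targets once an episode ends:
# with T=3, the target reward for step t sums rewards over steps t..t+T-1
# (discounting is omitted here for brevity).
def n_step_rewards(reward_steps, T=3):
    return [sum(reward_steps[t:t + T]) for t in range(len(reward_steps))]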
Example #3
def MSS(edge_index):
    """Maximum stable set via NetworkX: a stable set of g is a clique of the
    complement of g, so enumerate the complement's maximal cliques and keep
    the largest (exact, but exponential in the worst case)."""
    out_edge = edge_index.numpy()
    g = create_graph(out_edge)
    # Copy the edges into a fresh undirected graph, then take its complement.
    new_g = nx.Graph()
    for (u, v) in g.edges():
        new_g.add_edge(u, v)
    new_g = complement(new_g)
    SS_list = list(nx.find_cliques(new_g))
    MSS_list = max(SS_list, key=len)
    return MSS_list
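
# A minimal self-contained check of the complement-clique idea above, using
# networkx directly instead of the repo's create_graph helper: the largest
# stable set of a 4-cycle has size 2.
def _mss_demo():
    g = nx.cycle_graph(4)                      # edges: 0-1, 1-2, 2-3, 3-0
    comp = nx.complement(g)                    # stable sets of g = cliques of comp
    best = max(nx.find_cliques(comp), key=len)
    assert len(best) == 2                      # e.g. [0, 2] or [1, 3]
    return best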


if __name__ == "__main__":
    num_episodes = 20000  # number of training episodes; adjust as needed
    env = env(train_size=40, i=1, episodes=num_episodes)  # shadows the env class
    n_step = 4
    agent = Agent()
    scores = []
    loss_list = []
    # Note: computing the exact MSS baseline every episode makes training
    # considerably slower, since clique enumeration is expensive.
    Greedy_filename = "greedy.txt"
    file_greedy = open(Greedy_filename, "w")
    score_filename = "score.txt"
    file_score = open(score_filename, "w")
    for i in tqdm(range(num_episodes)):
        score = 0
        graph = env.reset(i, num_episodes)
        file_greedy.write("Episode: {:<4}, size_greedy: {:<4}, node_num:{:<4}, stable_set:{}\n".