for i in range(len(state_list)): dealerFirstCard = state_list[i].dealerFirstCard().num*state_list[i].dealerFirstCard().sign sumOfmine = state_list[i].sumOfmine() action = action_list[i] # s = self.Q[dealerFirstCard,sumOfmine,action]*self.N[dealerFirstCard,sumOfmine,action] + reward self.N[dealerFirstCard,sumOfmine,action] += 1 alpha = 1/self.N[dealerFirstCard,sumOfmine,action] self.Q[dealerFirstCard,sumOfmine,action] = self.Q[dealerFirstCard,sumOfmine,action] + alpha*(reward - self.Q[dealerFirstCard,sumOfmine,action]) # self.Q[dealerFirstCard,sumOfmine,action] = s/self.N[dealerFirstCard,sumOfmine,action] return if __name__ == '__main__': env = env() agent = Egreedy_MC_agent() reward = 0 iteration = 50000 reward_list = [] for i in range(iteration): reward = 0 state_list = [] action_list = [] if i == 0: state = env.init_state() else: state = env.Restart()
epsilon=1, # 随机选择的初始概率 epsilon_decay=1E-5, # 随机选择的概率decay batch_size=256, lr=0.01, T=3, mem_size=50000) scores = [] for i in range(num_eposides): # 设定Env # graph_size = np.random.randint(50, 100) # seed_size = np.random.randint(10, 30) graph_size = 100 seed_size = 25 Env = env(graph_size=graph_size, seed_size=seed_size, edge_weight=0.1, random_edge_weight=False, network_model="BA") edge_index, edge_weight, x, mu, done = Env.reset() graph = Data(edge_index = torch.LongTensor(edge_index), edge_weight = torch.Tensor(edge_weight), mu = torch.Tensor(mu), x = torch.Tensor(x)) # to be stored graph_former_steps = [] graph_later_steps = [] action_steps = [] reward_steps = [] done_steps = [] steps_cntr = 0 # running eposide while not done:
def MSS(edge_index):
    """Return a largest stable (independent) set found via NetworkX.

    The edge list is turned into a graph with ``create_graph``; its edges are
    copied into a plain ``nx.Graph`` and that graph is complemented.  Every
    maximal clique of the complement is a maximal stable set of the copied
    graph, so the longest clique is a maximum stable set.

    Despite the historical "greedy" label used for this baseline, this is an
    exhaustive enumeration of maximal cliques, so it can be slow on larger
    graphs.

    Args:
        edge_index: Object exposing ``.numpy()`` that yields the edge array
            accepted by ``create_graph`` (presumably a torch tensor -- confirm
            against the caller).

    Returns:
        A list of node labels forming the largest stable set, or an empty
        list when the graph has no edges.  Only nodes that appear in at least
        one edge are considered, because the working graph is rebuilt from
        the edge list alone.
    """
    out_edge = edge_index.numpy()
    g = create_graph(out_edge)

    # Rebuild as a plain undirected graph containing only the edge endpoints.
    edge_only_graph = nx.Graph()
    edge_only_graph.add_edges_from((u, v) for (u, v) in g.edges())

    complement_graph = complement(edge_only_graph)

    # Consume the clique generator directly instead of materialising every
    # maximal clique in a list; ``default`` avoids a ValueError when the
    # graph is empty and no clique is produced.
    return max(nx.find_cliques(complement_graph), key=len, default=[])


if __name__ == "__main__": num_episodes = 20000 #! Number could be changed env = env(train_size=40,i=1,episodes=num_episodes) #! n_step = 4 agent = Agent() scores = [] loss_list = [] """ Please note that after adding this Greedy Algorithm, your running time will be much longer!!! """ Greedy_filename = "greedy.txt" file_greedy = open(Greedy_filename,"w") score_filename = "score.txt"#! file_score = open(score_filename,"w")#! for i in tqdm(range(num_episodes)): score = 0 graph = env.reset(i,num_episodes)#! file_greedy.write("Episode: {:<4}, size_greedy: {:<4}, node_num:{:<4}, stable_set:{}\n".