def main(args):
    json_file = args.json_file
    json_files_train = args.json_files_train
    json_file_policy_train = args.json_file_PA_train
    json_file_policy_CS_train = args.json_file_CS_train
    with open('./config/deployment/' + json_file + '.json', 'r') as f:
        options = json.load(f)
    with open('./config/policy/' + json_file_policy_train + '.json', 'r') as f:
        options_policy = json.load(f)
    with open('./config/policy/' + json_file_policy_CS_train + '.json', 'r') as f:
        options_CS = json.load(f)
    if not options_policy['cuda']:
        os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
    import tensorflow as tf

    for json_file_train in json_files_train:
        with open('./config/deployment/' + json_file_train + '.json', 'r') as f:
            options_train = json.load(f)

        included_train_episodes = []
        tot_train_episodes = int(options_train['simulation']['total_samples'] / options_train['train_episodes']['T_train'])
        N = options['simulation']['N']
        # Multi-channel scenario, M denotes the number of channels.
        if 'M' in options['simulation']:
            M = options['simulation']['M']
        else:
            M = 1
        # if N <= 20:
        #     for i in range(tot_train_episodes + 1):
        #         if i <= 15 or i % 5 == 0:
        #             included_train_episodes.append(i)
        # else:
        included_train_episodes.append(tot_train_episodes)

        train_tot_simulations = options_train['simulation']['num_simulations']
        tot_test_episodes = int(options['simulation']['total_samples'] / options['train_episodes']['T_train'])
        inner_train_networks = [[]] * tot_test_episodes
        for i in range(tot_test_episodes):
            inner_train_networks[i] = 0
            # if options['simulation']['test_include'] == 'all':
            #     inner_train_networks[i] = 0  # list(range(train_tot_simulations))
            # else:
            #     inner_train_networks[i] = list(np.random.randint(0, train_tot_simulations, options['simulation']['test_include']))

        ## Number of samples
        total_samples = options['simulation']['total_samples']
        N = options['simulation']['N']

        # Simulation parameters
        train_episodes = options['train_episodes']
        mobility_params = options['mobility_params']
        mobility_params['alpha_angle'] = options['mobility_params']['alpha_angle_rad'] * np.pi  # radian/sec

        # Some defaults
        Pmax_dB = 38.0 - 30
        Pmax = np.power(10.0, Pmax_dB / 10)
        n0_dB = -114.0 - 30
        noise_var = np.power(10.0, n0_dB / 10)

        for ep in included_train_episodes:
            # np.random.seed(500 + N + ep)
            file_path = './simulations/channel/%s_network%d' % (json_file, 0)
            data = np.load(file_path + '.npz')
            H_all = data['arr_1']
            H_all_2 = []
            for i in range(total_samples):
                H_all_2.append(H_all[i] ** 2)

            weights = []
            for loop in range(total_samples):
                weights.append(np.array(np.ones(N)))

            time_calculating_strategy_takes = []
            # Virtual neighbor placer
            policy = DQN.DQN(options, options_policy, N, M, Pmax, noise_var, seed=500 + N + ep)

            ## Our JSAC version uses a linear quantizer.
            strategy_translation = np.zeros(policy.power_levels)
            strategy_translation[0] = 0.0  # Tx power 0
            # Remaining levels are spaced linearly (in watts) up to Pmax.
            for i in range(1, policy.power_levels - 1):
                strategy_translation[i] = i * (Pmax / (policy.power_levels - 1))
            strategy_translation[-1] = Pmax
            # strategy_translation = np.zeros(policy.power_levels)
            # strategy_translation[0] = 0.0  # Tx power 0
            # Pmin_dB = 10.0 - 30
            # # Calculate steps in dBm
            # strategy_translation_dB_step = (Pmax_dB - Pmin_dB) / (policy.power_levels - 2)
            # for i in range(1, policy.power_levels - 1):
            #     strategy_translation[i] = np.power(10.0, (Pmin_dB + (i - 1) * strategy_translation_dB_step) / 10)
            # strategy_translation[-1] = Pmax

            time_calculating_strategy_takes = []
            time_optimization_at_each_slot_takes = []
            sum_rate_distributed_policy_episode = []
            p_strategy_all_episode = []

            i_train = 0
            # for i_train in range(len(inner_train_networks[0])):
            sum_rate_distributed_policy = []
            sum_rate_list_distributed_policy = collections.deque([], 2)

            # Initial allocation is just random
            p_central = Pmax * np.random.rand(N)
            p_strategy = np.array(p_central)  # strategy is a completely different object
            p_strategy_current = np.array(p_strategy)
            alpha_central = np.zeros((N, M))
            for k in range(N):
                alpha_central[k, np.random.randint(M)] = 1
            alpha_strategy = np.array(alpha_central)  # strategy is a completely different object
            alpha_strategy_current = np.array(alpha_strategy)
            alpha_int_central = np.where(alpha_central == 1)[1].astype(int)
            alpha_int_strategy = np.array(alpha_central)  # strategy is a completely different object
            alpha_int_strategy_current = np.array(alpha_int_strategy)

            # Current CSI used to calculate the power allocation
            current_csi = 0
            previous_csi = 0

            p_strategy_all = []
            alpha_strategy_all = []
            alpha_int_strategy_all = []

            with tf.Session() as sess:
                sess.run(policy.init)
                policy.initialize_updates(sess)
                # Start iterating over time slots
                for sim in range(total_samples):
                    # Load an instance per training episode for testing purposes.
                    if (sim % train_episodes['T_train'] == 0):
                        train_network_idx = i_train  # inner_train_networks[int(sim / train_episodes['T_train'])][i_train]
                        model_destination = ('./simulations/sumrate/policy/%s_%s_%s_network%d_episode%d.ckpt' % (
                            json_file_train, json_file_policy_train, json_file_policy_CS_train, train_network_idx, ep)).replace('[', '').replace(']', '')
                        policy.load(sess, model_destination)
                        i_train += 1
                        i_train = i_train % train_tot_simulations

                    # If at least one time slot has passed, there is experience to act on.
                    if (sim % train_episodes['T_train'] > 1):
                        # Each agent picks its strategy.
                        for agent in range(N):
                            current_local_state = policy.local_state(sim, agent, p_strategy_all, alpha_strategy_all, H_all_2, sum_rate_list_distributed_policy, weights)
                            a_time = time.time()
                            strategy = policy.act_noepsilon(sess, current_local_state, sim)
                            time_calculating_strategy_takes.append(time.time() - a_time)

                            # Pick the action: power index is strategy % power_levels, channel index is strategy // power_levels.
                            p_strategy[agent] = strategy_translation[strategy % policy.power_levels]
                            alpha_strategy[agent, :] = np.zeros(M)
                            alpha_strategy[agent, strategy // policy.power_levels] = 1
                            alpha_int_strategy[agent] = strategy // policy.power_levels

                            # Add the current state to the short-term memory to observe it during the next state.
                            policy.previous_state[agent, :] = current_local_state
                            policy.previous_action[agent] = strategy

                    if (sim % train_episodes['T_train'] < 2):
                        p_strategy = np.random.rand(N)
                        alpha_strategy = np.zeros((N, M))
                        for k in range(N):
                            alpha_strategy[k, np.random.randint(M)] = 1
                        alpha_int_strategy = np.where(alpha_strategy == 1)[1].astype(int)

                    p_strategy_current = np.array(p_strategy)
                    alpha_strategy_current = np.array(alpha_strategy)
                    alpha_int_strategy_current = np.array(alpha_int_strategy).astype(int)
                    for m in range(M):
                        policy.prev_suminterferences[:, m] = np.matmul(H_all_2[sim][:, :, m], alpha_strategy[:, m] * p_strategy) - (H_all_2[sim][:, :, m].diagonal() * alpha_strategy[:, m] * p_strategy) + noise_var
                    # sims_pos_p[np.where(p_strategy_current > 0)] = sim

                    sum_rate_list_distributed_policy.append(pb.reward_helper(H_all[sim], p_strategy, alpha_strategy, noise_var, Pmax))
                    weights.append(np.array(np.ones(N)))
                    sum_rate_distributed_policy.append(pb.sumrate_multi_weighted_clipped(H_all[sim], p_strategy, alpha_strategy, noise_var, weights[sim]))

                    p_strategy_all.append(p_strategy_current)
                    alpha_strategy_all.append(alpha_strategy_current)
                    alpha_int_strategy_all.append(alpha_int_strategy_current)
                    if (sim % 2500 == 0):
                        print('Test time %d' % (sim))

                sum_rate_distributed_policy_episode.append(copy.copy(sum_rate_distributed_policy))
                p_strategy_all_episode.append(copy.copy(p_strategy_all))

            # End Test Phase
            np_save_path = './simulations/sumrate/test/%s_%s_%s_%s_episode%d.ckpt' % (json_file, json_file_train, json_file_policy_train, json_file_policy_CS_train, ep)
            print(np_save_path)
            np.savez(np_save_path, options, options_policy, sum_rate_distributed_policy_episode, p_strategy_all_episode,
                     time_optimization_at_each_slot_takes, time_calculating_strategy_takes, included_train_episodes, inner_train_networks)
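# Illustration (not part of the original script): the test loop above folds power
# control and channel selection into one discrete DQN action; the power index is
# strategy % power_levels and the channel index is strategy // power_levels, with the
# nonzero power levels spaced linearly between 0 and Pmax. A minimal standalone sketch
# of that decoding, assuming power_levels, M, and Pmax carry the same meaning as in
# the script; the helper name below is ours and does not exist in the policy classes.
import numpy as np

def decode_joint_action_sketch(strategy, power_levels, M, Pmax):
    """Map a flat DQN action index to (transmit power in watts, channel index)."""
    levels = np.arange(power_levels) * (Pmax / (power_levels - 1))  # 0, ..., Pmax, linear quantizer
    power = levels[strategy % power_levels]
    channel = strategy // power_levels
    assert 0 <= channel < M, "flat index must lie in [0, M * power_levels)"
    return power, channel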
def main(args):
    json_file = args.json_file
    json_files_train = args.json_files_train
    json_file_policy_train = args.json_file_PA_train
    json_file_policy_CS_train = args.json_file_CS_train
    with open('./config/deployment/' + json_file + '.json', 'r') as f:
        options = json.load(f)
    with open('./config/policy/' + json_file_policy_train + '.json', 'r') as f:
        options_policy = json.load(f)
    with open('./config/policy/' + json_file_policy_CS_train + '.json', 'r') as f:
        options_CS = json.load(f)
    if not options_policy['cuda']:
        os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
    import tensorflow as tf

    for json_file_train in json_files_train:
        with open('./config/deployment/' + json_file_train + '.json', 'r') as f:
            options_train = json.load(f)

        included_train_episodes = []
        tot_train_episodes = int(options_train['simulation']['total_samples'] / options_train['train_episodes']['T_train'])
        N = options['simulation']['N']
        # Multi-channel scenario, M denotes the number of channels.
        if 'M' in options['simulation']:
            M = options['simulation']['M']
        else:
            M = 1
        # if N <= 20:
        #     for i in range(tot_train_episodes + 1):
        #         if i <= 15 or i % 5 == 0:
        #             included_train_episodes.append(i)
        # else:
        included_train_episodes.append(tot_train_episodes)

        train_tot_simulations = options_train['simulation']['num_simulations']
        tot_test_episodes = int(options['simulation']['total_samples'] / options['train_episodes']['T_train'])
        inner_train_networks = [[]] * tot_test_episodes
        for i in range(tot_test_episodes):
            inner_train_networks[i] = 0
            # if options['simulation']['test_include'] == 'all':
            #     inner_train_networks[i] = 0  # list(range(train_tot_simulations))
            # else:
            #     inner_train_networks[i] = list(np.random.randint(0, train_tot_simulations, options['simulation']['test_include']))

        ## Number of samples
        total_samples = options['simulation']['total_samples']

        # Simulation parameters
        train_episodes = options['train_episodes']
        mobility_params = options['mobility_params']
        mobility_params['alpha_angle'] = options['mobility_params']['alpha_angle_rad'] * np.pi  # radian/sec

        # Some defaults
        Pmax_dB = 38.0 - 30
        Pmax = np.power(10.0, Pmax_dB / 10)
        n0_dB = -114.0 - 30
        noise_var = np.power(10.0, n0_dB / 10)

        # Hyper parameters
        neightresh = noise_var * options_policy['neightresh']

        i_train = -1
        for ep in included_train_episodes:
            # np.random.seed(500 + N + ep)
            # i_train = np.random.randint(train_tot_simulations)
            i_train += 1
            i_train = i_train % train_tot_simulations

            file_path = './simulations/channel/%s_network%d' % (json_file, 0)
            data = np.load(file_path + '.npz')
            H_all = data['arr_1']
            H_all_2 = []
            for i in range(total_samples):
                H_all_2.append(H_all[i] ** 2)

            weights = []
            for loop in range(total_samples):
                weights.append(np.array(np.ones(N)))

            time_calculating_strategy_takes = []
            # Virtual neighbor placer
            neighbors_in = collections.deque([], 2)
            neighbors = collections.deque([], 2)
            sims_pos_p = np.zeros(N).astype(int) - 1

            policy = DDPG.DDPG(options, options_policy, options_CS, N, M, Pmax, noise_var, seed=500 + N + ep)

            time_calculating_strategy_takes = []
            time_optimization_at_each_slot_takes = []
            sum_rate_distributed_policy_episode = []
            p_strategy_all_episode = []

            # for i_train in range(len(inner_train_networks[0])):
            sum_rate_distributed_policy = []
            sum_rate_list_distributed_policy = collections.deque([], 2)

            # Initial allocation is just random
            p_central = Pmax * np.random.rand(N)
            p_strategy = np.array(p_central)  # strategy is a completely different object
            p_strategy_current = np.array(p_strategy)
            alpha_central = np.zeros((N, M))
            for k in range(N):
                alpha_central[k, np.random.randint(M)] = 1
            alpha_strategy = np.array(alpha_central)  # strategy is a completely different object
            alpha_strategy_current = np.array(alpha_strategy)
            alpha_int_central = np.where(alpha_central == 1)[1].astype(int)
            alpha_int_strategy = np.array(alpha_central)  # strategy is a completely different object
            alpha_int_strategy_current = np.array(alpha_int_strategy)

            p_strategy_all = []
            alpha_strategy_all = []
            alpha_int_strategy_all = []

            with tf.Session() as sess:
                sess.run(policy.init)
                policy.initialize_critic_updates(sess)
                policy.initialize_actor_updates(sess)
                # Start iterating over time slots
                for sim in range(total_samples):
                    # Load an instance per training episode for testing purposes.
                    if (sim % train_episodes['T_train'] == 0):
                        train_network_idx = i_train  # inner_train_networks[int(sim / train_episodes['T_train'])][i_train]
                        model_destination = ('./simulations/sumrate/policy/%s_%s_%s_network%d_episode%d.ckpt' % (
                            json_file_train, json_file_policy_train, json_file_policy_CS_train, train_network_idx, ep)).replace('[', '').replace(']', '')
                        policy.load(sess, model_destination)
                        i_train += 1
                        i_train = i_train % train_tot_simulations

                    # If at least one time slot has passed, there is experience to act on.
                    if (sim % train_episodes['T_train'] > 1):
                        # Each agent picks its strategy.
                        for agent in range(N):
                            # Channel selection
                            current_local_state = policy.local_state(sim, agent, p_strategy_all, alpha_strategy_all, alpha_int_strategy_all, H_all_2, sum_rate_list_distributed_policy, weights)
                            a_time = time.time()
                            CSstrategy = policy.CSact_noepsilon(sess, current_local_state, sim)
                            selected_channel = int(CSstrategy)
                            current_singlechannel_state = current_local_state[selected_channel * policy.DDPGnum_input:(selected_channel + 1) * policy.DDPGnum_input]
                            # if sim > 1000 and forcezero:
                            #     print('aaa')
                            PAstrategy = policy.PAact_noepsilon(sess, current_singlechannel_state, sim)
                            time_calculating_strategy_takes.append(time.time() - a_time)
                            # if sim == 200:
                            #     print('debug')

                            # Pick the action
                            p_strategy[agent] = policy.Pmax * PAstrategy  # ** 10
                            # p_strategy[agent] = policy.Pmax * np.round(PAstrategy, 2)  # ** 10
                            alpha_strategy[agent, :] = np.zeros(M)
                            alpha_strategy[agent, CSstrategy] = 1
                            alpha_int_strategy[agent] = selected_channel

                            # Add the current state to the short-term memory to observe it during the next state.
                            policy.previous_state[agent, :] = current_singlechannel_state
                            policy.previous_action[agent] = PAstrategy
                            policy.DQNprevious_state[agent, :] = current_local_state
                            policy.DQNprevious_action[agent] = CSstrategy

                    if (sim % train_episodes['T_train'] < 2):
                        p_strategy = np.random.rand(N)
                        alpha_strategy = np.zeros((N, M))
                        for k in range(N):
                            alpha_strategy[k, np.random.randint(M)] = 1
                        alpha_int_strategy = np.where(alpha_strategy == 1)[1].astype(int)

                    p_strategy_current = np.array(p_strategy)
                    alpha_strategy_current = np.array(alpha_strategy)
                    alpha_int_strategy_current = np.array(alpha_int_strategy).astype(int)
                    for m in range(M):
                        policy.prev_suminterferences[:, m] = np.matmul(H_all_2[sim][:, :, m], alpha_strategy[:, m] * p_strategy) - (H_all_2[sim][:, :, m].diagonal() * alpha_strategy[:, m] * p_strategy) + noise_var
                    if M > 1:
                        policy.sorted_channels = np.argsort(H_all_2[sim][np.arange(N), np.arange(N), :] / policy.prev_suminterferences) / float(M)
                    # sims_pos_p[np.where(p_strategy_current > 0)] = sim
                    # tmp_neighbors_in = []
                    # tmp_neighbors = []
                    # for nei_i in range(N):
                    #     neigh_tmp_variab = np.where((H_all[sim][nei_i, :] ** 2) * p_strategy_current > neightresh)
                    #     neigh_tmp_variab = np.delete(neigh_tmp_variab, np.where(neigh_tmp_variab[0] == nei_i))
                    #     tmp_neighbors_in.append(neigh_tmp_variab)
                    # for nei_i in range(N):
                    #     tmp_neighlist = []
                    #     for nei_j in range(N):
                    #         if (len(np.where(tmp_neighbors_in[nei_j] == nei_i)[0]) != 0):
                    #             tmp_neighlist.append(nei_j)
                    #     if (len(tmp_neighlist) == 0 and len(neighbors) > 0):
                    #         tmp_neighbors.append(np.array(neighbors[-1][nei_i]))
                    #     else:
                    #         tmp_neighbors.append(np.array(tmp_neighlist))
                    # neighbors.append(tmp_neighbors)
                    # neighbors_in.append(tmp_neighbors_in)

                    # All sum-rates in a list
                    sum_rate_list_distributed_policy.append(pb.reward_helper(H_all[sim], p_strategy, alpha_strategy, noise_var, Pmax))
                    weights.append(np.array(np.ones(N)))
                    sum_rate_distributed_policy.append(pb.sumrate_multi_weighted_clipped(H_all[sim], p_strategy, alpha_strategy, noise_var, weights[sim]))

                    p_strategy_all.append(p_strategy_current)
                    alpha_strategy_all.append(alpha_strategy_current)
                    alpha_int_strategy_all.append(alpha_int_strategy_current)
                    if (sim % 2500 == 0):
                        print('Test time %d' % (sim))

                sum_rate_distributed_policy_episode.append(copy.copy(sum_rate_distributed_policy))
                p_strategy_all_episode.append(copy.copy(p_strategy_all))

            # End Test Phase
            np_save_path = './simulations/sumrate/test/%s_%s_%s_%s_episode%d.ckpt' % (json_file, json_file_train, json_file_policy_train, json_file_policy_CS_train, ep)
            print('Saved to %s' % (np_save_path))
            np.savez(np_save_path, options, options_policy, sum_rate_distributed_policy_episode, p_strategy_all_episode,
                     time_optimization_at_each_slot_takes, time_calculating_strategy_takes, included_train_episodes, inner_train_networks)
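# Illustration (not part of the original script): both test loops above update
# policy.prev_suminterferences with a matrix product minus the diagonal, i.e. the total
# power received on each channel minus the agent's own desired signal, plus noise.
# A minimal standalone NumPy sketch of that step, assuming H2 is the (N, N, M) array of
# squared channel gains, alpha the (N, M) one-hot channel assignment, and p the (N,)
# power vector, as used above; the function name is ours.
import numpy as np

def suminterference_sketch(H2, alpha, p, noise_var):
    N, _, M = H2.shape
    interference = np.zeros((N, M))
    for m in range(M):
        rx_power = np.matmul(H2[:, :, m], alpha[:, m] * p)        # all power arriving on channel m
        own_signal = H2[:, :, m].diagonal() * alpha[:, m] * p     # remove each link's own signal
        interference[:, m] = rx_power - own_signal + noise_var
    return interference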
def main(args):
    json_file = args.json_file
    json_files_train = args.json_files_train
    json_file_policy_train = args.json_file_policy_train
    with open('./config/deployment/' + json_file + '.json', 'r') as f:
        options = json.load(f)
    with open('./config/policy/' + json_file_policy_train + '.json', 'r') as f:
        options_policy = json.load(f)
    if not options_policy['cuda']:
        os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
    import tensorflow as tf

    for json_file_train in json_files_train:
        with open('./config/deployment/' + json_file_train + '.json', 'r') as f:
            options_train = json.load(f)

        included_train_episodes = []
        tot_train_episodes = int(options_train['simulation']['total_samples'] / options_train['train_episodes']['T_train'])
        N = options['simulation']['N']
        if N <= 20:
            for i in range(tot_train_episodes + 1):
                if i <= 15 or i % 5 == 0:
                    included_train_episodes.append(i)
        else:
            included_train_episodes.append(tot_train_episodes)

        train_tot_simulations = options_train['simulation']['num_simulations']
        tot_test_episodes = int(options['simulation']['total_samples'] / options['train_episodes']['T_train'])
        inner_train_networks = [[]] * tot_test_episodes
        for i in range(tot_test_episodes):
            if options['simulation']['test_include'] == 'all':
                inner_train_networks[i] = 0
            else:
                inner_train_networks[i] = list(np.random.randint(0, train_tot_simulations, options['simulation']['test_include']))

        ## Number of samples
        total_samples = options['simulation']['total_samples']
        N = options['simulation']['N']

        # Simulation parameters
        train_episodes = options['train_episodes']
        mobility_params = options['mobility_params']
        mobility_params['alpha_angle'] = options['mobility_params']['alpha_angle_rad'] * np.pi  # radian/sec

        # Some defaults
        Pmax_dB = 46.0 - 30
        Pmax = np.power(10.0, Pmax_dB / 10)
        n0_dB = -104.0 - 30
        noise_var = np.power(10.0, n0_dB / 10)

        # Hyper parameters
        neightresh = noise_var * options_policy['neightresh']

        for ep in included_train_episodes:
            file_path = './simulations/channel/%s_network%d' % (json_file, 0)
            data = np.load(file_path + '.npz')
            H_all = data['arr_1']
            H_all_2 = []
            for i in range(total_samples):
                H_all_2.append(H_all[i] ** 2)

            weights = []
            for loop in range(total_samples):
                weights.append(np.array(np.ones(N)))

            time_calculating_strategy_takes = []
            # Virtual neighbor placer
            neighbors_in = collections.deque([], 2)
            neighbors = collections.deque([], 2)
            sims_pos_p = np.zeros(N).astype(int) - 1

            policy = DQN.DQN(options, options_policy, N, Pmax, noise_var)

            strategy_translation = np.zeros(policy.num_actions)
            strategy_translation[0] = 0.0  # Tx power 0
            Pmin_dB = 10.0 - 30
            # Calculate the remaining power levels in equal dB steps between Pmin and Pmax.
            strategy_translation_dB_step = (Pmax_dB - Pmin_dB) / (policy.num_actions - 2)
            for i in range(1, policy.num_actions - 1):
                strategy_translation[i] = np.power(10.0, (Pmin_dB + (i - 1) * strategy_translation_dB_step) / 10)
            strategy_translation[-1] = Pmax

            time_calculating_strategy_takes = []
            time_optimization_at_each_slot_takes = []
            sum_rate_distributed_policy_episode = []
            p_strategy_all_episode = []

            i_train = 0
            sum_rate_distributed_policy = []
            sum_rate_list_distributed_policy = collections.deque([], 2)

            # Initial allocation is just random
            p_central = Pmax * np.random.rand(N)
            p_strategy = np.array(p_central)  # strategy is a completely different object
            p_strategy_current = np.array(p_strategy)
            p_strategy_all = []

            with tf.Session() as sess:
                sess.run(policy.init)
                policy.initialize_updates(sess)
                # Start iterating over time slots
                for sim in range(total_samples):
                    # Load an instance per training episode for testing purposes.
                    if (sim % train_episodes['T_train'] == 0):
                        train_network_idx = i_train
                        model_destination = ('./simulations/sumrate/policy/%s_%s_network%d_episode%d.ckpt' % (
                            json_file_train, json_file_policy_train, train_network_idx, ep)).replace('[', '').replace(']', '')
                        policy.load(sess, model_destination)
                        i_train += 1
                        i_train = i_train % train_tot_simulations

                    # If at least one time slot has passed, there is experience to act on.
                    if (sim % train_episodes['T_train'] > 1):
                        # Each agent picks its strategy.
                        for agent in range(N):
                            current_local_state = policy.local_state(sim, agent, p_strategy_all, H_all_2, neighbors, neighbors_in, sum_rate_list_distributed_policy, sims_pos_p)
                            a_time = time.time()
                            strategy = policy.act_noepsilon(sess, current_local_state, sim)
                            time_calculating_strategy_takes.append(time.time() - a_time)

                            # Pick the action
                            p_strategy[agent] = strategy_translation[strategy]

                            # Add the current state to the short-term memory to observe it during the next state.
                            policy.previous_state[agent, :] = current_local_state
                            policy.previous_action[agent] = strategy

                    if (sim % train_episodes['T_train'] < 2):
                        p_strategy = Pmax * np.ones(N)  # np.random.rand(N)

                    p_strategy_current = np.array(p_strategy)
                    policy.prev_suminterferences = np.matmul(H_all_2[sim], p_strategy) - (H_all_2[sim].diagonal() * p_strategy) + noise_var
                    sims_pos_p[np.where(p_strategy_current > 0)] = sim

                    tmp_neighbors_in = []
                    tmp_neighbors = []
                    for nei_i in range(N):
                        neigh_tmp_variab = np.where((H_all[sim][nei_i, :] ** 2) * p_strategy_current > neightresh)
                        neigh_tmp_variab = np.delete(neigh_tmp_variab, np.where(neigh_tmp_variab[0] == nei_i))
                        tmp_neighbors_in.append(neigh_tmp_variab)
                    for nei_i in range(N):
                        tmp_neighlist = []
                        for nei_j in range(N):
                            if (len(np.where(tmp_neighbors_in[nei_j] == nei_i)[0]) != 0):
                                tmp_neighlist.append(nei_j)
                        if (len(tmp_neighlist) == 0 and len(neighbors) > 0):
                            tmp_neighbors.append(np.array(neighbors[-1][nei_i]))
                        else:
                            tmp_neighbors.append(np.array(tmp_neighlist))
                    neighbors.append(tmp_neighbors)
                    neighbors_in.append(tmp_neighbors_in)

                    # All sum-rates in a list
                    sum_rate_list_distributed_policy.append(pb.reward_helper(H_all[sim], p_strategy, N, noise_var, Pmax, neighbors_in[-1]))
                    sum_rate_distributed_policy.append(pb.sumrate_weighted_clipped(H_all[sim], p_strategy, N, noise_var, weights[sim]))

                    p_strategy_all.append(np.array(p_strategy))
                    if (sim % 2500 == 0):
                        print('Test time %d' % (sim))

                sum_rate_distributed_policy_episode.append(copy.copy(sum_rate_distributed_policy))
                p_strategy_all_episode.append(copy.copy(p_strategy_all))

            # End Test Phase
            np_save_path = './simulations/sumrate/test/%s_%s_%s_episode%d.ckpt' % (json_file, json_file_train, json_file_policy_train, ep)
            print(np_save_path)
            np.savez(np_save_path, options, options_policy, sum_rate_distributed_policy_episode, p_strategy_all_episode,
                     time_optimization_at_each_slot_takes, time_calculating_strategy_takes, included_train_episodes, inner_train_networks)
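# Illustration (not part of the original script): unlike the linear quantizer in the
# first test script, this variant spaces the nonzero power levels evenly in dB between
# Pmin = 10 dBm and Pmax = 46 dBm. All of these scripts convert dBm to watts by
# subtracting 30 (dBm to dBW) before the 10^(x/10) step; a small standalone sketch of
# that conversion, with example values given only as a sanity check.
import numpy as np

def dbm_to_watts_sketch(p_dbm):
    return np.power(10.0, (p_dbm - 30.0) / 10.0)

# dbm_to_watts_sketch(46.0)   is about 39.8 W      (Pmax in this script)
# dbm_to_watts_sketch(-104.0) is about 4.0e-14 W   (noise power in this script)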
def main(args):
    json_file = args.json_file
    json_file_policy = args.json_file_policy
    num_sim = args.num_sim
    with open('./config/deployment/' + json_file + '.json', 'r') as f:
        options = json.load(f)
    with open('./config/policy/' + json_file_policy + '.json', 'r') as f:
        options_policy = json.load(f)
    if not options_policy['cuda']:
        os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
    import tensorflow as tf

    ## Number of samples
    total_samples = options['simulation']['total_samples']
    N = options['simulation']['N']

    if num_sim == -1:
        num_simulations = options['simulation']['num_simulations']
        simulation = options['simulation']['simulation_index_start']
    else:
        num_simulations = 1
        simulation = num_sim

    # Simulation parameters
    train_episodes = options['train_episodes']
    mobility_params = options['mobility_params']
    mobility_params['alpha_angle'] = options['mobility_params']['alpha_angle_rad'] * np.pi  # radian/sec

    # Some defaults
    Pmax_dB = 38.0 - 30
    Pmax = np.power(10.0, Pmax_dB / 10)
    n0_dB = -114.0 - 30
    noise_var = np.power(10.0, n0_dB / 10)

    # Hyper parameters
    N_neighbors = options_policy['N_neighbors']
    neightresh = noise_var * options_policy['neightresh']

    for overal_sims in range(simulation, simulation + num_simulations):
        tf.reset_default_graph()
        tf.set_random_seed(100 + overal_sims)
        np.random.seed(100 + overal_sims)

        file_path = './simulations/channel/%s_network%d' % (json_file, overal_sims)
        data = np.load(file_path + '.npz', allow_pickle=True)
        H_all = data['arr_1']
        H_all_2 = []
        for i in range(total_samples):
            H_all_2.append(H_all[i] ** 2)

        weights = []
        for loop in range(total_samples):
            weights.append(np.array(np.ones(N)))

        time_calculating_strategy_takes = []
        # Virtual neighbor placer
        neighbors_in = collections.deque([], 2)
        neighbors = collections.deque([], 2)
        sims_pos_p = np.zeros(N).astype(int) - 1

        policy = DDPG.DDPG(options, options_policy, N, Pmax, noise_var)

        # Start the simulation
        # Sum rate for the simulation
        sum_rate_distributed_policy = []
        sum_rate_list_distributed_policy = collections.deque([], 2)

        # Initial allocation is just random
        p_central = Pmax * np.random.rand(N)
        p_strategy = np.array(p_central)  # strategy is a completely different object
        p_strategy_current = np.array(p_strategy)

        time_calculating_strategy_takes = []
        time_optimization_at_each_slot_takes = []
        p_strategy_all = []

        with tf.Session() as sess:
            sess.run(policy.init)
            policy.initialize_critic_updates(sess)
            policy.initialize_actor_updates(sess)
            # Start iterating over time slots
            for sim in range(total_samples):
                policy.check_memory_restart(sess, sim)
                policy.update_handler(sess, sim)

                # Save an instance per training episode for testing purposes.
                if (sim % train_episodes['T_train'] == 0):
                    model_destination = ('./simulations/sumrate/policy/%s_%s_network%d_episode%d.ckpt' % (
                        json_file, json_file_policy, overal_sims, int(float(sim) / train_episodes['T_train']))).replace('[', '').replace(']', '')
                    policy.save(sess, model_destination)

                # If at least one time slot has passed, there is experience to act on.
                if (sim % train_episodes['T_train'] > 1):
                    # Each agent picks its strategy.
                    for agent in range(N):
                        current_local_state = policy.local_state(sim, agent, p_strategy_all, H_all_2, neighbors, neighbors_in, sum_rate_list_distributed_policy, sims_pos_p)
                        a_time = time.time()
                        strategy = policy.act(sess, current_local_state, sim, agent)
                        time_calculating_strategy_takes.append(time.time() - a_time)

                        if (sim % train_episodes['T_train'] > 2):
                            # Now there is a previous state to form an experience.
                            sorted_neighbors_criteria = np.log10(H_all_2[sim - 1][np.array(neighbors[-1][agent]), agent] / policy.prev_suminterferences[neighbors[-1][agent]])
                            sorted_neighbors = neighbors[-1][agent][np.argsort(sorted_neighbors_criteria)[::-1]]
                            if len(sorted_neighbors) > N_neighbors:
                                sorted_neighbors = sorted_neighbors[:N_neighbors]
                            sorted_neighbors = np.append(sorted_neighbors, agent)
                            current_reward = np.sum(np.multiply(weights[sim - 1], sum_rate_list_distributed_policy[-1][:, agent])[sorted_neighbors])
                            policy.remember(agent, current_local_state, current_reward)

                            # Only train once per time slot; agent == (N - 1) ensures that.
                            if agent == (N - 1):
                                # If there is enough data to create a mini batch
                                a_time = time.time()
                                # TRAIN for a minibatch
                                policy.train(sess, sim)
                                time_optimization_at_each_slot_takes.append(time.time() - a_time)

                        # Pick the action
                        p_strategy[agent] = policy.Pmax * strategy  # ** 10

                        # Add the current state to the short-term memory to observe it during the next state.
                        policy.previous_state[agent, :] = current_local_state
                        policy.previous_action[agent] = strategy

                if (sim % train_episodes['T_train'] < 2):
                    p_strategy = np.random.rand(N)

                p_strategy_current = np.array(p_strategy)
                policy.prev_suminterferences = np.matmul(H_all_2[sim], p_strategy) - (H_all_2[sim].diagonal() * p_strategy) + noise_var
                sims_pos_p[np.where(p_strategy_current > 0)] = sim

                tmp_neighbors_in = []
                tmp_neighbors = []
                for nei_i in range(N):
                    neigh_tmp_variab = np.where((H_all[sim][nei_i, :] ** 2) * p_strategy_current > neightresh)
                    neigh_tmp_variab = np.delete(neigh_tmp_variab, np.where(neigh_tmp_variab[0] == nei_i))
                    tmp_neighbors_in.append(neigh_tmp_variab)
                for nei_i in range(N):
                    tmp_neighlist = []
                    for nei_j in range(N):
                        if (len(np.where(tmp_neighbors_in[nei_j] == nei_i)[0]) != 0):
                            tmp_neighlist.append(nei_j)
                    if (len(tmp_neighlist) == 0 and len(neighbors) > 0):
                        tmp_neighbors.append(np.array(neighbors[-1][nei_i]))
                    else:
                        tmp_neighbors.append(np.array(tmp_neighlist))
                neighbors.append(tmp_neighbors)
                neighbors_in.append(tmp_neighbors_in)

                # All sum-rates in a list
                sum_rate_list_distributed_policy.append(pb.reward_helper(H_all[sim], p_strategy, N, noise_var, Pmax, neighbors_in[-1]))
                sum_rate_distributed_policy.append(pb.sumrate_weighted_clipped(H_all[sim], p_strategy, N, noise_var, weights[sim]))

                p_strategy_all.append(np.array(p_strategy))
                if (sim % 2500 == 0):
                    print('Time %d sim %d' % (sim, overal_sims))

            policy.equalize(sess)
            print('Train is over sim %d' % (overal_sims))
            model_destination = ('./simulations/sumrate/policy/%s_%s_network%d_episode%d.ckpt' % (
                json_file, json_file_policy, overal_sims, int(float(total_samples) / train_episodes['T_train']))).replace('[', '').replace(']', '')
            policy.save(sess, model_destination)

        # End Train Phase
        np_save_path = './simulations/sumrate/train/%s_%s_network%d.ckpt' % (json_file, json_file_policy, overal_sims)
        print(np_save_path)
        np.savez(np_save_path, options, options_policy, sum_rate_distributed_policy, p_strategy_all,
                 time_optimization_at_each_slot_takes, time_calculating_strategy_takes)
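# Illustration (not part of the original script): the training loop above builds two
# neighbor sets per slot: for each agent i, the transmitters it receives above the
# threshold neightresh (tmp_neighbors_in), and the reverse map of agents that hear
# agent i (tmp_neighbors); when a reverse set comes up empty the script falls back to
# the previous slot's set. A simplified standalone sketch of the thresholding itself,
# assuming H is the (N, N) channel-gain matrix and p the current power vector; the
# function name is ours and the fallback behaviour is omitted.
import numpy as np

def neighbor_sets_sketch(H, p, neightresh):
    N = H.shape[0]
    neighbors_in = []
    for i in range(N):
        heard = np.where((H[i, :] ** 2) * p > neightresh)[0]   # transmitters agent i hears
        neighbors_in.append(heard[heard != i])
    # Reverse map: j is a neighbor of i if i appears in j's incoming set.
    neighbors = [np.array([j for j in range(N) if i in neighbors_in[j]]) for i in range(N)]
    return neighbors_in, neighbors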
def main(args):
    json_file = args.json_file
    json_file_policy = args.json_file_PA
    json_file_CS = args.json_file_CS
    num_sim = args.num_sim
    with open('./config/deployment/' + json_file + '.json', 'r') as f:
        options = json.load(f)
    with open('./config/policy/' + json_file_policy + '.json', 'r') as f:
        options_policy = json.load(f)
    with open('./config/policy/' + json_file_CS + '.json', 'r') as f:
        options_CS = json.load(f)
    if not options_policy['cuda']:
        os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
    import tensorflow as tf
    import random

    ## Number of samples
    total_samples = options['simulation']['total_samples']
    N = options['simulation']['N']
    # Multi-channel scenario, M denotes the number of channels.
    if 'M' in options['simulation']:
        M = options['simulation']['M']
    else:
        M = 1
    # pfs set to True means that we save the log-average sum-rate instead of the sum-rate.
    pfs = False
    if 'pfs' in options['simulation']:
        pfs = options['simulation']['pfs']
    beta = 0.01

    if num_sim == -1:
        num_simulations = options['simulation']['num_simulations']
        simulation = options['simulation']['simulation_index_start']
    else:
        num_simulations = 1
        simulation = num_sim

    # Simulation parameters
    train_episodes = options['train_episodes']
    mobility_params = options['mobility_params']
    mobility_params['alpha_angle'] = options['mobility_params']['alpha_angle_rad'] * np.pi  # radian/sec

    # Some defaults
    Pmax_dB = 38.0 - 30
    Pmax = np.power(10.0, Pmax_dB / 10)
    n0_dB = -114.0 - 30
    noise_var = np.power(10.0, n0_dB / 10)

    # Hyper parameters
    N_neighbors = options_policy['N_neighbors']
    neightresh = noise_var * options_policy['neightresh']

    forcezero = False
    for overal_sims in range(simulation, simulation + num_simulations):
        tf.reset_default_graph()
        np.random.seed(100 + overal_sims)
        random.seed(100 + overal_sims)
        tf.set_random_seed(100 + overal_sims)

        file_path = './simulations/channel/%s_network%d' % (json_file, overal_sims)
        data = np.load(file_path + '.npz', allow_pickle=True)
        H_all = data['arr_1']
        H_all_2 = []
        for i in range(total_samples):
            H_all_2.append(H_all[i] ** 2)

        weights = []
        time_calculating_strategy_takes = []
        # # Virtual neighbor placer
        # neighbors_in = collections.deque([], 2)
        # neighbors = collections.deque([], 2)
        # sims_pos_p = np.zeros(N).astype(int) - 1

        policy = DDPG.DDPG(options, options_policy, options_CS, N, M, Pmax, noise_var, seed=100 + overal_sims)

        # Start the simulation
        # Sum rate for the simulation
        sum_rate_distributed_policy = []
        sum_rate_list_distributed_policy = collections.deque([], 2)

        # Initial allocation is just random
        p_central = Pmax * np.random.rand(N)
        p_strategy = np.array(p_central)  # strategy is a completely different object
        p_strategy_current = np.array(p_strategy)
        alpha_central = np.zeros((N, M))
        for k in range(N):
            alpha_central[k, np.random.randint(M)] = 1
        alpha_strategy = np.array(alpha_central)  # strategy is a completely different object
        alpha_strategy_current = np.array(alpha_strategy)
        alpha_int_central = np.where(alpha_central == 1)[1].astype(int)
        alpha_int_strategy = np.array(alpha_central)  # strategy is a completely different object
        alpha_int_strategy_current = np.array(alpha_int_strategy)

        time_calculating_strategy_takes = []
        time_optimization_at_each_slot_takes = []
        p_strategy_all = []
        alpha_strategy_all = []
        alpha_int_strategy_all = []

        with tf.Session() as sess:
            sess.run(policy.init)
            policy.initialize_critic_updates(sess)
            policy.initialize_actor_updates(sess)
            policy.initialize_DQNupdates(sess)
            # Start iterating over time slots
            for sim in range(total_samples):
                policy.check_memory_restart(sess, sim)
                policy.update_handler(sess, sim)

                # Save an instance per training episode for testing purposes.
                if (sim % train_episodes['T_train'] == 0):
                    model_destination = ('./simulations/sumrate/policy/%s_%s_%s_network%d_episode%d.ckpt' % (
                        json_file, json_file_policy, json_file_CS, overal_sims, int(float(sim) / train_episodes['T_train']))).replace('[', '').replace(']', '')
                    policy.save(sess, model_destination)

                # Act only after the first 50 slots of each episode, so there is experience to act on.
                if (sim % train_episodes['T_train'] > 49):
                    # Each agent picks its strategy.
                    for agent in range(N):
                        # Channel selection
                        current_local_state = policy.local_state(sim, agent, p_strategy_all, alpha_strategy_all, alpha_int_strategy_all, H_all_2, sum_rate_list_distributed_policy, weights)
                        a_time = time.time()
                        CSstrategy = policy.CSact(sess, current_local_state, sim)
                        selected_channel = int(CSstrategy)
                        current_singlechannel_state = current_local_state[selected_channel * policy.DDPGnum_input:(selected_channel + 1) * policy.DDPGnum_input]
                        # if sim > 1000 and forcezero:
                        #     print('aaa')
                        PAstrategy = policy.PAact(sess, current_singlechannel_state, sim, forcezero=forcezero)
                        time_calculating_strategy_takes.append(time.time() - a_time)

                        if (sim % train_episodes['T_train'] > 50):
                            # Now there is a previous state to form an experience.
                            # sorted_neighbors_criteria = np.log10(H_all_2[sim - 1][np.array(neighbors[-1][agent]), agent] / policy.prev_suminterferences[neighbors[-1][agent]])
                            # sorted_neighbors = neighbors[-1][agent][np.argsort(sorted_neighbors_criteria)[::-1]]
                            # if len(sorted_neighbors) > N_neighbors:
                            #     sorted_neighbors = sorted_neighbors[:N_neighbors]
                            # sorted_neighbors = np.append(sorted_neighbors, agent)
                            # sorted_interfereds = np.argsort(H_all_2[sim - 1][:, agent, alpha_int_strategy_all[-1][agent]])[::-1]
                            sorted_interfereds_all = np.argsort(H_all_2[sim - 1][:, agent, alpha_int_strategy_all[-1][agent]] / policy.prev_suminterferences[:, alpha_int_strategy_all[-1][agent]])[::-1]
                            sorted_interfereds_all = np.delete(sorted_interfereds_all, np.where(sorted_interfereds_all == agent))
                            sorted_interfereds = np.hstack((np.setdiff1d(sorted_interfereds_all, np.where(alpha_strategy_all[-1][:, alpha_int_strategy_all[-1][agent]] == 0), assume_unique=True),
                                                            np.setdiff1d(sorted_interfereds_all, np.where(alpha_strategy_all[-1][:, alpha_int_strategy_all[-1][agent]] == 1), assume_unique=True)))
                            # current_reward = min(10, max(-10, np.sum(np.multiply(weights[-1][sorted_interfereds_and_agent], sum_rate_list_distributed_policy[-1][sorted_interfereds_and_agent, agent, alpha_int_strategy_all[-1][agent]]))))
                            # if forcezero: sorted_interfereds_and_agent = np.delete(sorted_interfereds, np.where(sorted_interfereds == agent))  # [:policy.N_neighbors]
                            # else: sorted_interfereds_and_agent = np.append(np.delete(sorted_interfereds, np.where(sorted_interfereds == agent)), agent)  # [:policy.N_neighbors], agent)
                            sorted_interfereds_and_agent = np.append(np.delete(sorted_interfereds, np.where(sorted_interfereds == agent))[:policy.N_neighbors], agent)
                            if not pfs:
                                current_reward = np.sum(np.multiply(weights[-1][sorted_interfereds_and_agent], sum_rate_list_distributed_policy[-1][sorted_interfereds_and_agent, agent, alpha_int_strategy_all[-1][agent]]))
                            # else: current_reward = np.sum(np.multiply(weights[-1][sorted_interfereds_and_agent], sum_rate_list_distributed_policy[-1][sorted_interfereds_and_agent, agent, alpha_int_strategy_all[-1][agent]]))
                            # else: current_reward = min(10, max(-5, np.sum(np.multiply(weights[-1][sorted_interfereds_and_agent], sum_rate_list_distributed_policy[-1][sorted_interfereds_and_agent, agent, alpha_int_strategy_all[-1][agent]]))))
                            else:
                                current_reward = np.sum(np.multiply(weights[-1][sorted_interfereds_and_agent], sum_rate_list_distributed_policy[-1][sorted_interfereds_and_agent, agent, alpha_int_strategy_all[-1][agent]]))
                            # if forcezero: current_reward -= max(sum_rate_list_distributed_policy[-1][np.arange(N), np.arange(N), alpha_int_strategy_all[-1]])
                            if forcezero:
                                current_reward -= weights[-1][agent] * sum_rate_list_distributed_policy[-1][agent, agent, alpha_int_strategy_all[-1][agent]]
                            if forcezero:
                                current_reward -= 5
                            # if forcezero:
                            #     for repeat in range(5):
                            #         policy.CSremember(agent, current_local_state, current_reward)
                            #         policy.PAremember(agent, current_local_state[alpha_int_strategy_all[-1][agent] * policy.DDPGnum_input:(alpha_int_strategy_all[-1][agent] + 1) * policy.DDPGnum_input], current_reward)
                            # else:
                            policy.CSremember(agent, current_local_state, current_reward)
                            policy.PAremember(agent, current_local_state[alpha_int_strategy_all[-1][agent] * policy.DDPGnum_input:(alpha_int_strategy_all[-1][agent] + 1) * policy.DDPGnum_input], current_reward)

                            # Only train once per time slot; agent == (N - 1) ensures that.
                            if agent == (N - 1):
                                # If there is enough data to create a mini batch
                                a_time = time.time()
                                # TRAIN for a minibatch
                                policy.train(sess, sim)
                                time_optimization_at_each_slot_takes.append(time.time() - a_time)
                        # if sim == 200:
                        #     print('debug')

                        # Pick the action
                        p_strategy[agent] = policy.Pmax * PAstrategy  # ** 10
                        # p_strategy[agent] = policy.Pmax * np.round(PAstrategy, 2)  # ** 10
                        alpha_strategy[agent, :] = np.zeros(M)
                        alpha_strategy[agent, CSstrategy] = 1
                        alpha_int_strategy[agent] = selected_channel

                        # Add the current state to the short-term memory to observe it during the next state.
                        policy.previous_state[agent, :] = current_singlechannel_state
                        policy.previous_action[agent] = PAstrategy
                        policy.DQNprevious_state[agent, :] = current_local_state
                        policy.DQNprevious_action[agent] = CSstrategy

                if (sim % train_episodes['T_train'] < 50):
                    p_strategy = np.random.rand(N)
                    alpha_strategy = np.zeros((N, M))
                    for k in range(N):
                        alpha_strategy[k, np.random.randint(M)] = 1
                    alpha_int_strategy = np.where(alpha_strategy == 1)[1].astype(int)

                p_strategy_current = np.array(p_strategy)
                alpha_strategy_current = np.array(alpha_strategy)
                alpha_int_strategy_current = np.array(alpha_int_strategy).astype(int)
                for m in range(M):
                    policy.prev_suminterferences[:, m] = np.matmul(H_all_2[sim][:, :, m], alpha_strategy[:, m] * p_strategy) - (H_all_2[sim][:, :, m].diagonal() * alpha_strategy[:, m] * p_strategy) + noise_var
                if M > 1:
                    policy.sorted_channels = np.argsort(H_all_2[sim][np.arange(N), np.arange(N), :] / policy.prev_suminterferences) / float(M)
                # sims_pos_p[np.where(p_strategy_current > 0)] = sim
                # tmp_neighbors_in = []
                # tmp_neighbors = []
                # for nei_i in range(N):
                #     neigh_tmp_variab = np.where((H_all[sim][nei_i, :] ** 2) * p_strategy_current > neightresh)
                #     neigh_tmp_variab = np.delete(neigh_tmp_variab, np.where(neigh_tmp_variab[0] == nei_i))
                #     tmp_neighbors_in.append(neigh_tmp_variab)
                # for nei_i in range(N):
                #     tmp_neighlist = []
                #     for nei_j in range(N):
                #         if (len(np.where(tmp_neighbors_in[nei_j] == nei_i)[0]) != 0):
                #             tmp_neighlist.append(nei_j)
                #     if (len(tmp_neighlist) == 0 and len(neighbors) > 0):
                #         tmp_neighbors.append(np.array(neighbors[-1][nei_i]))
                #     else:
                #         tmp_neighbors.append(np.array(tmp_neighlist))
                # neighbors.append(tmp_neighbors)
                # neighbors_in.append(tmp_neighbors_in)

                # All sum-rates in a list
                sum_rate_list_distributed_policy.append(pb.reward_helper(H_all[sim], p_strategy, alpha_strategy, noise_var, Pmax))
                if not pfs:
                    weights.append(np.array(np.ones(N)))
                    sum_rate_distributed_policy.append(pb.sumrate_multi_weighted_clipped(H_all[sim], p_strategy, alpha_strategy, noise_var, weights[sim]))
                else:
                    rates = sum_rate_list_distributed_policy[-1][np.arange(N), np.arange(N), alpha_int_strategy_current]
                    if sim % train_episodes['T_train'] == 0:
                        # Restart the moving average at each episode boundary.
                        average_sum_rate = np.array(rates)
                    else:
                        average_sum_rate = (1.0 - beta) * average_sum_rate + beta * np.array(rates)
                    weights.append(np.array([1.0 / i for i in average_sum_rate]))
                    sum_rate_distributed_policy.append(np.sum(np.log(average_sum_rate)))

                p_strategy_all.append(p_strategy_current)
                alpha_strategy_all.append(alpha_strategy_current)
                alpha_int_strategy_all.append(alpha_int_strategy_current)
                if (sim % 100 == 0):
                    print('Time %d sim %d' % (sim, overal_sims))

                if sum(p_strategy_all[-1] >= 0.98 * policy.Pmax) == policy.N:
                    print('sim %d all 1' % (sim))
                    forcezero = True
                elif sum(p_strategy_all[-1] <= 0.02 * policy.Pmax) == policy.N:
                    print('sim %d all 0' % (sim))
                    forcezero = True
                else:
                    forcezero = False

            policy.equalize(sess)
            print('Train is over sim %d' % (overal_sims))
            model_destination = ('./simulations/sumrate/policy/%s_%s_%s_network%d_episode%d.ckpt' % (
                json_file, json_file_policy, json_file_CS, overal_sims, int(float(total_samples) / train_episodes['T_train']))).replace('[', '').replace(']', '')
            policy.save(sess, model_destination)

        # End Train Phase
        np_save_path = './simulations/sumrate/train/%s_%s_%s_network%d.ckpt' % (json_file, json_file_policy, json_file_CS, overal_sims)
        print(np_save_path)
        np.savez(np_save_path, options, options_policy, sum_rate_distributed_policy, p_strategy_all, alpha_strategy_all,
                 time_optimization_at_each_slot_takes, time_calculating_strategy_takes)
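# Illustration (not part of the original script): with pfs enabled, the loop above
# replaces the unit weights by proportional-fairness weights. Each agent's rate is
# smoothed with an exponential moving average (beta = 0.01, restarted at every episode
# boundary), the weight is the inverse of that average, and the logged objective is the
# sum of log average rates. A standalone sketch of one bookkeeping step under those
# assumptions; the helper below is not a method of the DDPG class.
import numpy as np

def pfs_update_sketch(rates, average_sum_rate=None, beta=0.01):
    rates = np.asarray(rates, dtype=float)
    if average_sum_rate is None:
        average_sum_rate = np.array(rates)                      # episode restart
    else:
        average_sum_rate = (1.0 - beta) * average_sum_rate + beta * rates
    weights = 1.0 / average_sum_rate                            # PF weights for the next reward
    log_utility = np.sum(np.log(average_sum_rate))              # logged objective
    return average_sum_rate, weights, log_utility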