# Evaluation script: run trained DQN policy checkpoints on the test deployment,
# cycling through the saved training episodes and trained networks.
# Module-level imports assumed by this function (pb is taken to be the project's
# helper module, e.g. `import project_backend as pb`; DQN provides the agent class).
import os
import json
import time
import copy
import collections
import numpy as np
import DQN
import project_backend as pb


def main(args):
    json_file = args.json_file
    json_files_train = args.json_files_train
    json_file_policy_train = args.json_file_policy_train
    with open('./config/deployment/' + json_file + '.json', 'r') as f:
        options = json.load(f)
    with open('./config/policy/' + json_file_policy_train + '.json', 'r') as f:
        options_policy = json.load(f)
    if not options_policy['cuda']:
        os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
    import tensorflow as tf

    for json_file_train in json_files_train:
        with open('./config/deployment/' + json_file_train + '.json', 'r') as f:
            options_train = json.load(f)
        # Decide which training episodes' checkpoints to evaluate.
        included_train_episodes = []
        tot_train_episodes = int(options_train['simulation']['total_samples'] /
                                 options_train['train_episodes']['T_train'])
        N = options['simulation']['N']
        if N <= 20:
            for i in range(tot_train_episodes + 1):
                if i <= 15 or i % 5 == 0:
                    included_train_episodes.append(i)
        else:
            included_train_episodes.append(tot_train_episodes)
        train_tot_simulations = options_train['simulation']['num_simulations']
        tot_test_episodes = int(options['simulation']['total_samples'] /
                                options['train_episodes']['T_train'])
        # Which trained networks to include in each test episode.
        inner_train_networks = [[]] * tot_test_episodes
        for i in range(tot_test_episodes):
            if options['simulation']['test_include'] == 'all':
                inner_train_networks[i] = 0
            else:
                inner_train_networks[i] = list(
                    np.random.randint(0, train_tot_simulations,
                                      options['simulation']['test_include']))

        ## Number of samples
        total_samples = options['simulation']['total_samples']
        N = options['simulation']['N']

        # Simulation parameters
        train_episodes = options['train_episodes']
        mobility_params = options['mobility_params']
        mobility_params['alpha_angle'] = options['mobility_params']['alpha_angle_rad'] * np.pi  # radian/sec

        # Some defaults
        Pmax_dB = 46.0 - 30
        Pmax = np.power(10.0, Pmax_dB / 10)
        n0_dB = -104.0 - 30
        noise_var = np.power(10.0, n0_dB / 10)
        # Hyperparameters
        neightresh = noise_var * options_policy['neightresh']

        for ep in included_train_episodes:
            # Load the test deployment's channel realizations (network 0).
            file_path = './simulations/channel/%s_network%d' % (json_file, 0)
            data = np.load(file_path + '.npz')
            H_all = data['arr_1']
            H_all_2 = []
            for i in range(total_samples):
                H_all_2.append(H_all[i] ** 2)
            weights = []
            for loop in range(total_samples):
                weights.append(np.array(np.ones(N)))
            time_calculating_strategy_takes = []
            # Virtual neighbor placer
            neighbors_in = collections.deque([], 2)
            neighbors = collections.deque([], 2)
            sims_pos_p = np.zeros(N).astype(int) - 1

            policy = DQN.DQN(options, options_policy, N, Pmax, noise_var)

            # Map discrete actions to transmit power levels.
            strategy_translation = np.zeros(policy.num_actions)
            strategy_translation[0] = 0.0  # Tx power 0
            Pmin_dB = 10.0 - 30
            # Calculate steps in dBm
            strategy_translation_dB_step = (Pmax_dB - Pmin_dB) / (policy.num_actions - 2)
            for i in range(1, policy.num_actions - 1):
                strategy_translation[i] = np.power(10.0, (Pmin_dB + (i - 1) * strategy_translation_dB_step) / 10)
            strategy_translation[-1] = Pmax

            time_calculating_strategy_takes = []
            time_optimization_at_each_slot_takes = []
            sum_rate_distributed_policy_episode = []
            p_strategy_all_apisode = []
            i_train = 0
            sum_rate_distributed_policy = []
            sum_rate_list_distributed_policy = collections.deque([], 2)
            # Initial allocation is just random
            p_central = Pmax * np.random.rand(N)
            p_strategy = np.array(p_central)  # strategy is a completely different object
            p_strategy_current = np.array(p_strategy)
            p_strategy_all = []

            with tf.Session() as sess:
                sess.run(policy.init)
                policy.initialize_updates(sess)
                # Start iterating over time slots
                for sim in range(total_samples):
                    # Load a new trained instance at the start of each test episode.
                    if (sim % train_episodes['T_train'] == 0):
                        train_network_idx = i_train
                        model_destination = ('./simulations/sumrate/policy/%s_%s_network%d_episode%d.ckpt' %
                                             (json_file_train, json_file_policy_train,
                                              train_network_idx, ep)).replace('[', '').replace(']', '')
                        policy.load(sess, model_destination)
                        i_train += 1
                        i_train = i_train % train_tot_simulations

                    # If at least one time slot has passed to gather experience
                    if (sim % train_episodes['T_train'] > 1):
                        # Each agent picks its strategy.
                        for agent in range(N):
                            current_local_state = policy.local_state(sim, agent, p_strategy_all, H_all_2,
                                                                     neighbors, neighbors_in,
                                                                     sum_rate_list_distributed_policy, sims_pos_p)
                            a_time = time.time()
                            strategy = policy.act_noepsilon(sess, current_local_state, sim)
                            time_calculating_strategy_takes.append(time.time() - a_time)
                            # Pick the action
                            p_strategy[agent] = strategy_translation[strategy]
                            # Add the current state to the short-term memory to observe it during the next state
                            policy.previous_state[agent, :] = current_local_state
                            policy.previous_action[agent] = strategy

                    if (sim % train_episodes['T_train'] < 2):
                        p_strategy = Pmax * np.ones(N)  # np.random.rand(N)
                    p_strategy_current = np.array(p_strategy)
                    policy.prev_suminterferences = (np.matmul(H_all_2[sim], p_strategy) -
                                                    H_all_2[sim].diagonal() * p_strategy + noise_var)
                    sims_pos_p[np.where(p_strategy_current > 0)] = sim

                    # Rebuild the neighbor sets based on the received-interference threshold.
                    tmp_neighbors_in = []
                    tmp_neighbors = []
                    for nei_i in range(N):
                        neigh_tmp_variab = np.where((H_all[sim][nei_i, :] ** 2) * p_strategy_current > neightresh)
                        neigh_tmp_variab = np.delete(neigh_tmp_variab, np.where(neigh_tmp_variab[0] == nei_i))
                        tmp_neighbors_in.append(neigh_tmp_variab)
                    for nei_i in range(N):
                        tmp_neighlist = []
                        for nei_j in range(N):
                            if (len(np.where(tmp_neighbors_in[nei_j] == nei_i)[0]) != 0):
                                tmp_neighlist.append(nei_j)
                        if (len(tmp_neighlist) == 0 and len(neighbors) > 0):
                            tmp_neighbors.append(np.array(neighbors[-1][nei_i]))
                        else:
                            tmp_neighbors.append(np.array(tmp_neighlist))
                    neighbors.append(tmp_neighbors)
                    neighbors_in.append(tmp_neighbors_in)

                    # All sum rates in a list
                    sum_rate_list_distributed_policy.append(
                        pb.reward_helper(H_all[sim], p_strategy, N, noise_var, Pmax, neighbors_in[-1]))
                    sum_rate_distributed_policy.append(
                        pb.sumrate_weighted_clipped(H_all[sim], p_strategy, N, noise_var, weights[sim]))
                    p_strategy_all.append(np.array(p_strategy))
                    if (sim % 2500 == 0):
                        print('Test time %d' % (sim))

            sum_rate_distributed_policy_episode.append(copy.copy(sum_rate_distributed_policy))
            p_strategy_all_apisode.append(copy.copy(p_strategy_all))

            # End test phase: save this episode's results.
            np_save_path = './simulations/sumrate/test/%s_%s_%s_episode%d.ckpt' % (
                json_file, json_file_train, json_file_policy_train, ep)
            print(np_save_path)
            np.savez(np_save_path, options, options_policy, sum_rate_distributed_policy_episode,
                     p_strategy_all_apisode, time_optimization_at_each_slot_takes,
                     time_calculating_strategy_takes, included_train_episodes, inner_train_networks)
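# --- Illustrative sketch (not part of the original source): a possible
# command-line entry point for the evaluation script above. The flag names and
# defaults are assumptions inferred from the attributes read off `args`
# (json_file, json_files_train, json_file_policy_train); adjust them to match
# the project's actual argument parser.
if __name__ == '__main__':
    import argparse

    parser = argparse.ArgumentParser(description='Evaluate trained power-allocation policy checkpoints.')
    parser.add_argument('--json-file', dest='json_file', default='test_deployment',
                        help='Test deployment config under ./config/deployment/ (without .json).')
    parser.add_argument('--json-files-train', dest='json_files_train', nargs='+',
                        default=['train_deployment'],
                        help='Deployment configs the policies were trained on.')
    parser.add_argument('--json-file-policy-train', dest='json_file_policy_train', default='dqn_policy',
                        help='Policy config under ./config/policy/ (without .json).')
    main(parser.parse_args())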
# Training script: train a distributed DDPG power-allocation policy and save
# per-episode checkpoints plus the training statistics.
# Module-level imports assumed by this function (pb is taken to be the project's
# helper module, e.g. `import project_backend as pb`; DDPG provides the agent class).
import os
import json
import time
import collections
import numpy as np
import DDPG
import project_backend as pb


def main(args):
    json_file = args.json_file
    json_file_policy = args.json_file_policy
    num_sim = args.num_sim
    with open('./config/deployment/' + json_file + '.json', 'r') as f:
        options = json.load(f)
    with open('./config/policy/' + json_file_policy + '.json', 'r') as f:
        options_policy = json.load(f)
    if not options_policy['cuda']:
        os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
    import tensorflow as tf

    ## Number of samples
    total_samples = options['simulation']['total_samples']
    N = options['simulation']['N']

    if num_sim == -1:
        num_simulations = options['simulation']['num_simulations']
        simulation = options['simulation']['simulation_index_start']
    else:
        num_simulations = 1
        simulation = num_sim

    # Simulation parameters
    train_episodes = options['train_episodes']
    mobility_params = options['mobility_params']
    mobility_params['alpha_angle'] = options['mobility_params']['alpha_angle_rad'] * np.pi  # radian/sec

    # Some defaults
    Pmax_dB = 38.0 - 30
    Pmax = np.power(10.0, Pmax_dB / 10)
    n0_dB = -114.0 - 30
    noise_var = np.power(10.0, n0_dB / 10)
    # Hyperparameters
    N_neighbors = options_policy['N_neighbors']
    neightresh = noise_var * options_policy['neightresh']

    for overal_sims in range(simulation, simulation + num_simulations):
        tf.reset_default_graph()
        tf.set_random_seed(100 + overal_sims)
        np.random.seed(100 + overal_sims)

        file_path = './simulations/channel/%s_network%d' % (json_file, overal_sims)
        data = np.load(file_path + '.npz', allow_pickle=True)
        H_all = data['arr_1']
        H_all_2 = []
        for i in range(total_samples):
            H_all_2.append(H_all[i] ** 2)
        weights = []
        for loop in range(total_samples):
            weights.append(np.array(np.ones(N)))
        time_calculating_strategy_takes = []
        # Virtual neighbor placer
        neighbors_in = collections.deque([], 2)
        neighbors = collections.deque([], 2)
        sims_pos_p = np.zeros(N).astype(int) - 1

        policy = DDPG.DDPG(options, options_policy, N, Pmax, noise_var)

        # Start the simulation
        # Sum rate for the simulation
        sum_rate_distributed_policy = []
        sum_rate_list_distributed_policy = collections.deque([], 2)
        # Initial allocation is just random
        p_central = Pmax * np.random.rand(N)
        p_strategy = np.array(p_central)  # strategy is a completely different object
        p_strategy_current = np.array(p_strategy)
        time_calculating_strategy_takes = []
        time_optimization_at_each_slot_takes = []
        p_strategy_all = []

        with tf.Session() as sess:
            sess.run(policy.init)
            policy.initialize_critic_updates(sess)
            policy.initialize_actor_updates(sess)
            # Start iterating over time slots
            for sim in range(total_samples):
                policy.check_memory_restart(sess, sim)
                policy.update_handler(sess, sim)
                # Save an instance per training episode for testing purposes.
                if (sim % train_episodes['T_train'] == 0):
                    model_destination = ('./simulations/sumrate/policy/%s_%s_network%d_episode%d.ckpt' %
                                         (json_file, json_file_policy, overal_sims,
                                          int(float(sim) / train_episodes['T_train']))).replace('[', '').replace(']', '')
                    policy.save(sess, model_destination)

                # If at least one time slot has passed to gather experience
                if (sim % train_episodes['T_train'] > 1):
                    # Each agent picks its strategy.
                    for agent in range(N):
                        current_local_state = policy.local_state(sim, agent, p_strategy_all, H_all_2,
                                                                 neighbors, neighbors_in,
                                                                 sum_rate_list_distributed_policy, sims_pos_p)
                        a_time = time.time()
                        strategy = policy.act(sess, current_local_state, sim, agent)
                        time_calculating_strategy_takes.append(time.time() - a_time)

                        if (sim % train_episodes['T_train'] > 2):
                            # Now there is a previous state to form an experience.
                            sorted_neighbors_criteria = np.log10(
                                H_all_2[sim - 1][np.array(neighbors[-1][agent]), agent] /
                                policy.prev_suminterferences[neighbors[-1][agent]])
                            sorted_neighbors = neighbors[-1][agent][np.argsort(sorted_neighbors_criteria)[::-1]]
                            if len(sorted_neighbors) > N_neighbors:
                                sorted_neighbors = sorted_neighbors[:N_neighbors]
                            sorted_neighbors = np.append(sorted_neighbors, agent)
                            current_reward = np.sum(
                                np.multiply(weights[sim - 1],
                                            sum_rate_list_distributed_policy[-1][:, agent])[sorted_neighbors])
                            policy.remember(agent, current_local_state, current_reward)
                            # Only train once per time slot; agent == N - 1 ensures that.
                            if agent == (N - 1):
                                # If there is enough data to create a mini-batch
                                a_time = time.time()
                                # Train on a minibatch
                                policy.train(sess, sim)
                                time_optimization_at_each_slot_takes.append(time.time() - a_time)

                        # Pick the action
                        p_strategy[agent] = policy.Pmax * strategy  # ** 10
                        # Add the current state to the short-term memory to observe it during the next state
                        policy.previous_state[agent, :] = current_local_state
                        policy.previous_action[agent] = strategy

                if (sim % train_episodes['T_train'] < 2):
                    p_strategy = np.random.rand(N)
                p_strategy_current = np.array(p_strategy)
                policy.prev_suminterferences = (np.matmul(H_all_2[sim], p_strategy) -
                                                H_all_2[sim].diagonal() * p_strategy + noise_var)
                sims_pos_p[np.where(p_strategy_current > 0)] = sim

                # Rebuild the neighbor sets based on the received-interference threshold.
                tmp_neighbors_in = []
                tmp_neighbors = []
                for nei_i in range(N):
                    neigh_tmp_variab = np.where((H_all[sim][nei_i, :] ** 2) * p_strategy_current > neightresh)
                    neigh_tmp_variab = np.delete(neigh_tmp_variab, np.where(neigh_tmp_variab[0] == nei_i))
                    tmp_neighbors_in.append(neigh_tmp_variab)
                for nei_i in range(N):
                    tmp_neighlist = []
                    for nei_j in range(N):
                        if (len(np.where(tmp_neighbors_in[nei_j] == nei_i)[0]) != 0):
                            tmp_neighlist.append(nei_j)
                    if (len(tmp_neighlist) == 0 and len(neighbors) > 0):
                        tmp_neighbors.append(np.array(neighbors[-1][nei_i]))
                    else:
                        tmp_neighbors.append(np.array(tmp_neighlist))
                neighbors.append(tmp_neighbors)
                neighbors_in.append(tmp_neighbors_in)

                # All sum rates in a list
                sum_rate_list_distributed_policy.append(
                    pb.reward_helper(H_all[sim], p_strategy, N, noise_var, Pmax, neighbors_in[-1]))
                sum_rate_distributed_policy.append(
                    pb.sumrate_weighted_clipped(H_all[sim], p_strategy, N, noise_var, weights[sim]))
                p_strategy_all.append(np.array(p_strategy))
                if (sim % 2500 == 0):
                    print('Time %d sim %d' % (sim, overal_sims))

            policy.equalize(sess)
            print('Train is over sim %d' % (overal_sims))
            model_destination = ('./simulations/sumrate/policy/%s_%s_network%d_episode%d.ckpt' %
                                 (json_file, json_file_policy, overal_sims,
                                  int(float(total_samples) / train_episodes['T_train']))).replace('[', '').replace(']', '')
            policy.save(sess, model_destination)

        # End Train Phase
        np_save_path = './simulations/sumrate/train/%s_%s_network%d.ckpt' % (
            json_file, json_file_policy, overal_sims)
        print(np_save_path)
        np.savez(np_save_path, options, options_policy, sum_rate_distributed_policy, p_strategy_all,
                 time_optimization_at_each_slot_takes, time_calculating_strategy_takes)
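# --- Illustrative sketch (not part of the original source): the per-receiver
# interference-plus-noise term assigned to `policy.prev_suminterferences` above.
# Row i of the squared-gain matrix holds |h_ij|^2 for every transmitter j, so
# H2 @ p sums gain*power over all transmitters at receiver i; subtracting the
# diagonal term removes receiver i's own desired signal. The numbers below are
# made up purely for demonstration.
import numpy as np

H2_demo = np.array([[1.0, 0.1],
                    [0.2, 2.0]])     # H2_demo[i, j] = |h_ij|^2 (squared channel gains)
p_demo = np.array([0.5, 0.8])        # transmit powers of the two links
noise_var_demo = 1e-3                # hypothetical noise variance

interference_demo = np.matmul(H2_demo, p_demo) - H2_demo.diagonal() * p_demo + noise_var_demo
# interference_demo[0] = 0.1 * 0.8 + 1e-3   (cross link into receiver 0, plus noise)
# interference_demo[1] = 0.2 * 0.5 + 1e-3   (cross link into receiver 1, plus noise)
print(interference_demo)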
# Benchmark script: run the ideal (no-delay) FP and WMMSE allocations, plus the
# delayed central FP, random-power, and full-power baselines, and save the results.
# Module-level imports assumed by this function (pb is taken to be the project's
# helper module, e.g. `import project_backend as pb`).
import json
import numpy as np
import project_backend as pb


def main(args):
    json_file = args.json_file
    num_sim = args.num_sim
    with open('./config/deployment/' + json_file + '.json', 'r') as f:
        options = json.load(f)

    ## Number of samples
    total_samples = options['simulation']['total_samples']
    N = options['simulation']['N']
    # Now assume each time slot is 1 ms.
    isTrain = options['simulation']['isTrain']
    if isTrain and num_sim == -1:
        num_simulations = options['simulation']['num_simulations']
        simulation = options['simulation']['simulation_index_start']
    elif isTrain:
        num_simulations = 1
        simulation = num_sim
    else:
        simulation = 0
        num_simulations = 1

    # Simulation parameters
    mobility_params = options['mobility_params']
    mobility_params['alpha_angle'] = options['mobility_params']['alpha_angle_rad'] * np.pi  # radian/sec

    # Some defaults
    Pmax_dB = 38.0 - 30
    Pmax = np.power(10.0, Pmax_dB / 10)
    n0_dB = -114.0 - 30
    noise_var = np.power(10.0, n0_dB / 10)
    # Hyperparameters

    for overal_sims in range(simulation, simulation + num_simulations):
        if isTrain:
            np.random.seed(50 + overal_sims)
        else:
            np.random.seed(1050 + overal_sims + N)
        file_path = './simulations/channel/%s_network%d' % (json_file, overal_sims)
        data = np.load(file_path + '.npz', allow_pickle=True)
        H_all = data['arr_1']
        weights = []
        for loop in range(total_samples):
            weights.append(np.array(np.ones(N)))

        # Init optimizer results
        p_FP_nodelay = []
        time_FP_nodelay = []
        p_WMMSE_nodelay = []
        time_WMMSE_nodelay = []
        print('Ideal Case Run sim %d' % (overal_sims))
        print('Run FP sim %d' % (overal_sims))
        (p_FP_nodelay, time_FP_nodelay) = zip(*[
            pb.FP_algorithm_weighted(N, H, Pmax, noise_var, weight)
            for (H, weight) in zip(H_all, weights)
        ])
        print('Run WMMSE sim %d' % (overal_sims))
        (p_WMMSE_nodelay, time_WMMSE_nodelay) = zip(*[
            pb.WMMSE_algorithm_weighted(N, H, Pmax, noise_var, weight)
            for (H, weight) in zip(H_all, weights)
        ])

        # General simulations
        sum_rate_nodelay = [
            pb.sumrate_weighted_clipped(H, p, N, noise_var, weight)
            for (H, p, weight) in zip(H_all, p_FP_nodelay, weights)
        ]
        sum_rate_WMMSE = [
            pb.sumrate_weighted_clipped(H, p, N, noise_var, weight)
            for (H, p, weight) in zip(H_all, p_WMMSE_nodelay, weights)
        ]

        # Now, simulate the process where we use the original FP algorithm.
        # Assumption: we ignore the delay at the backhaul network, i.e. there is
        # no delay between the UE and the central controller.
        ##################### OTHER BENCHMARKS #####################
        # In this simulation the central allocator directly uses the most recent
        # channel condition available.
        # Sum rate for the simulation
        sum_rate_delayed_central = []
        sum_rate_random = []
        sum_rate_max = []
        # Initial allocation is just random
        p_central = Pmax * np.random.rand(N)
        for sim in range(total_samples):
            if (sim > 0):
                p_central = p_FP_nodelay[sim - 1]
            sum_rate_delayed_central.append(
                pb.sumrate_weighted_clipped(H_all[sim], p_central, N, noise_var, weights[sim]))
            sum_rate_random.append(
                pb.sumrate_weighted_clipped(H_all[sim], Pmax * np.random.rand(N), N, noise_var, weights[sim]))
            sum_rate_max.append(
                pb.sumrate_weighted_clipped(H_all[sim], Pmax * np.ones(N), N, noise_var, weights[sim]))

        np_save_path = './simulations/sumrate/benchmarks/%s_network%d' % (json_file, overal_sims)
        np.savez(np_save_path, p_FP_nodelay, time_FP_nodelay, sum_rate_nodelay,
                 p_WMMSE_nodelay, time_WMMSE_nodelay, sum_rate_WMMSE,
                 sum_rate_delayed_central, sum_rate_random, sum_rate_max)
        print('Saved to %s' % (np_save_path))
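# --- Illustrative sketch (not part of the original source): loading the
# benchmark results saved above. np.savez stores positional arguments under the
# keys arr_0, arr_1, ... in the order they were passed, so the indices below
# mirror the np.savez call in main(). The config name is a hypothetical example.
import numpy as np

json_file = 'my_deployment'   # hypothetical deployment config name
overal_sims = 0
bench = np.load('./simulations/sumrate/benchmarks/%s_network%d.npz' % (json_file, overal_sims),
                allow_pickle=True)
sum_rate_FP = bench['arr_2']        # sum_rate_nodelay (ideal FP)
sum_rate_WMMSE = bench['arr_5']     # sum_rate_WMMSE (ideal WMMSE)
sum_rate_random = bench['arr_7']    # random-power baseline
print(np.mean(sum_rate_FP), np.mean(sum_rate_WMMSE), np.mean(sum_rate_random))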