def main(): """ Train the agent """ # number of different trainings settings train_num = [1, 2, 3, 4, 5, 6, 7, 8, 9] # number of each D2D's Real FB num_feedback_set = [3] # number of BS output, that is, the number of compressed global information num_bs_output_set = [12] gamma_set = [0.05] batch_set = [512] # weight for the V2V sum rate v2v_weight = 1 # weight for the V2I sum rate v2i_weight = 0.1 num_train_settings = 1 # start training for train_loop in range(num_train_settings): # set the current random seed for training train_seed_sequence = 1001 random.seed(train_seed_sequence) np.random.seed(train_seed_sequence) tf.set_random_seed(train_seed_sequence) # set values for current simulation curr_RL_Config = RL_Config() train_show_tra = '-----Start the Number -- ' + str( train_num[train_loop]) + ' -- training -----!' print(train_show_tra) # set key parameters for this train num_feedback = num_feedback_set[0] gamma = gamma_set[0] batch_size = batch_set[0] num_bs_output = num_bs_output_set[train_loop] curr_RL_Config.set_train_value(num_feedback, gamma, batch_size, num_bs_output, v2v_weight, v2i_weight) # start the Environment Env = start_env() # run the training process [Train_Loss, Reward_Per_Train_Step, Reward_Per_Episode, Train_Q_mean, Train_Q_max_mean, Orig_Train_Q_mean, Orig_Train_Q_max_mean] \ = run_train(Env, curr_RL_Config) # save the train results save_flag = save_train_results(Train_Loss, Reward_Per_Train_Step, Reward_Per_Episode, Train_Q_mean, Train_Q_max_mean, Orig_Train_Q_mean, Orig_Train_Q_max_mean, curr_RL_Config, Env) if save_flag: print('RL Training is finished!')
def main(): """ Test the trained agent """ # set of different testing seeds test_num = [1] # set of different D2D FBs num_feedback_set = [3] # number of BS outputs, that is, the number of compressed global information num_bs_output_set = [12, 12, 16, 24] gamma_set = [0.05] # set of different batch sizes batch_set = [512] # number of different testing seeds num_test_settings = 1 # number of different BS outputs and FBs num_BS_Out = 1 # weight for the V2V sum rate v2v_weight = 1 # weight for the V2I sum rate v2i_weight = 0.1 # parameter setting for testing num_test_episodes = 2000 num_test_steps = 1000 opt_flag = False # start training for BS_loop in range(num_BS_Out): # run at different FB curr_FB = num_feedback_set[0] curr_BS_Output = num_bs_output_set[BS_loop] FB_str = '>>>>>>>>>Testing FB = ' + str(curr_FB) + ' -BS Outputs = ' + str(curr_BS_Output) \ + ' at different random seeds<<<<<<<<<' print(FB_str) for test_loop in range(num_test_settings): # set the current random seed for training test_seed_sequence = test_num[test_loop] random.seed(test_seed_sequence) np.random.seed(test_seed_sequence) tf.set_random_seed(test_seed_sequence) # set values for current simulation curr_RL_Config = RL_Config() train_show_tra = '----- Start the Number -- ' + str( test_num[test_loop]) + ' -- Testing -----!' print(train_show_tra) # set key parameters for this train num_feedback = num_feedback_set[0] gamma = gamma_set[0] batch_size = batch_set[0] num_bs_output = num_bs_output_set[BS_loop] curr_RL_Config.set_train_value(num_feedback, gamma, batch_size, num_bs_output, v2v_weight, v2i_weight) # display the parameters settings for current trained model curr_RL_Config.display() # start the Environment Env = start_env() # load the trained model BS_Agent = load_trained_model(Env, curr_RL_Config) # set key parameters for this testing curr_RL_Config.set_test_values(num_test_episodes, num_test_steps, opt_flag, v2v_weight, v2i_weight) # run the testing process and save the testing results save_flag = run_test(curr_RL_Config, BS_Agent, test_seed_sequence) # track the testing process if save_flag: print('RL Testing is finished!')