def main_train():
    """
    Train the agent.

    Runs ``num_train_settings`` training loops; each loop fixes all RNG
    seeds, builds a fresh ``RL_Config`` and environment, runs the training
    process, and saves the training results.

    NOTE(review): this function was originally also named ``main`` and was
    silently shadowed by the testing ``main`` defined right after it in
    this module, making the training entry point dead code. It is renamed
    ``main_train`` so both entry points are callable; the testing ``main``
    keeps its name, so existing callers of ``main()`` see no change.
    """
    # labels for the different training settings (only index 0 is used
    # while num_train_settings == 1)
    train_num = [1, 2, 3, 4, 5, 6, 7, 8, 9]
    # number of each D2D's real FBs
    num_feedback_set = [3]
    # number of BS outputs, i.e. the size of the compressed global information
    # NOTE(review): indexed by train_loop below, so it must hold at least
    # num_train_settings entries — confirm before raising num_train_settings
    num_bs_output_set = [12]
    gamma_set = [0.05]
    batch_set = [512]

    # weight for the V2V sum rate in the reward
    v2v_weight = 1
    # weight for the V2I sum rate in the reward
    v2i_weight = 0.1

    num_train_settings = 1

    # start training
    for train_loop in range(num_train_settings):

        # fix all RNG seeds so each training run is reproducible
        train_seed_sequence = 1001
        random.seed(train_seed_sequence)
        np.random.seed(train_seed_sequence)
        tf.set_random_seed(train_seed_sequence)

        # settings object for the current simulation
        curr_RL_Config = RL_Config()

        train_show_tra = '-----Start the Number -- ' + str(
            train_num[train_loop]) + ' -- training -----!'
        print(train_show_tra)

        # key parameters for this training run
        num_feedback = num_feedback_set[0]
        gamma = gamma_set[0]
        batch_size = batch_set[0]
        num_bs_output = num_bs_output_set[train_loop]
        curr_RL_Config.set_train_value(num_feedback, gamma, batch_size,
                                       num_bs_output, v2v_weight, v2i_weight)

        # start the environment
        Env = start_env()

        # run the training process
        (Train_Loss, Reward_Per_Train_Step, Reward_Per_Episode,
         Train_Q_mean, Train_Q_max_mean, Orig_Train_Q_mean,
         Orig_Train_Q_max_mean) = run_train(Env, curr_RL_Config)

        # save the training results
        save_flag = save_train_results(Train_Loss, Reward_Per_Train_Step,
                                       Reward_Per_Episode, Train_Q_mean,
                                       Train_Q_max_mean, Orig_Train_Q_mean,
                                       Orig_Train_Q_max_mean, curr_RL_Config,
                                       Env)
        if save_flag:
            print('RL Training is finished!')
def main():
    """
    Test the trained agent.

    For each configured number of BS outputs, loads the matching trained
    model and evaluates it once per testing seed, saving the test results.
    """
    # candidate random seeds for testing
    test_num = [1]
    # candidate numbers of D2D FBs
    num_feedback_set = [3]
    # candidate numbers of BS outputs, i.e. sizes of the compressed
    # global information
    num_bs_output_set = [12, 12, 16, 24]
    gamma_set = [0.05]
    # candidate batch sizes
    batch_set = [512]
    # how many seeds from test_num to run
    num_test_settings = 1

    # how many BS-output / FB settings to run
    num_BS_Out = 1

    # weight for the V2V sum rate in the reward
    v2v_weight = 1
    # weight for the V2I sum rate in the reward
    v2i_weight = 0.1

    # dimensions of one testing run
    num_test_episodes = 2000
    num_test_steps = 1000
    opt_flag = False

    # start testing
    for BS_loop in range(num_BS_Out):
        # announce the FB / BS-output combination being evaluated
        curr_FB = num_feedback_set[0]
        curr_BS_Output = num_bs_output_set[BS_loop]
        print(f'>>>>>>>>>Testing FB = {curr_FB} -BS Outputs = '
              f'{curr_BS_Output} at different random seeds<<<<<<<<<')

        for test_loop in range(num_test_settings):

            # fix all RNG seeds so each testing run is reproducible
            test_seed_sequence = test_num[test_loop]
            random.seed(test_seed_sequence)
            np.random.seed(test_seed_sequence)
            tf.set_random_seed(test_seed_sequence)

            # settings object for the current simulation
            curr_RL_Config = RL_Config()

            print(f'----- Start the Number -- {test_num[test_loop]}'
                  f' -- Testing -----!')

            # configure the parameters the trained model was built with
            curr_RL_Config.set_train_value(num_feedback_set[0],
                                           gamma_set[0],
                                           batch_set[0],
                                           num_bs_output_set[BS_loop],
                                           v2v_weight,
                                           v2i_weight)

            # show the parameter settings of the model under test
            curr_RL_Config.display()

            # start the environment and restore the trained agent
            Env = start_env()
            BS_Agent = load_trained_model(Env, curr_RL_Config)

            # configure the testing-only parameters
            curr_RL_Config.set_test_values(num_test_episodes, num_test_steps,
                                           opt_flag, v2v_weight, v2i_weight)

            # run the testing process, save the results, and report completion
            if run_test(curr_RL_Config, BS_Agent, test_seed_sequence):
                print('RL Testing is finished!')