# Assumed imports, matching this repository's usual module layout; adjust the
# module paths if the actual file names differ.
from arguments import get_args
from chrono import Chrono
from critics import DoubleQNet
from memory import ReplayBuffer
from policies import PolicyNet, PolicyWrapper
from simulation import Simulation
from utils import create_data_folders, set_files
from visu.visu_critics import plot_critic
from visu.visu_policies import plot_policy
from visu.visu_results import plot_results


def main(params):
    chrono = Chrono()
    # The Simulation object wraps the environment and the SAC training loop
    # (its constructor signature is assumed from how it is used below).
    simulation = Simulation(params)
    env = simulation.env
    # Open the files used to log the policy and critic losses during training
    # (set_files is assumed to be the repo's helper for this).
    policy_loss_file, critic_loss_file = set_files(params.study_name, params.env_name)

    # Run several independent training repetitions, indexed by j
    # (nb_repet is assumed to be provided by get_args()).
    for j in range(params.nb_repet):
        env.reinit()
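        # Fresh replay buffer for this repetition; SAC is off-policy and
        # samples its update minibatches from the transitions stored here.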
        memory = ReplayBuffer()

        # Initialise the policy/actor
        policy = PolicyNet(params.lr_actor, params.init_alpha, params.lr_alpha, params.target_entropy_alpha)
        pw = PolicyWrapper(policy, params.policy_type, params.env_name, params.team_name, params.max_episode_steps)
        pw.duration_flag = False
        # Initialise the critics
        critic = DoubleQNet(params.lr_critic, params.gamma, params.tau)

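        # Plot the initial policy ('_ante_') so it can be compared with the
        # post-training plot below.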
        plot_policy(policy, env, True, params.env_name, params.study_name, '_ante_', j, plot=False)

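        # Train with SAC: interact with the environment, store transitions in
        # the replay buffer, and update the actor, the two critics and the
        # entropy coefficient; losses are written to the two log files.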
        simulation.train(memory, pw, critic, policy_loss_file, critic_loss_file)

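        # Plot the trained policy and each of the two Q-networks, then save
        # both critics to disk.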
        plot_policy(policy, env, True, params.env_name, params.study_name, '_post_', j, plot=False)
        plot_critic(env, params.env_name, critic.q1, policy, params.study_name, '_q1_post_', j)
        plot_critic(env, params.env_name, critic.q2, policy, params.study_name, '_q2_post_', j)
        # Save each critic under a distinct file name so q2 does not overwrite q1.
        critic.q1.save_model('data/critics/{}#{}#SAC_q1_{}.pt'.format(params.env_name, params.team_name, str(j)))
        critic.q2.save_model('data/critics/{}#{}#SAC_q2_{}.pt'.format(params.env_name, params.team_name, str(j)))

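    # Clean up: close the environment and stop the run timer.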
    simulation.env.close()
    chrono.stop()


if __name__ == '__main__':
    args = get_args()
    print(args)
    create_data_folders()
    main(args)
    plot_results(args)