def main():
    """Entry point: train a DQN agent on a swimming robot with the forward reward."""
    # Epsilon-decay values previously tuned per total-iteration budget:
    #   1e3 -> 0.999      1e4 -> 0.9998    2e4 -> 0.99995   3e4 -> 0.99996
    #   4e4 -> 0.999965   5e4 -> 0.999955  6e4 -> 0.999975  1e5 -> 0.999977
    #   2e5 -> 0.999993   5e5 -> 0.999997  1e6 -> 0.999997  2e6 -> 0.999999
    #   3e6 -> 0.9999994  6e6 -> 0.9999997
    robot = SwimmingRobot(t_interval=8)

    trial_name = 'DQN_swimming_w_theta_forward_20000_iters'
    trial_num = 0
    reward_function = forward_reward_function

    episodes = 20
    iterations = 1000
    total_iterations = episodes * iterations

    network_update_freq = 20
    batch_size = 8
    epsilon_decay = 0.99995  # tuned for the 20000-iteration budget above
    learning_rate = 2e-4
    model_architecture = (50, 10)

    dqn_agent = DQN_Agent(
        robot=robot,
        reward_function=reward_function,
        trial_name=trial_name,
        trial_num=trial_num,
        episodes=episodes,
        iterations=iterations,
        network_update_freq=network_update_freq,
        check_singularity=False,  # swimming robot has no singular configurations checked
        input_dim=5,
        output_dim=1,
        actions_params=(-pi / 8, pi / 8, pi / 8),
        model_architecture=model_architecture,
        memory_size=total_iterations // 50,
        memory_buffer_coef=20,
        randomize_theta=False,
        batch_size=batch_size,
        gamma=0.99,
        epsilon=1.0,
        epsilon_min=0.1,
        epsilon_decay=epsilon_decay,
        learning_rate=learning_rate,
        params=None,
    )
    dqn_agent.run()
def main():
    """Entry point: train a DQN agent on a wheeled three-link robot (trial 29).

    50 episodes x 1000 iterations with the largest action step (pi/8) and
    singularity checking enabled.
    """
    # Epsilon-decay values previously tuned per total-iteration budget:
    #   1e3 -> 0.999      1e4 -> 0.9998    2e4 -> 0.99995   3e4 -> 0.99996
    #   4e4 -> 0.999965   5e4 -> 0.99996   6e4 -> 0.999975  1e5 -> 0.999985
    #   2e5 -> 0.999993   5e5 -> 0.999997  1e6 -> 0.9999987 2e6 -> 0.999999
    #   3e6 -> 0.9999994  6e6 -> 0.9999997
    robot = ThreeLinkRobot(t_interval=4)

    trial_name = 'DQN_wheeled_w_theta_largest_action_50000_iters'
    trial_num = 29
    # FIX: reward_function was passed to DQN_Agent below but never assigned in
    # this function (unlike the swimming variant), which raises NameError when
    # no module-level fallback exists.  Use the forward reward as in the
    # sibling trials.  NOTE(review): confirm no different module-level
    # reward_function was intended here.
    reward_function = forward_reward_function

    episodes = 50
    iterations = 1000
    total_iterations = episodes * iterations

    network_update_freq = 50
    batch_size = 8
    epsilon_decay = 0.99996  # tuned for the 50000-iteration budget above
    learning_rate = 2e-4

    dqn_agent = DQN_Agent(
        robot=robot,
        reward_function=reward_function,
        trial_name=trial_name,
        trial_num=trial_num,
        episodes=episodes,
        iterations=iterations,
        network_update_freq=network_update_freq,
        check_singularity=True,  # wheeled robot can reach singular configurations
        input_dim=5,
        output_dim=1,
        actions_params=(-pi / 8, pi / 8, pi / 8),
        model_architecture=(50, 10),
        memory_size=total_iterations // 50,
        memory_buffer_coef=20,
        randomize_theta=False,
        batch_size=batch_size,
        gamma=0.99,
        epsilon=1.0,
        epsilon_min=0.1,
        epsilon_decay=epsilon_decay,
        learning_rate=learning_rate,
    )
    dqn_agent.run()
def main():
    """Entry point: configure and run a DQN trial from command-line arguments.

    Builds a simulated robot (swimming or wheeled) from the global ``args``
    namespace, derives epsilon decay so that epsilon reaches ``epsilon_min``
    after exactly ``total_iterations`` steps, records every CLI parameter in
    ``params`` for reproducibility, and launches the agent.

    Raises:
        ValueError: if ``args.robot_type`` or ``args.reward_func`` is unknown.
    """
    robot_type = args.robot_type
    if robot_type == "swimming":
        robot = SwimmingRobot(t_interval=args.t_interval,
                              a_upper=args.a_upper,
                              a_lower=args.a_lower,
                              no_joint_limit=args.no_joint_limit)
        check_singularity = False
    elif robot_type == "wheeled":
        robot = ThreeLinkRobot(t_interval=args.t_interval)
        check_singularity = True
    else:
        raise ValueError("Unknown robot type: {}".format(robot_type))

    episodes = args.episodes
    iterations = args.iterations
    total_iterations = episodes * iterations

    if args.reward_func == "forward":
        reward_function = forward_reward_function
    elif args.reward_func == "left":
        reward_function = left_reward_function
    else:
        raise ValueError("Unknown reward function: {}".format(args.reward_func))

    network_update_freq = args.network_update_freq
    batch_size = args.batch_size
    epsilon_min = args.epsilon_min
    # Decay chosen so epsilon_min is reached after exactly total_iterations steps.
    epsilon_decay = epsilon_min ** (1 / total_iterations)
    learning_rate = args.learning_rate
    # Architecture passed as space-separated layer sizes, e.g. "50 10".
    model_architecture = [int(num) for num in args.model_architecture.split(' ')]

    trial_num = args.trial_num
    trial_name = 'DQN_{}_{}_{}_iters'.format(robot_type, args.reward_func,
                                             total_iterations)
    if args.trial_note:
        trial_name += "_{}".format(args.trial_note)

    # Full CLI configuration, stored alongside the trial for reproducibility.
    params = {
        "robot_type": args.robot_type,
        "t_interval": args.t_interval,
        "a_upper": args.a_upper,
        "a_lower": args.a_lower,
        # FIX: key previously had a stray trailing colon ("no_joint_limit:"),
        # corrupting the recorded parameter name.
        "no_joint_limit": args.no_joint_limit,
        "trial_num": args.trial_num,
        "trial_note": args.trial_note,
        "episodes": args.episodes,
        "iterations": args.iterations,
        "reward_func": args.reward_func,
        "network_update_freq": args.network_update_freq,
        "epsilon_min": args.epsilon_min,
        "batch_size": args.batch_size,
        "learning_rate": args.learning_rate,
        "model_architecture": args.model_architecture,
    }

    dqn_agent = DQN_Agent(
        robot=robot,
        reward_function=reward_function,
        trial_name=trial_name,
        trial_num=trial_num,
        episodes=episodes,
        iterations=iterations,
        network_update_freq=network_update_freq,
        check_singularity=check_singularity,
        input_dim=len(robot.state) + 2,
        output_dim=1,
        actions_params=(-pi / 8, pi / 8, pi / 8),
        model_architecture=model_architecture,
        memory_size=total_iterations // 50,
        # FIX: was temporarily 5 with an inline reminder
        # "don't forget to change back to 20!" — restored to 20.
        memory_buffer_coef=20,
        randomize_theta=False,
        batch_size=batch_size,
        gamma=0.99,
        epsilon=1.0,
        epsilon_min=epsilon_min,
        epsilon_decay=epsilon_decay,
        learning_rate=learning_rate,
        params=params,
    )
    dqn_agent.run()
def main():
    """Entry point: configure and run a DQN trial on the physical robot.

    Reads all settings from the global ``args`` namespace, derives epsilon
    decay so epsilon reaches ``epsilon_min`` after ``total_iterations`` steps,
    records the CLI parameters for reproducibility, and launches the agent.

    Raises:
        ValueError: if ``args.robot_type`` or ``args.reward_func`` is unknown.
    """
    robot_type = args.robot_type
    if robot_type != "physical":
        raise ValueError("Unknown robot type: {}".format(robot_type))
    robot = PhysicalRobot(delay=args.delay)
    check_singularity = False
    is_physical_robot = True

    episodes = args.episodes
    iterations = args.iterations
    total_iterations = episodes * iterations

    if args.reward_func != "forward":
        raise ValueError("Unknown reward function: {}".format(args.reward_func))
    reward_function = physical_forward_reward_function

    # Joint-angle action space: (lower bound, upper bound, step).
    action_params = (args.a_lower, args.a_upper, args.a_interval)

    network_update_freq = args.network_update_freq
    batch_size = args.batch_size
    epsilon_min = args.epsilon_min
    # Decay chosen so epsilon_min is reached after exactly total_iterations steps.
    epsilon_decay = epsilon_min ** (1 / total_iterations)
    learning_rate = args.learning_rate
    # Architecture passed as underscore-separated layer sizes, e.g. "50_10".
    model_architecture = [int(num) for num in args.model_architecture.split('_')]

    trial_num = args.trial_num
    trial_name = 'DQN_{}_{}_{}_iters'.format(robot_type, args.reward_func,
                                             total_iterations)
    if args.trial_note:
        trial_name += "_{}".format(args.trial_note)

    # Full CLI configuration, stored alongside the trial for reproducibility.
    params = {
        "robot_type": args.robot_type,
        "delay": args.delay,
        "a_lower": args.a_lower,
        "a_upper": args.a_upper,
        "a_interval": args.a_interval,
        "trial_num": args.trial_num,
        "trial_note": args.trial_note,
        "episodes": args.episodes,
        "iterations": args.iterations,
        "reward_func": args.reward_func,
        "network_update_freq": args.network_update_freq,
        "epsilon_min": args.epsilon_min,
        "batch_size": args.batch_size,
        "learning_rate": args.learning_rate,
        "model_architecture": args.model_architecture,
    }

    dqn_agent = DQN_Agent(
        robot=robot,
        reward_function=reward_function,
        trial_name=trial_name,
        trial_num=trial_num,
        episodes=episodes,
        iterations=iterations,
        network_update_freq=network_update_freq,
        check_singularity=check_singularity,
        is_physical_robot=is_physical_robot,
        input_dim=len(robot.state) + 2,
        output_dim=1,
        actions_params=action_params,
        model_architecture=model_architecture,
        memory_size=total_iterations // 5,
        memory_buffer_coef=20,
        randomize_theta=False,
        batch_size=batch_size,
        gamma=0.99,
        epsilon=1.0,
        epsilon_min=epsilon_min,
        epsilon_decay=epsilon_decay,
        learning_rate=learning_rate,
        params=params,
    )
    dqn_agent.run()