def main():

    # epsilon_decay values used for different total iteration counts:
    # 0.999     for 1000 iterations
    # 0.9998    for 10000 iterations
    # 0.99995   for 20000 iterations
    # 0.99996   for 30000 iterations
    # 0.999965  for 40000 iterations
    # 0.999955  for 50000 iterations
    # 0.999975  for 60000 iterations
    # 0.999977  for 100000 iterations
    # 0.999993  for 200000 iterations
    # 0.999997  for 500000 iterations
    # 0.999997  for 1000000 iterations
    # 0.999999  for 2000000 iterations
    # 0.9999994 for 3000000 iterations
    # 0.9999997 for 6000000 iterations

    robot = SwimmingRobot(t_interval=8)
    trial_name = 'DQN_swimming_w_theta_forward_20000_iters'
    trial_num = 0
    reward_function = forward_reward_function
    episodes = 20
    iterations = 1000
    total_iterations = episodes * iterations
    network_update_freq = 20
    batch_size = 8
    epsilon_decay = 0.99995
    learning_rate = 2e-4
    model_architecture = (50, 10)

    dqn_agent = DQN_Agent(robot=robot,
                          reward_function=reward_function,
                          trial_name=trial_name,
                          trial_num=trial_num,
                          episodes=episodes,
                          iterations=iterations,
                          network_update_freq=network_update_freq,
                          check_singularity=False,
                          input_dim=5,
                          output_dim=1,
                          actions_params=(-pi/8, pi/8, pi/8),
                          model_architecture=model_architecture,
                          memory_size=total_iterations//50,
                          memory_buffer_coef=20,
                          randomize_theta=False,
                          batch_size=batch_size,
                          gamma=0.99,
                          epsilon=1.0,
                          epsilon_min=0.1,
                          epsilon_decay=epsilon_decay,
                          learning_rate=learning_rate,
                          params=None)

    dqn_agent.run()
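
# A minimal sketch (not part of the original script) of the epsilon schedule these
# hyperparameters imply, assuming the agent multiplies epsilon by epsilon_decay once per
# iteration and clamps it at epsilon_min, as the closed form epsilon_min ** (1/total_iterations)
# used in the argparse-driven examples below suggests.
def sketch_epsilon_schedule(epsilon=1.0, epsilon_min=0.1,
                            epsilon_decay=0.99995, total_iterations=20 * 1000):
    """Return epsilon after total_iterations multiplicative decay steps."""
    for _ in range(total_iterations):
        epsilon = max(epsilon_min, epsilon * epsilon_decay)
    return epsilon

# 0.99995 ** 20000 is roughly 0.37, so this trial still explores fairly often at the end
# of training instead of decaying all the way down to epsilon_min = 0.1.
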
# Example 2
def main():

    # epsilon_decay values used for different total iteration counts:
    # 0.999     for 1000 iterations
    # 0.9998    for 10000 iterations
    # 0.99995   for 20000 iterations
    # 0.99996   for 30000 iterations
    # 0.999965  for 40000 iterations
    # 0.99996   for 50000 iterations
    # 0.999975  for 60000 iterations
    # 0.999985  for 100000 iterations
    # 0.999993  for 200000 iterations
    # 0.999997  for 500000 iterations
    # 0.9999987 for 1000000 iterations
    # 0.999999  for 2000000 iterations
    # 0.9999994 for 3000000 iterations
    # 0.9999997 for 6000000 iterations

    robot = ThreeLinkRobot(t_interval=4)
    trial_name = 'DQN_wheeled_w_theta_largest_action_50000_iters'
    trial_num = 29
    reward_function = forward_reward_function  # assumed: not set in the original snippet, mirrors example 1
    episodes = 50
    iterations = 1000
    total_iterations = episodes * iterations
    network_update_freq = 50
    batch_size = 8
    epsilon_decay = 0.99996
    learning_rate = 2e-4

    dqn_agent = DQN_Agent(robot=robot,
                          reward_function=reward_function,
                          trial_name=trial_name,
                          trial_num=trial_num,
                          episodes=episodes,
                          iterations=iterations,
                          network_update_freq=network_update_freq,
                          check_singularity=True,
                          input_dim=5,
                          output_dim=1,
                          actions_params=(-pi / 8, pi / 8, pi / 8),
                          model_architecture=(50, 10),
                          memory_size=total_iterations // 50,
                          memory_buffer_coef=20,
                          randomize_theta=False,
                          batch_size=batch_size,
                          gamma=0.99,
                          epsilon=1.0,
                          epsilon_min=0.1,
                          epsilon_decay=epsilon_decay,
                          learning_rate=learning_rate)

    dqn_agent.run()
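
# A minimal sketch (hypothetical helper, not DQN_Agent's internal logic) of the discrete
# action values that actions_params=(a_lower, a_upper, a_interval) plausibly encodes: the
# physical-robot example below builds the same tuple from a_lower, a_upper and a_interval,
# so (-pi/8, pi/8, pi/8) would enumerate joint displacements of -pi/8, 0 and pi/8, which is
# consistent with the "largest_action" note in this trial's name.
from math import pi

def sketch_action_values(a_lower=-pi / 8, a_upper=pi / 8, a_interval=pi / 8):
    """Enumerate joint displacements from a_lower to a_upper in steps of a_interval."""
    values = []
    a = a_lower
    while a <= a_upper + 1e-12:  # small tolerance so the upper bound survives float error
        values.append(a)
        a += a_interval
    return values  # -> [-pi/8, 0.0, pi/8] for the parameters above
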
# Example 3
def main():
    robot_type = args.robot_type
    if robot_type == "swimming":
        robot = SwimmingRobot(t_interval=args.t_interval,
                              a_upper=args.a_upper,
                              a_lower=args.a_lower,
                              no_joint_limit=args.no_joint_limit)
        check_singularity = False
    elif robot_type == "wheeled":
        robot = ThreeLinkRobot(t_interval=args.t_interval)
        check_singularity = True
    else:
        raise ValueError("Unknown robot type: {}".format(robot_type))

    episodes = args.episodes
    iterations = args.iterations
    total_iterations = episodes * iterations
    if args.reward_func == "forward":
        reward_function = forward_reward_function
    elif args.reward_func == "left":
        reward_function = left_reward_function
    else:
        raise ValueError("Unknown reward function: {}".format(args.reward_func))

    network_update_freq = args.network_update_freq
    batch_size = args.batch_size
    epsilon_min = args.epsilon_min
    epsilon_decay = epsilon_min ** (1/total_iterations)
    learning_rate = args.learning_rate
    model_architecture = [int(num) for num in args.model_architecture.split(' ')]

    trial_num = args.trial_num
    trial_name = 'DQN_{}_{}_{}_iters'.format(robot_type, args.reward_func, total_iterations)
    if args.trial_note:
        trial_name += "_{}".format(args.trial_note)

    params = {
        "robot_type": args.robot_type,
        "t_interval": args.t_interval,
        "a_upper": args.a_upper,
        "a_lower": args.a_lower,
        "no_joint_limit:": args.no_joint_limit,
        "trial_num": args.trial_num,
        "trial_note": args.trial_note,
        "episodes": args.episodes,
        "iterations": args.iterations,
        "reward_func": args.reward_func,
        "network_update_freq": args.network_update_freq,
        "epsilon_min": args.epsilon_min,
        "batch_size": args.batch_size,
        "learning_rate": args.learning_rate,
        "model_architecture": args.model_architecture,
    }

    dqn_agent = DQN_Agent(robot=robot,
                          reward_function=reward_function,
                          trial_name=trial_name,
                          trial_num=trial_num,
                          episodes=episodes,
                          iterations=iterations,
                          network_update_freq=network_update_freq,
                          check_singularity=check_singularity,
                          input_dim=len(robot.state) + 2,
                          output_dim=1,
                          actions_params=(-pi/8, pi/8, pi/8),
                          model_architecture=model_architecture,
                          memory_size=total_iterations//50,
                          memory_buffer_coef=5,  # TODO: change back to 20
                          randomize_theta=False,
                          batch_size=batch_size,
                          gamma=0.99,
                          epsilon=1.0,
                          epsilon_min=epsilon_min,
                          epsilon_decay=epsilon_decay,
                          learning_rate=learning_rate,
                          params=params)

    dqn_agent.run()
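
# The argparse-driven mains above and below read a module-level `args` object that is not
# defined in these snippets. A plausible parser supplying the attributes they use is sketched
# here; the flag names come from the code, but the types and defaults are assumptions.
import argparse
from math import pi

def sketch_parse_args():
    parser = argparse.ArgumentParser(description="Train a DQN agent on a simulated or physical robot.")
    parser.add_argument("--robot_type", type=str, default="swimming")       # "swimming", "wheeled" or "physical"
    parser.add_argument("--t_interval", type=float, default=8)
    parser.add_argument("--a_lower", type=float, default=-pi / 8)
    parser.add_argument("--a_upper", type=float, default=pi / 8)
    parser.add_argument("--a_interval", type=float, default=pi / 8)
    parser.add_argument("--no_joint_limit", action="store_true")
    parser.add_argument("--delay", type=float, default=0.0)                 # physical robot only
    parser.add_argument("--trial_num", type=int, default=0)
    parser.add_argument("--trial_note", type=str, default="")
    parser.add_argument("--episodes", type=int, default=20)
    parser.add_argument("--iterations", type=int, default=1000)
    parser.add_argument("--reward_func", type=str, default="forward")       # "forward" or "left"
    parser.add_argument("--network_update_freq", type=int, default=20)
    parser.add_argument("--epsilon_min", type=float, default=0.1)
    parser.add_argument("--batch_size", type=int, default=8)
    parser.add_argument("--learning_rate", type=float, default=2e-4)
    parser.add_argument("--model_architecture", type=str, default="50 10")  # space-separated here, "50_10" in the physical-robot script
    return parser.parse_args()
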
def main():
    robot_type = args.robot_type
    if robot_type == "physical":
        robot = PhysicalRobot(delay=args.delay)
        check_singularity = False
        is_physical_robot = True
    else:
        raise ValueError("Unknown robot type: {}".format(robot_type))

    episodes = args.episodes
    iterations = args.iterations
    total_iterations = episodes * iterations
    if args.reward_func == "forward":
        reward_function = physical_forward_reward_function
    else:
        raise ValueError("Unknown reward function: {}".format(args.reward_func))

    a_lower = args.a_lower
    a_upper = args.a_upper
    a_interval = args.a_interval
    action_params = (a_lower, a_upper, a_interval)
    network_update_freq = args.network_update_freq
    batch_size = args.batch_size
    epsilon_min = args.epsilon_min
    epsilon_decay = epsilon_min ** (1/total_iterations)
    learning_rate = args.learning_rate
    model_architecture = [int(num) for num in args.model_architecture.split('_')]

    trial_num = args.trial_num
    trial_name = 'DQN_{}_{}_{}_iters'.format(robot_type, args.reward_func, total_iterations)
    if args.trial_note:
        trial_name += "_{}".format(args.trial_note)

    params = {
        "robot_type": args.robot_type,
        "delay": args.delay,
        "a_lower": args.a_lower,
        "a_upper": args.a_upper,
        "a_interval": args.a_interval, 
        "trial_num": args.trial_num,
        "trial_note": args.trial_note,
        "episodes": args.episodes,
        "iterations": args.iterations,
        "reward_func": args.reward_func,
        "network_update_freq": args.network_update_freq,
        "epsilon_min": args.epsilon_min,
        "batch_size": args.batch_size,
        "learning_rate": args.learning_rate,
        "model_architecture": args.model_architecture,
    }

    dqn_agent = DQN_Agent(robot=robot,
                          reward_function=reward_function,
                          trial_name=trial_name,
                          trial_num=trial_num,
                          episodes=episodes,
                          iterations=iterations,
                          network_update_freq=network_update_freq,
                          check_singularity=check_singularity,
                          is_physical_robot=is_physical_robot,
                          input_dim=len(robot.state) + 2,
                          output_dim=1,
                          actions_params=action_params,
                          model_architecture=model_architecture,
                          memory_size=total_iterations // 5,
                          memory_buffer_coef=20,
                          randomize_theta=False,
                          batch_size=batch_size,
                          gamma=0.99,
                          epsilon=1.0,
                          epsilon_min=epsilon_min,
                          epsilon_decay=epsilon_decay,
                          learning_rate=learning_rate,
                          params=params)

    dqn_agent.run()
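
# The argparse-driven scripts derive epsilon_decay = epsilon_min ** (1/total_iterations).
# A small standalone check (not part of the original code) that this choice decays epsilon
# from 1.0 to (approximately) epsilon_min after exactly total_iterations multiplicative steps:
def sketch_check_epsilon_decay(epsilon_min=0.1, total_iterations=50 * 1000):
    epsilon_decay = epsilon_min ** (1 / total_iterations)
    epsilon = 1.0
    for _ in range(total_iterations):
        epsilon *= epsilon_decay
    assert abs(epsilon - epsilon_min) < 1e-6, epsilon
    return epsilon_decay

# sketch_check_epsilon_decay(0.1, 50000) returns ~0.999954, in the same ballpark as the
# hand-tuned values (0.999955 / 0.99996) listed for 50000 iterations in the tables above.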