def main():

    # Hand-tuned epsilon_decay values for a given total number of iterations:
    # 0.999 for 1000 iterations
    # 0.9998 for 10000 iterations
    # 0.99995 for 20000
    # 0.99996 for 30000
    # 0.999965 for 40000
    # 0.999955 for 50000
    # 0.999975 for 60000
    # 0.999977 for 100000
    # 0.999993 for 200000
    # 0.999997 for 500000
    # 0.999997 for 1000000
    # 0.999999 for 2000000
    # 0.9999994 for 3000000
    # 0.9999997 for 6000000

    robot = SwimmingRobot(t_interval=8)
    trial_name = 'DQN_swimming_w_theta_forward_20000_iters'
    trial_num = 0
    reward_function = forward_reward_function
    episodes = 20
    iterations = 1000
    total_iterations = episodes * iterations
    network_update_freq = 20
    batch_size = 8
    epsilon_decay = 0.99995
    learning_rate = 2e-4
    model_architecture = (50, 10)

    dqn_agent = DQN_Agent(robot=robot,
                          reward_function=reward_function,
                          trial_name=trial_name,
                          trial_num=trial_num,
                          episodes=episodes,
                          iterations=iterations,
                          network_update_freq=network_update_freq,
                          check_singularity=False,
                          input_dim=5,
                          output_dim=1,
                          actions_params=(-pi/8, pi/8, pi/8),
                          model_architecture=model_architecture,
                          memory_size=total_iterations//50,
                          memory_buffer_coef=20,
                          randomize_theta=False,
                          batch_size=batch_size,
                          gamma=0.99,
                          epsilon=1.0,
                          epsilon_min=0.1,
                          epsilon_decay=epsilon_decay,
                          learning_rate=learning_rate,
                          params=None)

    dqn_agent.run()
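# The decay table above was hand-tuned. The CLI-driven examples later in this listing
# compute the rate instead, so that epsilon shrinks geometrically from 1.0 to roughly
# epsilon_min over the whole run. A minimal standalone sketch of that relationship
# (not part of the original example):
def suggest_epsilon_decay(total_iterations, epsilon_min=0.1):
    # Per-step multiplicative decay satisfying decay ** total_iterations == epsilon_min.
    return epsilon_min ** (1 / total_iterations)

# e.g. suggest_epsilon_decay(20000) ~= 0.999885 decays epsilon down to epsilon_min after
# 20000 steps, while the hand-tuned 0.99995 above leaves epsilon near 0.37.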
Example #2
def main():

    # Hand-tuned epsilon_decay values for a given total number of iterations:
    # 0.999 for 1000 iterations
    # 0.9998 for 10000 iterations
    # 0.99995 for 20000
    # 0.99996 for 30000
    # 0.999965 for 40000
    # 0.99996 for 50000
    # 0.999975 for 60000
    # 0.999985 for 100000
    # 0.999993 for 200000
    # 0.999997 for 500000
    # 0.9999987 for 1000000
    # 0.999999 for 2000000
    # 0.9999994 for 3000000
    # 0.9999997 for 6000000

    robot = ThreeLinkRobot(t_interval=4)
    trial_name = 'DQN_wheeled_w_theta_largest_action_50000_iters'
    trial_num = 29
    episodes = 50
    iterations = 1000
    total_iterations = episodes * iterations
    network_update_freq = 50
    batch_size = 8
    epsilon_decay = 0.99996
    learning_rate = 2e-4
    # reward_function is referenced below but never defined in this snippet;
    # forward_reward_function (used by the other examples) is assumed here.
    reward_function = forward_reward_function

    dqn_agent = DQN_Agent(robot=robot,
                          reward_function=reward_function,
                          trial_name=trial_name,
                          trial_num=trial_num,
                          episodes=episodes,
                          iterations=iterations,
                          network_update_freq=network_update_freq,
                          check_singularity=True,
                          input_dim=5,
                          output_dim=1,
                          actions_params=(-pi / 8, pi / 8, pi / 8),
                          model_architecture=(50, 10),
                          memory_size=total_iterations // 50,
                          memory_buffer_coef=20,
                          randomize_theta=False,
                          batch_size=batch_size,
                          gamma=0.99,
                          epsilon=1.0,
                          epsilon_min=0.1,
                          epsilon_decay=epsilon_decay,
                          learning_rate=learning_rate)

    dqn_agent.run()
def run_environment(h_size, middle_size, lstm_layers, learning_starts,
                    learning_freq, target_update_freq, lr, gamma, batch_size,
                    replay_buffer_size, epsilon_decay_steps, final_epsilon,
                    root_dir, num):
    log_dir = os.path.join(root_dir, "{:03}".format(num))
    os.makedirs(log_dir)
    agent = DQN_Agent(h_size,
                      middle_size,
                      lstm_layers,
                      learning_starts,
                      learning_freq,
                      target_update_freq,
                      lr,
                      gamma,
                      batch_size,
                      replay_buffer_size,
                      epsilon_decay_steps,
                      final_epsilon,
                      verbose=True,
                      log_dir=log_dir)

    # Look up the reward function named in the module-level training config, then train.
    Reward_func = getattr(reward, training['reward_func'])
    agent.train(Reward_func, training['reward_settings'], training['episodes'],
                training['targets'], training['reg_inits'])
    agent.save("best", best=True)
    performance = agent.global_performance()
    best_performance, best_episode = agent.best_performance()
    # Score the run: overall performance plus a small bonus for reaching the best
    # performance in an early episode.
    return performance + (1 / (1 + best_episode))
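# A minimal usage sketch for run_environment; the hyperparameter values and root
# directory below are illustrative assumptions only, and the call still relies on the
# module-level `training` dict and `reward` module being configured beforehand.
score = run_environment(h_size=64, middle_size=32, lstm_layers=1,
                        learning_starts=1000, learning_freq=4,
                        target_update_freq=500, lr=1e-3, gamma=0.99,
                        batch_size=32, replay_buffer_size=10000,
                        epsilon_decay_steps=5000, final_epsilon=0.05,
                        root_dir="runs", num=0)
print(score)  # global performance plus a bonus for reaching the best result early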
Example #4
def main():
    args = parse_arguments()
    agent = DQN_Agent(args, memory_size=args.memory_size, burn_in=args.burn_in)

    if args.train == 1:
        if not os.path.exists(args.folder_prefix):
            os.makedirs(args.folder_prefix)

        sys.stdout = Logger(args.folder_prefix + args.logfile)
        print_user_flags(args)

        PolicyModel = args.folder_prefix + 'PolicyModel/'
        TargetModel = args.folder_prefix + 'TargetModel/'
        RewardsCSV = args.folder_prefix + 'RewardsCSV/'

        # Create each output directory, optionally wiping it first when reset_dir is set.
        for directory in (PolicyModel, TargetModel, RewardsCSV):
            if not os.path.exists(directory):
                os.makedirs(directory)
            elif args.reset_dir:
                shutil.rmtree(directory, ignore_errors=True)
                os.makedirs(directory)

        agent.train()
    else:
        agent.test(test_epi=args.test_epi,
                   model_file=args.weight_file,
                   lookahead=agent.greedy_policy)

    agent.agent_close()
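# A possible parse_arguments() sketch covering only the flags this example actually
# reads; the argument types and defaults here are assumptions, not the repository's
# own definitions.
import argparse

def parse_arguments():
    parser = argparse.ArgumentParser()
    parser.add_argument('--train', type=int, default=1)            # 1 = train, anything else = test
    parser.add_argument('--memory_size', type=int, default=50000)  # replay memory capacity
    parser.add_argument('--burn_in', type=int, default=10000)      # transitions collected before learning
    parser.add_argument('--folder_prefix', type=str, default='runs/')
    parser.add_argument('--logfile', type=str, default='train.log')
    parser.add_argument('--reset_dir', action='store_true')        # wipe and recreate existing folders
    parser.add_argument('--test_epi', type=int, default=100)       # number of test episodes
    parser.add_argument('--weight_file', type=str, default=None)   # checkpoint to load in test mode
    return parser.parse_args()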
Example #5
def main():
    robot_type = args.robot_type
    if robot_type == "swimming":
        robot = SwimmingRobot(t_interval=args.t_interval,
                              a_upper=args.a_upper,
                              a_lower=args.a_lower,
                              no_joint_limit=args.no_joint_limit)
        check_singularity = False
    elif robot_type == "wheeled":
        robot = ThreeLinkRobot(t_interval=args.t_interval)
        check_singularity = True
    else:
        raise ValueError("Unknown robot type: {}".format(robot_type))

    episodes = args.episodes
    iterations = args.iterations
    total_iterations = episodes * iterations
    if args.reward_func == "forward":
        reward_function = forward_reward_function
    elif args.reward_func == "left":
        reward_function = left_reward_function
    else:
        raise ValueError("Unknown reward function: {}".format(args.reward_func))

    network_update_freq = args.network_update_freq
    batch_size = args.batch_size
    epsilon_min = args.epsilon_min
    # Geometric decay chosen so epsilon reaches epsilon_min after total_iterations steps.
    epsilon_decay = epsilon_min ** (1 / total_iterations)
    learning_rate = args.learning_rate
    model_architecture = [int(num) for num in args.model_architecture.split(' ')]

    trial_num = args.trial_num
    trial_name = 'DQN_{}_{}_{}_iters'.format(robot_type, args.reward_func, total_iterations)
    if args.trial_note:
        trial_name += "_{}".format(args.trial_note)

    params = {
        "robot_type": args.robot_type,
        "t_interval": args.t_interval,
        "a_upper": args.a_upper,
        "a_lower": args.a_lower,
        "no_joint_limit:": args.no_joint_limit,
        "trial_num": args.trial_num,
        "trial_note": args.trial_note,
        "episodes": args.episodes,
        "iterations": args.iterations,
        "reward_func": args.reward_func,
        "network_update_freq": args.network_update_freq,
        "epsilon_min": args.epsilon_min,
        "batch_size": args.batch_size,
        "learning_rate": args.learning_rate,
        "model_architecture": args.model_architecture,
    }

    dqn_agent = DQN_Agent(robot=robot,
                          reward_function=reward_function,
                          trial_name=trial_name,
                          trial_num=trial_num,
                          episodes=episodes,
                          iterations=iterations,
                          network_update_freq=network_update_freq,
                          check_singularity=check_singularity,
                          input_dim=len(robot.state) + 2,
                          output_dim=1,
                          actions_params=(-pi/8, pi/8, pi/8),
                          model_architecture=model_architecture,
                          memory_size=total_iterations//50,
                          memory_buffer_coef=5,  # temporarily 5; don't forget to change back to 20
                          randomize_theta=False,
                          batch_size=batch_size,
                          gamma=0.99,
                          epsilon=1.0,
                          epsilon_min=epsilon_min,
                          epsilon_decay=epsilon_decay,
                          learning_rate=learning_rate,
                          params=params)

    dqn_agent.run()
# This snippet assumes `robot`, `params`, and the action bounds (a_lower, a_upper,
# a_interval) were set up earlier, e.g. loaded from a previously saved trial.
action_params = (a_lower, a_upper, a_interval)
episodes = params['episodes']
iterations = params['iterations']
total_iterations = episodes * iterations
dqn_agent = DQN_Agent(
    robot=robot,
    reward_function=physical_forward_reward_function,
    trial_name="transfer learning test",
    trial_num=0,
    episodes=params['episodes'],
    iterations=params['iterations'],
    network_update_freq=params['network_update_freq'],
    check_singularity=False,
    is_physical_robot=True,
    input_dim=len(robot.state) + 2,
    output_dim=1,
    actions_params=action_params,
    model_architecture=params['model_architecture'],
    memory_size=total_iterations // 3,  # 10
    memory_buffer_coef=20,  # 20
    randomize_theta=False,
    batch_size=params['batch_size'],
    gamma=0.99,
    epsilon=1.0,
    epsilon_min=params['epsilon_min'],
    epsilon_decay=params['epsilon_decay'],
    learning_rate=params['learning_rate'],
    params=params)
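# json_fname and h5_fname are not defined anywhere in this snippet; hypothetical
# placeholders are shown here so the load below has something to point at (the real
# paths would come from a previously saved training trial).
json_fname = 'trial_model.json'  # hypothetical: saved network architecture
h5_fname = 'trial_model.h5'      # hypothetical: saved network weights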
dqn_agent.load_model(json_name=json_fname, h5_name=h5_fname)

# Policy Rollout
dqn_agent.policy_rollout(timesteps=50)
def main():
    robot_type = args.robot_type
    if robot_type == "physical":
        robot = PhysicalRobot(delay=args.delay)
        check_singularity = False
        is_physical_robot = True
    else:
        raise ValueError("Unknown robot type: {}".format(robot_type))

    episodes = args.episodes
    iterations = args.iterations
    total_iterations = episodes * iterations
    if args.reward_func == "forward":
        reward_function = physical_forward_reward_function
    else:
        raise ValueError("Unknown reward function: {}".format(args.reward_func))

    a_lower = args.a_lower
    a_upper = args.a_upper
    a_interval = args.a_interval
    action_params = (a_lower, a_upper, a_interval)
    network_update_freq = args.network_update_freq
    batch_size = args.batch_size
    epsilon_min = args.epsilon_min
    # Geometric decay chosen so epsilon reaches epsilon_min after total_iterations steps.
    epsilon_decay = epsilon_min ** (1 / total_iterations)
    learning_rate = args.learning_rate
    model_architecture = [int(num) for num in args.model_architecture.split('_')]

    trial_num = args.trial_num
    trial_name = 'DQN_{}_{}_{}_iters'.format(robot_type, args.reward_func, total_iterations)
    if args.trial_note:
        trial_name += "_{}".format(args.trial_note)

    params = {
        "robot_type": args.robot_type,
        "delay": args.delay,
        "a_lower": args.a_lower,
        "a_upper": args.a_upper,
        "a_interval": args.a_interval, 
        "trial_num": args.trial_num,
        "trial_note": args.trial_note,
        "episodes": args.episodes,
        "iterations": args.iterations,
        "reward_func": args.reward_func,
        "network_update_freq": args.network_update_freq,
        "epsilon_min": args.epsilon_min,
        "batch_size": args.batch_size,
        "learning_rate": args.learning_rate,
        "model_architecture": args.model_architecture,
    }

    dqn_agent = DQN_Agent(robot=robot,
                          reward_function=reward_function,
                          trial_name=trial_name,
                          trial_num=trial_num,
                          episodes=episodes,
                          iterations=iterations,
                          network_update_freq=network_update_freq,
                          check_singularity=check_singularity,
                          is_physical_robot=is_physical_robot,
                          input_dim=len(robot.state) + 2,
                          output_dim=1,
                          actions_params=action_params,
                          model_architecture=model_architecture,
                          memory_size=total_iterations//5, #10 
                          memory_buffer_coef=20, #20
                          randomize_theta=False,
                          batch_size=batch_size,
                          gamma=0.99,
                          epsilon=1.0,
                          epsilon_min=epsilon_min,
                          epsilon_decay=epsilon_decay,
                          learning_rate=learning_rate,
                          params=params)

    dqn_agent.run()
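# main() above reads `args` as a module-level global. A minimal entry-point sketch,
# assuming an argparse-based parse_arguments() is defined elsewhere in the repository:
if __name__ == '__main__':
    args = parse_arguments()  # assumed helper; its flags must cover every args.* field used above
    main()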