def main():
    # Hand-tuned epsilon_decay values for a given total iteration count:
    #     1,000 -> 0.999          100,000 -> 0.999977
    #    10,000 -> 0.9998         200,000 -> 0.999993
    #    20,000 -> 0.99995        500,000 -> 0.999997
    #    30,000 -> 0.99996      1,000,000 -> 0.999997
    #    40,000 -> 0.999965     2,000,000 -> 0.999999
    #    50,000 -> 0.999955     3,000,000 -> 0.9999994
    #    60,000 -> 0.999975     6,000,000 -> 0.9999997
    robot = SwimmingRobot(t_interval=8)
    trial_name = 'DQN_swimming_w_theta_forward_20000_iters'
    trial_num = 0
    reward_function = forward_reward_function
    episodes = 20
    iterations = 1000
    total_iterations = episodes * iterations
    network_update_freq = 20
    batch_size = 8
    epsilon_decay = 0.99995
    learning_rate = 2e-4
    model_architecture = (50, 10)
    dqn_agent = DQN_Agent(robot=robot,
                          reward_function=reward_function,
                          trial_name=trial_name,
                          trial_num=trial_num,
                          episodes=episodes,
                          iterations=iterations,
                          network_update_freq=network_update_freq,
                          check_singularity=False,
                          input_dim=5,
                          output_dim=1,
                          actions_params=(-pi / 8, pi / 8, pi / 8),
                          model_architecture=model_architecture,
                          memory_size=total_iterations // 50,
                          memory_buffer_coef=20,
                          randomize_theta=False,
                          batch_size=batch_size,
                          gamma=0.99,
                          epsilon=1.0,
                          epsilon_min=0.1,
                          epsilon_decay=epsilon_decay,
                          learning_rate=learning_rate,
                          params=None)
    dqn_agent.run()
def main():
    # Hand-tuned epsilon_decay values for a given total iteration count:
    #     1,000 -> 0.999          100,000 -> 0.999985
    #    10,000 -> 0.9998         200,000 -> 0.999993
    #    20,000 -> 0.99995        500,000 -> 0.999997
    #    30,000 -> 0.99996      1,000,000 -> 0.9999987
    #    40,000 -> 0.999965     2,000,000 -> 0.999999
    #    50,000 -> 0.99996      3,000,000 -> 0.9999994
    #    60,000 -> 0.999975     6,000,000 -> 0.9999997
    robot = ThreeLinkRobot(t_interval=4)
    trial_name = 'DQN_wheeled_w_theta_largest_action_50000_iters'
    trial_num = 29
    # reward_function was never assigned in this script before being passed
    # below; forward_reward_function is assumed, mirroring the swimming trial.
    reward_function = forward_reward_function
    episodes = 50
    iterations = 1000
    total_iterations = episodes * iterations
    network_update_freq = 50
    batch_size = 8
    epsilon_decay = 0.99996
    learning_rate = 2e-4
    dqn_agent = DQN_Agent(robot=robot,
                          reward_function=reward_function,
                          trial_name=trial_name,
                          trial_num=trial_num,
                          episodes=episodes,
                          iterations=iterations,
                          network_update_freq=network_update_freq,
                          check_singularity=True,
                          input_dim=5,
                          output_dim=1,
                          actions_params=(-pi / 8, pi / 8, pi / 8),
                          model_architecture=(50, 10),
                          memory_size=total_iterations // 50,
                          memory_buffer_coef=20,
                          randomize_theta=False,
                          batch_size=batch_size,
                          gamma=0.99,
                          epsilon=1.0,
                          epsilon_min=0.1,
                          epsilon_decay=epsilon_decay,
                          learning_rate=learning_rate)
    dqn_agent.run()
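# The lookup tables above can be replaced by the closed form used in the
# argparse-driven scripts further below: pick epsilon_decay so that
# epsilon_decay ** total_iterations == epsilon_min. A minimal sketch:

def epsilon_decay_for(total_iterations, epsilon_min=0.1):
    """Per-step multiplicative decay that reaches epsilon_min after
    total_iterations steps, assuming epsilon starts at 1.0."""
    return epsilon_min ** (1.0 / total_iterations)

# e.g. epsilon_decay_for(50000) ~= 0.999954, in line with the hand-tuned
# 0.999955 above. The shorter runs in the tables were tuned toward an epsilon
# floor higher than 0.1, so their entries deviate more from this formula.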
def run_environment(h_size, middle_size, lstm_layers, learning_starts,
                    learning_freq, target_update_freq, lr, gamma, batch_size,
                    replay_buffer_size, epsilon_decay_steps, final_epsilon,
                    root_dir, num):
    # `training` is assumed to be a module-level config dict (reward function
    # name and training settings) shared by every run in the sweep.
    log_dir = os.path.join(root_dir, "{:03}".format(num))
    os.makedirs(log_dir)
    agent = DQN_Agent(h_size, middle_size, lstm_layers, learning_starts,
                      learning_freq, target_update_freq, lr, gamma, batch_size,
                      replay_buffer_size, epsilon_decay_steps, final_epsilon,
                      verbose=True, log_dir=log_dir)
    reward_func = getattr(reward, training['reward_func'])
    agent.train(reward_func, training['reward_settings'], training['episodes'],
                training['targets'], training['reg_inits'])
    agent.save("best", best=True)
    performance = agent.global_performance()
    best_performance, best_episode = agent.best_performance()
    # Score: overall performance plus a bonus for reaching the best episode early.
    return performance + (1 / (1 + best_episode))
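# run_environment returns a single scalar score, which makes it easy to drive
# a hyperparameter sweep. A minimal sketch of such a driver; the search space
# and values below are illustrative assumptions, not the project's settings:

import random

def random_search(root_dir, trials=20):
    best_score, best_cfg = float('-inf'), None
    for num in range(trials):
        cfg = dict(h_size=random.choice([64, 128]),
                   middle_size=random.choice([32, 64]),
                   lstm_layers=random.choice([1, 2]),
                   learning_starts=1000,
                   learning_freq=4,
                   target_update_freq=random.choice([500, 1000]),
                   lr=10 ** random.uniform(-4, -3),
                   gamma=0.99,
                   batch_size=32,
                   replay_buffer_size=50000,
                   epsilon_decay_steps=10000,
                   final_epsilon=0.1)
        score = run_environment(root_dir=root_dir, num=num, **cfg)
        if score > best_score:
            best_score, best_cfg = score, cfg
    return best_cfg, best_score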
def main():
    args = parse_arguments()
    agent = DQN_Agent(args, memory_size=args.memory_size, burn_in=args.burn_in)
    if args.train == 1:
        if not os.path.exists(args.folder_prefix):
            os.makedirs(args.folder_prefix)
        sys.stdout = Logger(args.folder_prefix + args.logfile)
        print_user_flags(args)
        # Create the three output directories, wiping them first if requested.
        for subdir in ('PolicyModel/', 'TargetModel/', 'RewardsCSV/'):
            path = args.folder_prefix + subdir
            if not os.path.exists(path):
                os.makedirs(path)
            elif args.reset_dir:
                shutil.rmtree(path, ignore_errors=True)
                os.makedirs(path)
        agent.train()
    else:
        agent.test(test_epi=args.test_epi, model_file=args.weight_file,
                   lookahead=agent.greedy_policy)
    agent.agent_close()
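# `Logger` above tees stdout into a log file, but its definition is not part
# of this excerpt. A minimal sketch of the usual pattern (an assumption, not
# the project's actual class):

import sys

class Logger(object):
    """Duplicate everything written to stdout into a log file."""

    def __init__(self, path):
        self.terminal = sys.stdout
        self.log = open(path, 'a')

    def write(self, message):
        self.terminal.write(message)
        self.log.write(message)

    def flush(self):
        self.terminal.flush()
        self.log.flush()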
def main():
    # `args` comes from the module-level argument parser (a sketch follows below).
    robot_type = args.robot_type
    if robot_type == "swimming":
        robot = SwimmingRobot(t_interval=args.t_interval,
                              a_upper=args.a_upper,
                              a_lower=args.a_lower,
                              no_joint_limit=args.no_joint_limit)
        check_singularity = False
    elif robot_type == "wheeled":
        robot = ThreeLinkRobot(t_interval=args.t_interval)
        check_singularity = True
    else:
        raise ValueError("Unknown robot type: {}".format(robot_type))

    episodes = args.episodes
    iterations = args.iterations
    total_iterations = episodes * iterations

    if args.reward_func == "forward":
        reward_function = forward_reward_function
    elif args.reward_func == "left":
        reward_function = left_reward_function
    else:
        raise ValueError("Unknown reward function: {}".format(args.reward_func))

    network_update_freq = args.network_update_freq
    batch_size = args.batch_size
    epsilon_min = args.epsilon_min
    # Decay epsilon geometrically from 1.0 down to epsilon_min over the full run.
    epsilon_decay = epsilon_min ** (1 / total_iterations)
    learning_rate = args.learning_rate
    model_architecture = [int(num) for num in args.model_architecture.split(' ')]

    trial_num = args.trial_num
    trial_name = 'DQN_{}_{}_{}_iters'.format(robot_type, args.reward_func,
                                             total_iterations)
    if args.trial_note:
        trial_name += "_{}".format(args.trial_note)

    params = {
        "robot_type": args.robot_type,
        "t_interval": args.t_interval,
        "a_upper": args.a_upper,
        "a_lower": args.a_lower,
        "no_joint_limit": args.no_joint_limit,
        "trial_num": args.trial_num,
        "trial_note": args.trial_note,
        "episodes": args.episodes,
        "iterations": args.iterations,
        "reward_func": args.reward_func,
        "network_update_freq": args.network_update_freq,
        "epsilon_min": args.epsilon_min,
        "batch_size": args.batch_size,
        "learning_rate": args.learning_rate,
        "model_architecture": args.model_architecture,
    }

    dqn_agent = DQN_Agent(robot=robot,
                          reward_function=reward_function,
                          trial_name=trial_name,
                          trial_num=trial_num,
                          episodes=episodes,
                          iterations=iterations,
                          network_update_freq=network_update_freq,
                          check_singularity=check_singularity,
                          input_dim=len(robot.state) + 2,
                          output_dim=1,
                          actions_params=(-pi / 8, pi / 8, pi / 8),
                          model_architecture=model_architecture,
                          memory_size=total_iterations // 50,
                          memory_buffer_coef=5,  # TODO: change back to 20
                          randomize_theta=False,
                          batch_size=batch_size,
                          gamma=0.99,
                          epsilon=1.0,
                          epsilon_min=epsilon_min,
                          epsilon_decay=epsilon_decay,
                          learning_rate=learning_rate,
                          params=params)
    dqn_agent.run()
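# The script above reads flags from a module-level `args`, whose parser is not
# part of this excerpt. A minimal sketch covering exactly the flags the code
# reads; types and defaults here are assumptions, not the project's values:

import argparse
from math import pi

def parse_arguments():
    parser = argparse.ArgumentParser()
    parser.add_argument('--robot_type', choices=['swimming', 'wheeled'],
                        default='swimming')
    parser.add_argument('--t_interval', type=float, default=8)
    parser.add_argument('--a_upper', type=float, default=pi / 2)
    parser.add_argument('--a_lower', type=float, default=-pi / 2)
    parser.add_argument('--no_joint_limit', action='store_true')
    parser.add_argument('--episodes', type=int, default=20)
    parser.add_argument('--iterations', type=int, default=1000)
    parser.add_argument('--reward_func', choices=['forward', 'left'],
                        default='forward')
    parser.add_argument('--network_update_freq', type=int, default=20)
    parser.add_argument('--batch_size', type=int, default=8)
    parser.add_argument('--epsilon_min', type=float, default=0.1)
    parser.add_argument('--learning_rate', type=float, default=2e-4)
    parser.add_argument('--model_architecture', type=str, default='50 10')
    parser.add_argument('--trial_num', type=int, default=0)
    parser.add_argument('--trial_note', type=str, default='')
    return parser.parse_args()

# e.g. at module level: args = parse_arguments()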
action_params = (a_lower, a_upper, a_interval)
episodes = params['episodes']
iterations = params['iterations']
total_iterations = episodes * iterations
dqn_agent = DQN_Agent(robot=robot,
                      reward_function=physical_forward_reward_function,
                      trial_name="transfer learning test",
                      trial_num=0,
                      episodes=params['episodes'],
                      iterations=params['iterations'],
                      network_update_freq=params['network_update_freq'],
                      check_singularity=False,
                      is_physical_robot=True,
                      input_dim=len(robot.state) + 2,
                      output_dim=1,
                      actions_params=action_params,
                      model_architecture=params['model_architecture'],
                      memory_size=total_iterations // 3,
                      memory_buffer_coef=20,
                      randomize_theta=False,
                      batch_size=params['batch_size'],
                      gamma=0.99,
                      epsilon=1.0,
                      epsilon_min=params['epsilon_min'],
                      epsilon_decay=params['epsilon_decay'],
                      learning_rate=params['learning_rate'],
                      params=params)

# Load the pretrained policy, then roll it out on the physical robot.
dqn_agent.load_model(json_name=json_fname, h5_name=h5_fname)
dqn_agent.policy_rollout(timesteps=50)
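# The snippet above assumes `robot`, `params`, `json_fname`, and `h5_fname`
# are already defined. A minimal sketch of preparing them from a finished
# training trial; every path below is hypothetical, and the json/h5 pair is
# presumed to be the usual Keras architecture/weights files:

import json

with open('results/trial_0/params.json') as f:
    params = json.load(f)

json_fname = 'results/trial_0/model.json'  # model architecture
h5_fname = 'results/trial_0/model.h5'      # model weights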
def main():
    robot_type = args.robot_type
    if robot_type == "physical":
        robot = PhysicalRobot(delay=args.delay)
        check_singularity = False
        is_physical_robot = True
    else:
        raise ValueError("Unknown robot type: {}".format(robot_type))

    episodes = args.episodes
    iterations = args.iterations
    total_iterations = episodes * iterations

    if args.reward_func == "forward":
        reward_function = physical_forward_reward_function
    else:
        raise ValueError("Unknown reward function: {}".format(args.reward_func))

    a_lower = args.a_lower
    a_upper = args.a_upper
    a_interval = args.a_interval
    action_params = (a_lower, a_upper, a_interval)

    network_update_freq = args.network_update_freq
    batch_size = args.batch_size
    epsilon_min = args.epsilon_min
    # Decay epsilon geometrically from 1.0 down to epsilon_min over the full run.
    epsilon_decay = epsilon_min ** (1 / total_iterations)
    learning_rate = args.learning_rate
    model_architecture = [int(num) for num in args.model_architecture.split('_')]

    trial_num = args.trial_num
    trial_name = 'DQN_{}_{}_{}_iters'.format(robot_type, args.reward_func,
                                             total_iterations)
    if args.trial_note:
        trial_name += "_{}".format(args.trial_note)

    params = {
        "robot_type": args.robot_type,
        "delay": args.delay,
        "a_lower": args.a_lower,
        "a_upper": args.a_upper,
        "a_interval": args.a_interval,
        "trial_num": args.trial_num,
        "trial_note": args.trial_note,
        "episodes": args.episodes,
        "iterations": args.iterations,
        "reward_func": args.reward_func,
        "network_update_freq": args.network_update_freq,
        "epsilon_min": args.epsilon_min,
        "batch_size": args.batch_size,
        "learning_rate": args.learning_rate,
        "model_architecture": args.model_architecture,
    }

    dqn_agent = DQN_Agent(robot=robot,
                          reward_function=reward_function,
                          trial_name=trial_name,
                          trial_num=trial_num,
                          episodes=episodes,
                          iterations=iterations,
                          network_update_freq=network_update_freq,
                          check_singularity=check_singularity,
                          is_physical_robot=is_physical_robot,
                          input_dim=len(robot.state) + 2,
                          output_dim=1,
                          actions_params=action_params,
                          model_architecture=model_architecture,
                          memory_size=total_iterations // 5,
                          memory_buffer_coef=20,
                          randomize_theta=False,
                          batch_size=batch_size,
                          gamma=0.99,
                          epsilon=1.0,
                          epsilon_min=epsilon_min,
                          epsilon_decay=epsilon_decay,
                          learning_rate=learning_rate,
                          params=params)
    dqn_agent.run()
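# Example invocation (hypothetical script name and flag values; this excerpt
# does not show the physical-robot script's argument parser or its defaults):
#
#   python train_physical.py --robot_type physical --delay 0.5 \
#       --a_lower -0.4 --a_upper 0.4 --a_interval 0.2 \
#       --episodes 50 --iterations 100 --reward_func forward \
#       --network_update_freq 20 --batch_size 8 --epsilon_min 0.1 \
#       --learning_rate 2e-4 --model_architecture 50_10 --trial_num 0
#
# Note that this script splits model_architecture on underscores
# ('50_10' -> [50, 10]), unlike the simulation script above, which splits
# on spaces ('50 10' -> [50, 10]).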