def policy_rollout(agent, path, t_interval=1, timesteps=200):
    """Roll out the agent's learned policy on a fresh SwimmingRobot, logging the
    trajectory, saving rollout graphs, and writing the state history to a CSV
    under `path`."""
    for j in range(1):  # single rollout; the index j is kept for the log messages below
        robot = SwimmingRobot(a1=0, a2=0, t_interval=t_interval)
        xs = [robot.x]
        ys = [robot.y]
        thetas = [robot.theta]
        a1s = [robot.a1]
        a2s = [robot.a2]
        steps = [0]
        # robot.randomize_state(enforce_opposite_angle_signs=True)
        robot_params = []
        robot_param = [robot.x, robot.y, robot.theta,
                       float(robot.a1), float(robot.a2),
                       robot.a1dot, robot.a2dot]
        robot_params.append(robot_param)
        print('Beginning', j + 1, 'th Policy Rollout')
        try:
            for i in range(timesteps):
                # rollout
                state = robot.state
                print('In', i + 1, 'th iteration the initial state is: ', state)
                old_x = robot.x
                action = agent.choose_action(state)
                print('In', i + 1, 'th iteration the chosen action is: ', action)
                robot.move(action=action)
                new_x = robot.x
                print('In', i + 1, 'th iteration, the robot moved ', new_x - old_x, ' in x direction')
                # add values to lists
                xs.append(robot.x)
                ys.append(robot.y)
                thetas.append(robot.theta)
                a1s.append(robot.a1)
                a2s.append(robot.a2)
                steps.append(i + 1)
                robot_param = [robot.x, robot.y, robot.theta,
                               float(robot.a1), float(robot.a2),
                               robot.a1dot, robot.a2dot]
                robot_params.append(robot_param)
        except ZeroDivisionError as e:
            print(str(e), 'occurred at ', j + 1, 'th policy rollout')
        # plotting
        make_rollout_graphs(xs, ys, thetas, a1s, a2s, steps, path=path)
        generate_csv(robot_params, path + "/policy_rollout.csv")
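# Hypothetical helper (not in the original file): reads the CSV written by
# policy_rollout above and reports the net x-displacement of the rollout.
# The column order follows robot_param: [x, y, theta, a1, a2, a1dot, a2dot];
# the function name and its use are assumptions for illustration only.
import csv

def net_x_displacement(csv_path):
    with open(csv_path) as f:
        rows = [row for row in csv.reader(f) if row]
    # net displacement of the body frame along x over the whole rollout
    return float(rows[-1][0]) - float(rows[0][0])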
def get_random_edge_states():
    """Sample a SwimmingRobot start configuration: 20% chance of the nominal robot
    and 20% each for the four joint-limit corner cases."""
    num = np.random.rand()
    if num < 0.2:
        print('Normal robot!')
        robot = SwimmingRobot(t_interval=1)
    elif num < 0.4:
        print('edge case 1!')
        robot = SwimmingRobot(a1=-pi / 2, a2=pi / 2, t_interval=0.5)
    elif num < 0.6:
        print('edge case 2!')
        robot = SwimmingRobot(a1=-pi / 2, a2=-pi / 2, t_interval=0.5)
    elif num < 0.8:
        print('edge case 3!')
        robot = SwimmingRobot(a1=pi / 2, a2=-pi / 2, t_interval=0.5)
    else:
        print('edge case 4!')
        robot = SwimmingRobot(a1=pi / 2, a2=pi / 2, t_interval=0.5)
    return robot
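# Hypothetical sanity check (an addition, not original code): sample the helper a
# few times and print the drawn joint angles to confirm all five branches are
# reachable. The function name is illustrative.
def sample_edge_states(n=10):
    for _ in range(n):
        r = get_random_edge_states()
        print('sampled start angles:', float(r.a1), float(r.a2))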
def main():
    # Hand-tuned epsilon decay rates used for various total iteration counts:
    # 0.99996 for 30000 iterations
    # 0.999 for 1000 iterations
    # 0.9998 for 10000 iterations
    # 0.99995 for 20000
    # 0.999965 for 40000
    # 0.999955 for 50000
    # 0.999975 for 60000
    # 0.999977 for 100000
    # 0.999993 for 200000
    # 0.999997 for 500000
    # 0.999997 for 1000000
    # 0.999999 for 2000000
    # 0.9999994 for 3000000
    # 0.9999997 for 6000000
    robot = SwimmingRobot(t_interval=8)
    trial_name = 'DQN_swimming_w_theta_forward_20000_iters'
    trial_num = 0
    reward_function = forward_reward_function
    episodes = 20
    iterations = 1000
    total_iterations = episodes * iterations
    network_update_freq = 20
    batch_size = 8
    epsilon_decay = 0.99995
    learning_rate = 2e-4
    model_architecture = (50, 10)
    dqn_agent = DQN_Agent(robot=robot,
                          reward_function=reward_function,
                          trial_name=trial_name,
                          trial_num=trial_num,
                          episodes=episodes,
                          iterations=iterations,
                          network_update_freq=network_update_freq,
                          check_singularity=False,
                          input_dim=5,
                          output_dim=1,
                          actions_params=(-pi/8, pi/8, pi/8),
                          model_architecture=model_architecture,
                          memory_size=total_iterations//50,
                          memory_buffer_coef=20,
                          randomize_theta=False,
                          batch_size=batch_size,
                          gamma=0.99,
                          epsilon=1.0,
                          epsilon_min=0.1,
                          epsilon_decay=epsilon_decay,
                          learning_rate=learning_rate,
                          params=None)
    dqn_agent.run()
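# The decay constants above appear hand-tuned per run length. The argparse-driven
# entry point further below instead derives the decay from the target floor via
# epsilon_min ** (1 / total_iterations); a small helper making that relation
# explicit (a hypothetical addition, not original code):
def epsilon_decay_for(epsilon_min, total_iterations):
    # per-step multiplicative decay that reaches epsilon_min after total_iterations steps
    return epsilon_min ** (1 / total_iterations)

# e.g. epsilon_decay_for(0.1, 20000) ~= 0.999885, a slightly faster decay than the
# hard-coded 0.99995 used for the 20000-iteration run above.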
def main():
    robot_type = args.robot_type
    if robot_type == "swimming":
        robot = SwimmingRobot(t_interval=args.t_interval,
                              a_upper=args.a_upper,
                              a_lower=args.a_lower,
                              no_joint_limit=args.no_joint_limit)
        check_singularity = False
    elif robot_type == "wheeled":
        robot = ThreeLinkRobot(t_interval=args.t_interval)
        check_singularity = True
    else:
        raise ValueError("Unknown robot type: {}".format(robot_type))

    episodes = args.episodes
    iterations = args.iterations
    total_iterations = episodes * iterations

    if args.reward_func == "forward":
        reward_function = forward_reward_function
    elif args.reward_func == "left":
        reward_function = left_reward_function
    else:
        raise ValueError("Unknown reward function: {}".format(args.reward_func))

    network_update_freq = args.network_update_freq
    batch_size = args.batch_size
    epsilon_min = args.epsilon_min
    epsilon_decay = epsilon_min ** (1 / total_iterations)
    learning_rate = args.learning_rate
    model_architecture = [int(num) for num in args.model_architecture.split(' ')]

    trial_num = args.trial_num
    trial_name = 'DQN_{}_{}_{}_iters'.format(robot_type, args.reward_func, total_iterations)
    if args.trial_note:
        trial_name += "_{}".format(args.trial_note)

    params = {
        "robot_type": args.robot_type,
        "t_interval": args.t_interval,
        "a_upper": args.a_upper,
        "a_lower": args.a_lower,
        "no_joint_limit": args.no_joint_limit,
        "trial_num": args.trial_num,
        "trial_note": args.trial_note,
        "episodes": args.episodes,
        "iterations": args.iterations,
        "reward_func": args.reward_func,
        "network_update_freq": args.network_update_freq,
        "epsilon_min": args.epsilon_min,
        "batch_size": args.batch_size,
        "learning_rate": args.learning_rate,
        "model_architecture": args.model_architecture,
    }

    dqn_agent = DQN_Agent(robot=robot,
                          reward_function=reward_function,
                          trial_name=trial_name,
                          trial_num=trial_num,
                          episodes=episodes,
                          iterations=iterations,
                          network_update_freq=network_update_freq,
                          check_singularity=check_singularity,
                          input_dim=len(robot.state) + 2,
                          output_dim=1,
                          actions_params=(-pi/8, pi/8, pi/8),
                          model_architecture=model_architecture,
                          memory_size=total_iterations//50,
                          memory_buffer_coef=5,  # don't forget to change back to 20!
                          randomize_theta=False,
                          batch_size=batch_size,
                          gamma=0.99,
                          epsilon=1.0,
                          epsilon_min=epsilon_min,
                          epsilon_decay=epsilon_decay,
                          learning_rate=learning_rate,
                          params=params)
    dqn_agent.run()
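# main() above reads a module-level `args` object that is not defined in this
# excerpt. Below is a hedged sketch of an argparse parser that would supply every
# field the function accesses; the flag names, types, and defaults are assumptions
# chosen to match those accesses, not the repo's actual CLI.
import argparse
from math import pi

def parse_args():
    p = argparse.ArgumentParser(description='Train a DQN gait policy')
    p.add_argument('--robot_type', choices=['swimming', 'wheeled'], default='swimming')
    p.add_argument('--t_interval', type=float, default=1.0)
    p.add_argument('--a_upper', type=float, default=pi / 2)
    p.add_argument('--a_lower', type=float, default=-pi / 2)
    p.add_argument('--no_joint_limit', action='store_true')
    p.add_argument('--episodes', type=int, default=20)
    p.add_argument('--iterations', type=int, default=1000)
    p.add_argument('--reward_func', choices=['forward', 'left'], default='forward')
    p.add_argument('--network_update_freq', type=int, default=20)
    p.add_argument('--batch_size', type=int, default=8)
    p.add_argument('--epsilon_min', type=float, default=0.1)
    p.add_argument('--learning_rate', type=float, default=2e-4)
    p.add_argument('--model_architecture', type=str, default='50 10')
    p.add_argument('--trial_num', type=int, default=0)
    p.add_argument('--trial_note', type=str, default='')
    return p.parse_args()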
import csv

from Robots.ContinuousDeepRobots import ThreeLinkRobot
from Robots.ContinuousSwimmingBot import SwimmingRobot
from math import pi


def generate_csv(robot_params, filename):
    with open(filename, 'w') as file:
        w = csv.writer(file)
        w.writerows(robot_params)


if __name__ == "__main__":
    robot_params = []
    # robot = ThreeLinkRobot(a1=-0.01, a2=0.01, t_interval=0.02)
    robot = SwimmingRobot(t_interval=1, a1=0, a2=0)
    robot_param = [robot.x, robot.y, robot.theta, float(robot.a1), float(robot.a2), robot.a1dot, robot.a2dot]
    robot_params.append(robot_param)
    # for i in range(50):
    #     print('i: ', i)
    #     if i % 2 == 0:
    #         action = (-pi/2, pi/2)
    #     else:
    #         action = (pi/2, -pi/2)
    #     for j in range(40):
    #         print('j: ', j)
    #         print('a1 a2: ', robot.a1, robot.a2)
    #         robot.move(action)
    #         robot_param = [robot.x, robot.y, robot.theta, float(robot.a1), float(robot.a2), robot.a1dot, robot.a2dot]
    #         robot_params.append(robot_param)
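# A hedged completion of the commented-out gait loop above (a hypothetical addition,
# not the original script): alternate between the two extreme joint commands, record
# the robot state after every sub-step, and dump the trajectory with generate_csv.
# The function name and output filename are illustrative.
def run_alternating_gait(robot, robot_params, outer=50, inner=40):
    for i in range(outer):
        action = (-pi / 2, pi / 2) if i % 2 == 0 else (pi / 2, -pi / 2)
        for _ in range(inner):
            robot.move(action)
            robot_params.append([robot.x, robot.y, robot.theta,
                                 float(robot.a1), float(robot.a2),
                                 robot.a1dot, robot.a2dot])
    generate_csv(robot_params, 'alternating_gait.csv')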
def perform_DQN(agent, episodes, iterations, path, batch_size=4, C=30, t_interval=1, randomize_theta=False):
    """
    Train the agent with DQN on a SwimmingRobot.

    :param agent: the RL agent
    :param episodes: number of training episodes
    :param iterations: number of iterations per episode
    :param path: directory in which models and learning data are saved
    :param batch_size: size of minibatch sampled from replay buffer
    :param C: network update frequency
    :param t_interval: time interval for each robot action
    :param randomize_theta: whether to randomize the robot's initial orientation each episode
    :return: agent, and other information about DQN
    """
    avg_losses = []
    std_losses = []
    avg_rewards = []
    std_rewards = []
    avg_Qs = []
    std_Qs = []
    # gd_iterations = []  # gradient descent iterations
    # gd_iteration = 0
    num_episodes = []

    try:
        # loop through each episode
        for e in range(1, episodes + 1):
            # save model
            if e % (episodes / 10) == 0:
                agent.save_model(path, e)
            theta = random.uniform(-pi / 4, pi / 4) if randomize_theta else 0
            robot = SwimmingRobot(a1=0, a2=0, theta=theta, t_interval=t_interval)
            # state = robot.randomize_state()
            state = robot.state
            rewards = []
            losses = []
            Qs = []

            # loop through each iteration
            for i in range(1, iterations + 1):
                # print('In ', e, ' th episode, ', i, ' th iteration, the initial state is: ', state)
                action = agent.choose_action(state, epsilon_greedy=True)
                print('In {}th episode {}th iteration, the chosen action is: {}'.format(e, i, action))
                robot_after_transition, reward, next_state = agent.act(robot=robot,
                                                                       action=action,
                                                                       c_x=50,
                                                                       c_joint=0,
                                                                       c_zero_x=50,
                                                                       c_theta=5)
                print('The reward is: {}'.format(reward))
                rewards.append(reward)
                # print('In ', e, ' th episode, ', i, ' th iteration, the state after transition is: ', next_state)
                agent.remember(state, action, reward, next_state)
                state = next_state
                robot = robot_after_transition
                if len(agent.memory) > agent.memory_size / 20:
                    loss, Q = agent.replay(batch_size)
                    # gd_iteration += 1
                    losses.append(loss)
                    Qs.append(Q)
                    # gd_iterations.append(gd_iteration)
                    print('The average loss is: {}'.format(loss))
                    print('The average Q is: {}'.format(Q))
                if i % C == 0:
                    agent.update_model()

            num_episodes.append(e)
            avg_rewards.append(np.mean(rewards))
            std_rewards.append(np.std(rewards))
            avg_losses.append(np.mean(losses))
            std_losses.append(np.std(losses))
            avg_Qs.append(np.mean(Qs))
            std_Qs.append(np.std(Qs))

    except TypeError as e:
        print(e)

    finally:
        # save learning data
        save_learning_data(path, num_episodes, avg_rewards, std_rewards,
                           avg_losses, std_losses, avg_Qs, std_Qs)

    return agent, num_episodes, avg_rewards, std_rewards, avg_losses, std_losses, avg_Qs, std_Qs
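# Hedged usage sketch (a hypothetical addition): a minimal driver for perform_DQN.
# The agent is assumed to be an already-constructed DQN_Agent (see the main() entry
# points above for its full argument list); the output directory name is illustrative.
def train(agent, path='Trials/perform_dqn_demo'):
    (trained_agent, num_episodes,
     avg_rewards, std_rewards,
     avg_losses, std_losses,
     avg_Qs, std_Qs) = perform_DQN(agent, episodes=20, iterations=1000, path=path,
                                   batch_size=8, C=20, t_interval=1, randomize_theta=False)
    return trained_agent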
# Fragment of an episode body using a fixed policy; `e`, `ITERATIONS`, and `agent`
# are defined by the enclosing training loop, which is not part of this excerpt.
# print('Normal robot!')
# robot = SwimmingRobot(t_interval=1)
# elif num < 0.4:
#     print('edge case 1!')
#     robot = SwimmingRobot(a1=-pi/2, a2=pi/2, t_interval=0.5)
# elif num < 0.6:
#     print('edge case 2!')
#     robot = SwimmingRobot(a1=-pi/2, a2=-pi/2, t_interval=0.5)
# elif num < 0.8:
#     print('edge case 3!')
#     robot = SwimmingRobot(a1=pi/2, a2=-pi/2, t_interval=0.5)
# else:
#     print('edge case 4')
#     robot = SwimmingRobot(a1=pi/2, a2=pi/2, t_interval=0.5)
robot = SwimmingRobot(a1=0, a2=0, t_interval=1)
# state = robot.randomize_state()
state = robot.state
rewards = []
losses = []
for i in range(1, ITERATIONS + 1):
    # print('In ', e, ' th episode, ', i, ' th iteration, the initial state is: ', state)
    action = agent.choose_action(state, fixed_policy=True)
    print('In ', e, ' th episode, ', i, ' th iteration, the chosen action is: ', action)
    robot_after_transition, reward, next_state = agent.act(robot, action)
    print('In ', e, ' th episode, ', i, ' th iteration, the reward is: ', reward)
    rewards.append(reward)
    # print('In ', e, ' th episode, ', i, ' th iteration, the state after transition is: ', next_state)
    agent.remember(state, action, reward, next_state)