# Goal is always the upper-right corner of the grid.
goal_x = GRID_NUM - 1
goal_y = GRID_NUM - 1

# Re-sample until the red box (neg) is not on the black box (black hole).
# BUG FIX: the original used `is not` (identity, unreliable for ints) and
# tested BOTH neg_x and neg_y against black_hole_loc_y (the y coordinate),
# so it never actually expressed "different cell".  Two grid positions
# coincide only when both coordinates match, so reject exactly that pair.
while True:
    neg_x = np.random.randint(1, GRID_NUM - 1)
    neg_y = np.random.randint(1, GRID_NUM - 1)
    if (neg_x, neg_y) != (black_hole_loc_x, black_hole_loc_y):
        break

num_actions = list(range(4))  # action ids: UP, DOWN, LEFT, RIGHT

agent = rl_agent(
    list(range(4)),  # UP, DOWN, LEFT, RIGHT
    GAMMA,           # discount
    ALPHA,           # learning rate
    EPSILON)         # exploration rate
#agent.init_q_table()

# BUG FIX: num_outputs expects a count of outputs, but the original passed
# the action list itself; the network needs one output per action.
model = agent.init_NN(num_outputs=len(num_actions), size=GRID_NUM * GRID_NUM)

#drawScene(black_hole_loc_x, black_hole_loc_y,ball_x,ball_y,goal_x,goal_y, neg_x,neg_y)

# Initialize the state representation from all object positions.
state = getState(black_hole_loc_x, black_hole_loc_y, ball_x, ball_y,
                 goal_x, goal_y, neg_x, neg_y)

# Timing / episode bookkeeping for the training loop that follows.
start = time.time()
total_time = time.time()
episodes = 0
steps = 0
    # Tail of a position-update function whose `def` line is above this
    # chunk: action 1 moves the robot one cell left (index - 1), action 2
    # one cell right (index + 1); any other action leaves it unchanged.
    if int(action) == 1:
        robot_pos = robot_pos - 1
    elif int(action) == 2:
        robot_pos = robot_pos + 1
    return robot_pos

#RL constants
EPSILON = 1  # greedy policy / exploration rate
ALPHA = 0.5  # learning rate
GAMMA = 1  # discount factor

# Tabular agent over a 3-action space.
agent = rl_agent(
    list(range(3)),  #STAY,LEFT,RIGHT
    GAMMA,
    ALPHA,
    EPSILON)
agent.init_q_table()

#resetRobot()
i = 0
epochs = 10000
grid_dim = (5, 8)  # NOTE(review): presumably (rows, cols) of the grid — confirm
robot_pos = 0

# NOTE(review): runs forever; the epoch-bounded variant was commented out.
# Loop body continues past this chunk.
while (True):
    #while(i < epochs):
def get_reward(robot_x, robot_y, ball_x, thrust):
    '''
    Return the reward for the current step.

    NOTE(review): placeholder — ignores all arguments and always returns
    the constant step penalty -1; the distance-formula reward hinted at by
    the original docstring ("return DISTANCEFORMULA") is not implemented.
    '''
    return -1

#RL constants
EPSILON = 1  # greedy policy / exploration rate
ALPHA = 0.5  # learning rate
GAMMA = 1  # discount factor

# Agent over a 5-action space.
agent = rl_agent(
    list(range(5)),  #STAY,LEFT,RIGHT, UP, DOWN
    GAMMA,
    ALPHA,
    EPSILON
)

# Physical / discretization parameters of the gantry robot.
robot_spaces_x = 5  #grids
robot_spaces_y = 3  #grids
gantry_plate_size = 2.6771654  #inches

# Construct the on-screen robot model (this call continues past the end of
# this chunk; the argument list is intentionally left open).
robot = virtual_robot(
    WIDTH = 1680,  #width of monitor
    HEIGHT = 1050,  #height of monitor
    dpi = 99,  #dpi of the monitor
    roi_x = 12,  #inches, this is how wide in x you want the robot range to be, the black part of the virtual robot inches
    roi_y = 8,  #inches
    number_of_states_x = robot_spaces_x,  #0,1,2,3,4
    number_of_states_y = robot_spaces_y,  #0,1,2
    initial_x = 2,  #middle x
# Configure the action/state space from the control mode.  Single-axis
# control ("x" or "y") uses 3 actions over 2 state features; anything else
# (presumably combined x+y control — confirm against caller) uses 5 over 4.
# Idiom fix: membership test instead of chained equality comparisons.
if TYPE in ("x", "y"):
    num_actions = 3  # STAY, LEFT, RIGHT (or STAY, UP, DOWN)
    num_states = 2
else:
    num_actions = 5  # STAY, LEFT, RIGHT, UP, DOWN
    num_states = 4

#RL constants
EPSILON = 1  # greedy policy / exploration rate
ALPHA = 0.5  # learning rate
GAMMA = 1  # discount factor

agent = rl_agent(
    list(range(num_actions)),  # action ids sized to the control mode
    GAMMA,
    ALPHA,
    EPSILON)
agent.init_q_table()

# Bookkeeping for training statistics collected over the run.
catch_count = 0
total_reward = 0
last_catch = 0
last_reward = 0
total_rewards = []
num_epochs = []
average_epochs = []
total_loss = []
total_catches = []
average_catches = []