Example #1
import time

import numpy as np

#goal will always be upper right
goal_x = GRID_NUM - 1
goal_y = GRID_NUM - 1

#this is just so the red box is never in the same location as the black box
while True:
    neg_x = np.random.randint(1, GRID_NUM - 1)
    neg_y = np.random.randint(1, GRID_NUM - 1)
    if (neg_x, neg_y) != (black_hole_loc_x, black_hole_loc_y):
        break

actions = list(range(4))  #UP,DOWN,LEFT,RIGHT
agent = rl_agent(
    actions,
    GAMMA,
    ALPHA,
    EPSILON)

#agent.init_q_table()
model = agent.init_NN(num_outputs=len(actions), size=GRID_NUM * GRID_NUM)
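
#getState is not defined in this excerpt. A minimal sketch, assuming the
#state is a flat one-hot vector over the GRID_NUM x GRID_NUM cells marking
#the ball's position (parameter names mirror the call below; a richer
#encoding could also fold in the goal, hole, and penalty coordinates):
def getState(black_hole_x, black_hole_y, ball_x, ball_y, goal_x, goal_y,
             neg_x, neg_y):
    state = np.zeros(GRID_NUM * GRID_NUM)
    state[ball_y * GRID_NUM + ball_x] = 1  #one-hot: only the ball cell is set
    return state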

#drawScene(black_hole_loc_x, black_hole_loc_y,ball_x,ball_y,goal_x,goal_y, neg_x,neg_y)
#initialize state matrix
state = getState(black_hole_loc_x, black_hole_loc_y, ball_x, ball_y, goal_x,
                 goal_y, neg_x, neg_y)

start = time.time()
total_time = time.time()
episodes = 0
steps = 0
Example #2
def move_robot(robot_pos, action):  #def line not in the excerpt; name assumed
    #action 0 = STAY, 1 = LEFT, 2 = RIGHT (matches the agent's action list)
    if int(action) == 1:
        robot_pos = robot_pos - 1
    elif int(action) == 2:
        robot_pos = robot_pos + 1

    return robot_pos


#RL constants
EPSILON = 1  # epsilon-greedy policy
ALPHA = 0.5  # learning rate
GAMMA = 1  # discount

agent = rl_agent(
    list(range(3)),  #STAY,LEFT,RIGHT
    GAMMA,
    ALPHA,
    EPSILON)

agent.init_q_table()
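
#init_q_table is not shown in this listing; a minimal sketch of the usual
#tabular setup it presumably performs (make_q_table is a hypothetical
#stand-in, not the author's method):
import numpy as np

def make_q_table(num_states, num_actions):
    #Q[s, a] = 0 for every state-action pair, so no action is preferred yet
    return np.zeros((num_states, num_actions))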

#resetRobot()

i = 0
epochs = 10000
grid_dim = (5, 8)
robot_pos = 0

while True:
    #while(i < epochs):
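    #The excerpt ends at the loop header. A sketch of a typical tabular
    #Q-learning loop body (choose_action and update_q are hypothetical
    #method names; the rl_agent class itself is not shown in this listing):
    action = agent.choose_action(robot_pos)  #epsilon-greedy action choice
    robot_pos_new = move_robot(robot_pos, action)  #apply STAY/LEFT/RIGHT
    reward = get_reward(robot_pos_new)  #task-specific reward, not shown here
    agent.update_q(robot_pos, action, reward, robot_pos_new)  #Q-learning backup
    robot_pos = robot_pos_new
    i += 1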
Example #3
def get_reward(robot_x, robot_y, ball_x, thrust):
    '''
    TODO: return a distance-based reward rather than the constant below
    '''

    return -1
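
#A sketch of the distance-based reward the docstring hints at, assuming the
#aim is to keep the robot horizontally aligned with the ball; the constant
#-1 above is what the listing actually returns (get_distance_reward is a
#hypothetical helper, not the author's code):
def get_distance_reward(robot_x, ball_x):
    #closer horizontally -> reward nearer zero; farther -> more negative
    return -abs(robot_x - ball_x)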


#RL constants
EPSILON = 1  # epsilon-greedy policy
ALPHA = 0.5  # learning rate
GAMMA = 1  # discount

agent = rl_agent(
    list(range(5)),  #STAY,LEFT,RIGHT,UP,DOWN
    GAMMA,
    ALPHA,
    EPSILON)

robot_spaces_x = 5 #grids
robot_spaces_y = 3 #grids
gantry_plate_size = 2.6771654  #inches (68 mm)
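
#The virtual_robot call below takes physical sizes in inches plus the monitor
#dpi, which is presumably how it maps the region of interest onto screen
#pixels; the conversion is just inches * dpi (a sketch, not the class's code):
def inches_to_pixels(inches, dpi=99):
    return int(round(inches * dpi))  #e.g. roi_x: 12 in * 99 dpi = 1188 px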
robot = virtual_robot(
    WIDTH=1680,  #width of monitor, pixels
    HEIGHT=1050,  #height of monitor, pixels
    dpi=99,  #dpi of the monitor
    roi_x=12,  #inches; how wide the robot's x range (the black region of the virtual robot) should be
    roi_y=8,  #inches
    number_of_states_x=robot_spaces_x,  #0,1,2,3,4
    number_of_states_y=robot_spaces_y,  #0,1,2
    initial_x=2,  #middle x
Example #4
if TYPE in ("x", "y"):
    num_actions = 3
    num_states = 2
else:
    num_actions = 5
    num_states = 4

#RL constants
EPSILON = 1  # epsilon-greedy policy
ALPHA = 0.5  # learning rate
GAMMA = 1  # discount

agent = rl_agent(
    list(range(num_actions)),  #STAY,LEFT,RIGHT, plus UP,DOWN when num_actions == 5
    GAMMA,
    ALPHA,
    EPSILON)

agent.init_q_table()

catch_count = 0
total_reward = 0
last_catch = 0
last_reward = 0
total_rewards = []
num_epochs = []
average_epochs = []
total_loss = []
total_catches = []
average_catches = []
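
#These accumulators are presumably appended to once per episode by the
#training loop that follows in the full source; a sketch of the kind of
#per-episode bookkeeping they suggest (log_episode is a hypothetical helper,
#not part of the original listing):
def log_episode(episode, reward, catches):
    global total_reward
    total_reward += reward
    total_rewards.append(total_reward)  #cumulative reward curve
    total_catches.append(catches)
    average_catches.append(catches / (episode + 1))  #running mean of catches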