a = randaction
# print(" - selecting generated optimal policy ", a)
# Clip each action component into the valid (-1, 1) range:
# for i in range(len(a)):
#     if a[i] < -1: a[i] = -0.99999999999
#     if a[i] > 1:  a[i] = 0.99999999999

# Log and render every 50 steps
if step % 50 == 0:
    print("a =>", a)
    env.render()
    env.refresh(render=True)

# Concatenate state and action into a single input row
qs_a = np.concatenate((qs, a), axis=0)

# Get the target state and reward
s, r, done, info = env.step(a)

# Record only the first x number of states
# if done and step < max_steps - 3:
#     r = -50

if step == 0:
    gameSA[0] = qs_a
    gameS[0] = qs
    gameR[0] = np.array([r])
    gameA[0] = a  # record the action itself, not the reward
    gameW[0] = np.array([0.000000005])
else:
    gameSA = np.vstack((gameSA, qs_a))
    gameS = np.vstack((gameS, qs))
    gameR = np.vstack((gameR, np.array([r])))
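The buffer above pairs each concatenated state-action row (gameSA) with the reward observed for it (gameR), and gameW looks like a per-sample fit weight. A minimal sketch of how such a buffer could train a reward model, assuming a Keras regressor; the network shape and every hyperparameter below are illustrative assumptions, not taken from the original:

from tensorflow import keras

# Hypothetical reward model: maps a (state, action) vector to the
# reward observed after taking that action in that state.
model = keras.Sequential([
    keras.layers.Dense(64, activation="relu", input_shape=(gameSA.shape[1],)),
    keras.layers.Dense(64, activation="relu"),
    keras.layers.Dense(1),
])
model.compile(optimizer="adam", loss="mse")

# Treat gameW as per-sample weights, so rows seeded with the tiny
# initial weight contribute almost nothing to the loss.
model.fit(gameSA, gameR, sample_weight=gameW.ravel(), epochs=10, verbose=0)

# Score a candidate row built the same way as qs_a above:
r_hat = model.predict(qs_a.reshape(1, -1))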
    initial_epsilon=0.8
)

observation = env.reset()
left_or_right_barge_movement = np.random.randint(0, 2)
epsilon = 0.05

for episode in range(EPISODES):
    while True:
        # 1. Choose an action based on the current observation
        action = PG.choose_action(observation)

        # 2. Take the action in the environment
        observation_, reward, done, info = env.step(action)

        # 3. Store the transition for training
        # if reward > -0.20:
        PG.store_transition(observation, action, reward)

        if RENDER_ENV:
            # -------------------------------------
            # Optional render
            env.render()
            # Draw the target
            env.draw_marker(env.landing_coordinates[0], env.landing_coordinates[1])
            # Refresh render
            env.refresh(render=False)

        # When should the barge move? Water movement, dynamics etc. can be simulated here.
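PG's interface (choose_action, store_transition) is defined elsewhere in the project, and at episode end the loop would typically check done, run a learning step, and break before the next episode. A minimal sketch of that assumed interface with the standard discounted-return computation; this is illustrative only, not the project's actual PolicyGradient class:

import numpy as np

class PolicyGradientSketch:
    """Hypothetical stand-in for the PolicyGradient agent used above."""

    def __init__(self, n_actions, gamma=0.99):
        self.n_actions = n_actions
        self.gamma = gamma  # reward discount factor
        self.ep_obs, self.ep_as, self.ep_rs = [], [], []

    def choose_action(self, observation):
        # Placeholder: uniform random. The real agent samples from the
        # action distribution produced by its policy network.
        return np.random.randint(self.n_actions)

    def store_transition(self, s, a, r):
        # Buffer one step; learn() consumes and clears the buffers.
        self.ep_obs.append(s)
        self.ep_as.append(a)
        self.ep_rs.append(r)

    def learn(self):
        # Discounted, normalized returns: the usual policy-gradient
        # target. A real implementation would also take a gradient step.
        returns = np.zeros(len(self.ep_rs))
        running = 0.0
        for t in reversed(range(len(self.ep_rs))):
            running = self.ep_rs[t] + self.gamma * running
            returns[t] = running
        returns = (returns - returns.mean()) / (returns.std() + 1e-8)
        self.ep_obs, self.ep_as, self.ep_rs = [], [], []
        return returns

The barge-movement comment at the end of the loop marks where the same step could also nudge the barge at random, e.g. with probability epsilon in the left_or_right_barge_movement direction drawn above.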