def observe_state():
    """Return the reached state s' observed from the robot."""
    assert initiated, "agent not initiated! setup() must be previously executed"

    unwrapped_s = np.zeros(n_inputs)

    # Special cases
    if exp.TEACHING_PROCESS:  # Observed states are already given
        from lp import step
        return exp.TAUGHT_SASR[step, 2]
    elif exp.LEARN_FROM_MODEL:
        from lp import s, a
        import model
        return model.get_sp(s, a)  # return reached state s'

    robot.update()
    input_data = task.get_input_data()

    # Discretize each input variable into its corresponding bin
    for i in range(n_inputs):
        aux = np.digitize(input_data[i], in_values[i], right=True)
        unwrapped_s[i] = int(np.clip(aux - 1, 0, in_sizes[i] - 1))

    state = wrap_state(unwrapped_s)
    assert 0 <= state < n_states, "Wrong state: " + str(state)
    return state
def redraw():
    draw.clear()
    for i in range(speed):
        robot.update()
    for l in lines:
        draw.line(*l)
    robot.draw()
    draw.camera = robot.getPos()
    draw.ontimer(redraw, 1000 // target_frames)
def observestate():
    global n_inputs, in_values, in_sizes

    unwrappedstate = np.zeros(n_inputs)

    robot.update()
    inputdata = task.get_input_data()
    print("inputdata:", inputdata)

    # Discretize each input variable into its corresponding bin
    for i in range(n_inputs):
        aux = np.digitize(inputdata[i], in_values[i], right=True)
        unwrappedstate[i] = int(np.clip(aux - 1, 0, in_sizes[i] - 1))

    state = wrap_state(unwrappedstate)
    print('state:', state)
    return state
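# Both observe_state() and observestate() above depend on a wrap_state() helper
# that folds the vector of per-variable bin indices into a single scalar state
# index. That helper is not shown in this section; the sketch below is a
# hypothetical mixed-radix (row-major) encoding and assumes in_sizes holds the
# number of bins per input variable -- the actual implementation may differ.
def wrap_state_sketch(unwrapped_s, in_sizes):
    state = 0
    for index, size in zip(unwrapped_s.astype(int), in_sizes):
        state = state * size + index  # shift by this variable's radix, then add its bin
    return int(state)

# Example: 3 variables with 4, 5 and 3 bins give 4*5*3 = 60 possible states;
# the bin vector [1, 2, 0] maps to ((0*4 + 1)*5 + 2)*3 + 0 = 21.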
def run():
    import sys
    if sys.version_info[0] < 3:
        sys.exit("Python 3 required")

    expset.check()

    # Copy the selected task file to speed up the execution:
    try:
        copyfile("tasks/" + expset.TASK_ID + ".py", "task.py")
    except IOError:
        sys.exit("Task " + expset.TASK_ID + " not found. Please check exp.TASK_ID")

    import task
    import robot
    # import lp
    import show
    import save

    task.setup()

    caption = expset.TASK_ID
    path = save.new_dir(results_path, caption)  # Create result directory

    robot.connect()  # Connect to V-REP / ROS

    # Experiment repetition loop --------------------------------------------
    for rep in range(expset.N_REPETITIONS):

        # Training parameters
        action_dic = {'0': 'FORWARD', '1': 'FULL_RIGHT', '2': 'FULL_LEFT',
                      '3': 'HALF_RIGHT', '4': 'HALF_LEFT'}
        batch_size = 32          # How many experiences to use for each training step
        update_freq = 4          # How often to perform a training step
        y = .99                  # Discount factor on the target Q-values
        startE = 1               # Starting chance of random action
        endE = 0.1               # Final chance of random action
        anneling_steps = 100000  # How many training steps to reduce startE to endE
        pre_train_steps = 350    # How many steps of random actions before training begins
        tau = 0.001              # Rate to update target network toward primary network
        obs_dim1 = 96            # Size of the visual input
        obs_dim2 = 128
        action_size = len(action_dic)
        load_model = False

        # Learning algorithm initialization
        tf.reset_default_graph()
        mainQN = DeepQNetwork(obs_dim1, obs_dim2, action_size)
        targetQN = DeepQNetwork(obs_dim1, obs_dim2, action_size)

        init = tf.global_variables_initializer()
        saver = tf.train.Saver()
        trainables = tf.trainable_variables()
        targetOps = algorithm_DQN.updateTargetGraph(trainables, tau)
        copyOps = algorithm_DQN.updateTargetGraph(trainables, 1.0)

        myBuffer = experience_buffer()

        # Set the rate of random action decrease
        e = startE
        stepDrop = (startE - endE) / anneling_steps

        # Create lists for counting
        stepList = []    # Total steps of each episode
        rAllList = []    # Total reward of each episode
        rAveList = []    # Average reward of each episode
        total_steps = 0  # Total steps of this repetition

        with tf.Session() as sess:
            sess.run(init)
            if load_model:
                print("Loading model ...")
                ckpt = tf.train.get_checkpoint_state(path)
                saver.restore(sess, ckpt.model_checkpoint_path)

            # Episode loop ----------------------------------------------------
            for epi in range(expset.N_EPISODES):
                robot.start()
                show.process_count(caption, rep, epi)
                robot.setup()

                episodeBuffer = experience_buffer()
                s = robot.get_observation()
                s = algorithm_DQN.processState(s, obs_dim1, obs_dim2)  # added for DQN version 1
                sg = robot.get_goal_relpose_2d()                       # added for DQN version 2
                rAll = 0.0   # total reward per episode
                rAve = 0.0   # average reward per episode
                d = False    # True once the destination is reached

                step_time = task.STEP_TIME / expset.SPEED_RATE  # delay for each step (s)
                actual_step_time = 0.0  # actual time elapsed per step (s)
                time_mark = time.time() # start timestamp

                for step in range(0, expset.N_STEPS):
                    # Epsilon-greedy action selection
                    if np.random.rand(1) < e or total_steps < pre_train_steps:
                        a = np.random.randint(0, len(action_dic))
                    else:
                        a = sess.run(mainQN.predict,
                                     feed_dict={mainQN.scalarInput: [s],
                                                mainQN.goalInput: [sg]})[0]  # added for DQN version 2
                    print("action at step " + str(step + 1) + ": " + action_dic[str(a)])

                    # Update robot motion
                    move_direction = action_dic[str(a)]
                    if move_direction == 'FORWARD':
                        robot.move_wheels(1 * task.MAX_SPEED, 1 * task.MAX_SPEED)
                    elif move_direction == 'FULL_RIGHT':
                        robot.move_wheels(1 * task.MAX_SPEED, -1 * task.MAX_SPEED)
                    elif move_direction == 'FULL_LEFT':
                        robot.move_wheels(-1 * task.MAX_SPEED, 1 * task.MAX_SPEED)
                    elif move_direction == 'HALF_RIGHT':
                        robot.move_wheels(1.5 * task.MAX_SPEED, 0.5 * task.MAX_SPEED)
                    elif move_direction == 'HALF_LEFT':
                        robot.move_wheels(0.5 * task.MAX_SPEED, 1.5 * task.MAX_SPEED)

                    time.sleep(step_time)  # Delay for step_time (s)
                    robot.update()

                    # Get new observation and reward
                    s1 = robot.get_observation()
                    s1 = algorithm_DQN.processState(s1, obs_dim1, obs_dim2)  # added for DQN version 1
                    sg1 = robot.get_goal_relpose_2d()                        # added for DQN version 2
                    r = task.get_reward()
                    d = task.reach_goal()
                    total_steps += 1

                    # Save to experience buffer
                    episodeBuffer.add(np.reshape(np.array([s, a, r, s1, d, sg, sg1]), [1, 7]))  # added for DQN version 2

                    # Update Deep Q-Network
                    if total_steps > pre_train_steps:
                        if e > endE:
                            e -= stepDrop
                        if total_steps % update_freq == 0:
                            trainBatch = myBuffer.sample(batch_size)  # Get a random batch of experiences
                            # Perform the Double-DQN update to the target Q-values
                            Q1 = sess.run(mainQN.predict,
                                          feed_dict={mainQN.scalarInput: np.vstack(trainBatch[:, 3]),
                                                     mainQN.goalInput: np.vstack(trainBatch[:, 6])})  # added for DQN version 2
                            Q2 = sess.run(targetQN.Qout,
                                          feed_dict={targetQN.scalarInput: np.vstack(trainBatch[:, 3]),
                                                     targetQN.goalInput: np.vstack(trainBatch[:, 6])})  # added for DQN version 2
                            end_multiplier = -(trainBatch[:, 4] - 1)
                            doubleQ = Q2[range(batch_size), Q1]
                            targetQ = trainBatch[:, 2] + (y * doubleQ * end_multiplier)

                            # Update the network with our target values
                            _ = sess.run(mainQN.updateModel,
                                         feed_dict={mainQN.scalarInput: np.vstack(trainBatch[:, 0]),
                                                    mainQN.goalInput: np.vstack(trainBatch[:, 5]),
                                                    mainQN.targetQ: targetQ,
                                                    mainQN.actions: trainBatch[:, 1]})  # added for DQN version 2

                            # Update the target network toward the primary network
                            algorithm_DQN.updateTarget(targetOps, sess)

                    rAll += r
                    s = s1
                    sg = sg1  # added for DQN version 2

                    if d:  # End the episode if the destination is reached
                        break

                    print("reward at step " + str(step + 1) + ": " + str(r))
                    print("total steps: " + str(total_steps))
                    print(" " * 10)

                # End of one episode ---------------------------------------
                rAve = rAll / (step + 1)
                actual_step_time = (time.time() - time_mark) / (step + 1)
                show.epi_summary(caption, rep, epi, step + 1, rAll, rAve, actual_step_time)

                myBuffer.add(episodeBuffer.buffer)
                stepList.append(step + 1)
                rAllList.append(rAll)
                rAveList.append(rAve)

                # Periodically save the model
                if epi % 1000 == 0:
                    saver.save(sess, path + '/model-' + str(epi) + '.ckpt')
                    print("Model saved")

            # Final save at the end of the repetition
            saver.save(sess, path + '/model-' + str(epi) + '.ckpt')
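# The run() loop above relies on an experience_buffer class (and on the
# DeepQNetwork / algorithm_DQN helpers) defined elsewhere in the repository.
# The sketch below is a hypothetical minimal replay buffer that matches only
# the interface used above -- add(), sample() and the raw .buffer list -- and
# assumes 7-column experience rows [s, a, r, s1, d, sg, sg1]; the real class
# may differ.
import random
import numpy as np

class ExperienceBufferSketch:
    def __init__(self, buffer_size=50000):
        self.buffer = []
        self.buffer_size = buffer_size

    def add(self, experience):
        # Discard the oldest rows when the new experiences would overflow the buffer
        overflow = len(self.buffer) + len(experience) - self.buffer_size
        if overflow > 0:
            self.buffer[0:overflow] = []
        self.buffer.extend(experience)

    def sample(self, size):
        # Return `size` random rows as a (size, 7) array, as the training step expects
        return np.reshape(np.array(random.sample(self.buffer, size)), [size, 7])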
print "Sending spikes: ", iterActivePlaceCells if robot.rif: if lastBumpCounter >= robot.rif.bumpCounter: print "We have no incoming bump stream!!!!" #robot.rif.setBumpStream(10) lastBumpCounter = robot.rif.bumpCounter for i in xrange(nSubIters): if time.time() > nextImgTime: gui.save("images/%06d.png"%iimg) nextImgTime += Dt iimg += 1 slam.update(dx, dy, dt/5, robot, gui) placeCellCreation(slam) subIterActivePlaceCells += slam.getActivePlaceCells() dxRobot += dx dyRobot += dy if i % 40 == 0: robot.update(dxRobot,dyRobot) dxRobot = 0. dyRobot = 0. robot.update(dxRobot,dyRobot) it += 1 except KeyboardInterrupt: if kvInterface: kvInterface.setQuitCmd(True) if robotInterface: robotInterface.setV(0,0,0) robotInterface.close()
def update(self, dt):
    for robot in self.robots:
        robot.update(dt)