Code Example #1
File: crimsonbot.py  Project: hoosierEE/crimsonbot
def run(wait=1):
    import gc
    import sys
    import robot  # pylint: disable=import-error
    try:
        robot.setup()
        while True:
            delay(wait)   # module-level helper (not shown): wait between loop iterations
            robot.loop()
    except:
        raise  # propagate any error unchanged; the finally block below still runs
    finally:
        stop()  # module-level helper (not shown): halt the robot
        gc.collect()
        del sys.modules['robot']  # drop the cached module so a later run() re-imports it
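run() leans on two module-level helpers, delay and stop, that the excerpt does not show. A minimal sketch of what they might look like; the bodies below are assumptions for illustration, not the project's actual code:

import time

def delay(seconds):
    # Hypothetical stand-in: block between robot.loop() iterations.
    time.sleep(seconds)

def stop():
    # Hypothetical stand-in: the real project presumably halts motors/outputs here.
    pass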
Code Example #2
def setup():
    global sub_a, sub_s, in_to_states, _count
    sub_s = np.empty(0)
    sub_a = np.empty(0)
    # -1 marks "nothing assigned yet"; use plain int (np.int was removed in NumPy 1.24)
    in_to_states = np.full((n_inputs, int(max(in_sizes)), n_states),
                           -1,
                           dtype=int)
    _count = np.full((n_inputs, int(max(in_sizes))), 0, dtype=int)
    robot.setup(task.AGENT_ELEMENT, task.ENV_ELEMENT)
    print('robot setup!')
    generate_subs()
    print('generate subs!')
    generate_suba()
    print('generate suba!')
    generate_intostates()
    print('generate intostates!')

    #initiated=True
    return
Code Example #3
def setup():
    """ Create the variables needed for this module """
    global Vs, Va, VAR, cont_VAR, initiated, goal_reached

    robot.setup(task.AGENT_ELEMENTS, task.ENV_ELEMENTS)

    Vs = 0
    Va = 0
    VAR = np.full((n_inputs, int(max(in_sizes)), n_states), -1, dtype=int)
    cont_VAR = np.full((n_inputs, int(max(in_sizes))), 0, dtype=int)

    generate_vs()
    generate_va()
    generate_var()

    action_selection.setup()

    goal_reached = False
    initiated = True
    print("Agent initiated")
    return
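Code Examples #2 and #3 share the same initialization pattern: a lookup table filled with -1 as a "nothing assigned yet" sentinel, plus a parallel counter array. A self-contained sketch of that pattern with made-up sizes (n_inputs, in_sizes, and n_states are module-level configuration in the original code, and the index meanings below are only illustrative):

import numpy as np

# Made-up sizes standing in for the module-level configuration.
n_inputs, in_sizes, n_states = 3, [4, 2, 5], 8

VAR = np.full((n_inputs, int(max(in_sizes)), n_states), -1, dtype=int)
cont_VAR = np.full((n_inputs, int(max(in_sizes))), 0, dtype=int)

print(np.any(VAR[0, 2] >= 0))   # False: nothing assigned yet
VAR[0, 2, 5] = 7                 # record one entry ...
cont_VAR[0, 2] += 1              # ... and count it
print(np.any(VAR[0, 2] >= 0))   # True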
Code Example #4
def run():
    
    import sys
    if sys.version_info[0] < 3:
        sys.exit("Python 3 required")
        
    expset.check()
    
    # Copy the selected taskfile to speed up the execution:
    try:
        copyfile("tasks/" + expset.TASK_ID + ".py", "task.py")
    except IOError:
        sys.exit("Task " + expset.TASK_ID + " not found. Please check exp.TASK_ID")
    import task
    import robot
    #import lp
    import show
    import save
    
    task.setup()
    
    caption = expset.TASK_ID
    path = save.new_dir(results_path, caption)  # Create result directory

#    epi = 0
#    # Average Reward per step (aveR):
#    ave_r = np.zeros((expset.N_REPETITIONS, expset.N_STEPS))
#    # Mean(aveR) of all tests per step
#    mean_ave_r = np.zeros(expset.N_STEPS)
#    # AveR per episode
#    epi_ave_r = np.zeros([expset.N_REPETITIONS, expset.N_EPISODES])
#    # actual step time per episode (for computational cost only)
#    actual_step_time = np.zeros(expset.N_REPETITIONS)
    
    robot.connect()  # Connect to V-REP / ROS

#    if expset.CONTINUE_PREVIOUS_EXP:
#        prev_exp = __import__(expset.PREVIOUS_EXP_FILE)
#        print("NOTE: Continue experiments from: " + expset.PREVIOUS_EXP_FILE)
#        time.sleep(3)
        
    # Experiment repetition loop --------------------------------------------
    for rep in range(expset.N_REPETITIONS):
        
        # Training parameters
        action_dic = {'0': 'FORWARD', 
                      '1': 'FULL_RIGHT', 
                      '2': 'FULL_LEFT', 
                      '3': 'HALF_RIGHT',
                      '4': 'HALF_LEFT'}
        batch_size = 32  # How many experiences to use for each training step.
        update_freq = 4 # How often to perform a training step.
        y = .99 # Discount factor on the target Q-values
        startE = 1 # Starting chance of random action
        endE = 0.1 # Final chance of random action
        anneling_steps = 100000 # How many steps of training to reduce startE to endE.
#        num_episodes = 500000 # How many episodes of game environment to train network with.
        
        pre_train_steps = 350 # 10000 #How many steps of random actions before training begins.
#        simulation_time = 400 # 200 
#        max_epLength = 400 # 200 # the same as simulation time
        tau = 0.001 # Rate to update target network toward primary network        
        obs_dim1 = 96 # Size of the visual input
        obs_dim2 = 128
        action_size = len(action_dic)
#        num_sim_steps = 30000
        load_model = False
        
        # Learning algorithm initialization
        tf.reset_default_graph()
        mainQN = DeepQNetwork(obs_dim1, obs_dim2, action_size)
        targetQN = DeepQNetwork(obs_dim1, obs_dim2, action_size)
        init = tf.global_variables_initializer()
        saver = tf.train.Saver()
        trainables = tf.trainable_variables()
        targetOps = algorithm_DQN.updateTargetGraph(trainables, tau)
        copyOps = algorithm_DQN.updateTargetGraph(trainables, 1.0)
        myBuffer = experience_buffer()
        
        #Set the rate of random action decrease
        e = startE
        stepDrop = (startE - endE)/anneling_steps
                
        # Create lists for counting         
        stepList = [] # List of total steps of each epi
        rAllList = [] # List of total rewards of each epi
        rAveList = [] # List of average reward of each epi 
        total_steps = 0 # Count total steps of each repetition
        
        
        
        with tf.Session() as sess:
            sess.run(init)
            if load_model:
                print("Loading model ... ")
                ckpt = tf.train.get_checkpoint_state(path)
                saver.restore(sess, ckpt.model_checkpoint_path)
            
            # Episode loop ----------------------------------------------------
            for epi in range(expset.N_EPISODES):                
                   
                robot.start()               
                show.process_count(caption, rep, epi)
                robot.setup()
                
                episodeBuffer = experience_buffer()            
                s = robot.get_observation()
                s = algorithm_DQN.processState(s, obs_dim1, obs_dim2) # added for DQN version 1
                sg = robot.get_goal_relpose_2d() # added for DQN version 2
                
                rAll = 0.0 # total reward per episode
                rAve = 0.0 # average reward per episode
                d = False # if reach the destination
                
                step_time = task.STEP_TIME / expset.SPEED_RATE # set delay for each step (s)
                actual_step_time = 0.0 # calculate actual time elapsed for each step (s)
                time_mark = time.time() # start timestamp                
                
                for step in range(0, expset.N_STEPS):
                    if np.random.rand(1) < e or total_steps < pre_train_steps:
                        a = np.random.randint(0,len(action_dic))
                    else:
#                        a = sess.run(mainQN.predict,feed_dict={mainQN.observation:np.expand_dims(s, axis=0)})[0]
#                        a = sess.run(mainQN.predict,feed_dict={mainQN.scalarInput:[s]})[0] # added for DQN version 1
                        a = sess.run(mainQN.predict,feed_dict={mainQN.scalarInput:[s],
                                                               mainQN.goalInput:[sg]})[0] # added for DQN version 2
                                                               
                    print("action at step " + str(step+1) + ": " + action_dic[str(a)])
                            
                    # Update robot motion
                    move_direction = action_dic[str(a)]
                    if move_direction == 'FORWARD':
                        robot.move_wheels(1*task.MAX_SPEED, 1*task.MAX_SPEED)
                    elif move_direction == 'FULL_RIGHT':
                        robot.move_wheels(1*task.MAX_SPEED, -1*task.MAX_SPEED)
                    elif move_direction == 'FULL_LEFT':
                        robot.move_wheels(-1*task.MAX_SPEED, 1*task.MAX_SPEED)
                    elif move_direction == 'HALF_RIGHT':
                        robot.move_wheels(1.5*task.MAX_SPEED, 0.5*task.MAX_SPEED)
                    elif move_direction == 'HALF_LEFT':
                        robot.move_wheels(0.5*task.MAX_SPEED, 1.5*task.MAX_SPEED)
                    
                    time.sleep(step_time) # Delay for step_time (s)
                        
                    robot.update()        
                    # Get new observation and reward
                    s1 = robot.get_observation()                    
                    s1 = algorithm_DQN.processState(s1, obs_dim1, obs_dim2) # added for DQN version 1
                    sg1 = robot.get_goal_relpose_2d() # added for DQN version 2
                    r = task.get_reward()
                    d = task.reach_goal()                
                    
                    total_steps += 1                    
                    # Save to experience buffer
#                    episodeBuffer.add(np.reshape(np.array([s,a,r,s1,d]),[1,5]))
                    episodeBuffer.add(np.reshape(np.array([s,a,r,s1,d,sg,sg1]),[1,7])) # added for DQN version 2
                    
                    # Update Deep Q-Network
                    if total_steps > pre_train_steps:
                        if e > endE:
                            e -= stepDrop
                        if total_steps % (update_freq) == 0:
                            trainBatch = myBuffer.sample(batch_size) # Get a random batch of experiences
                            # Perform the Double-DQN update to the target Q-values
#                            Q1 = sess.run(mainQN.predict, feed_dict={mainQN.observation:np.reshape(np.vstack(trainBatch[:,3]), [batch_size, obs_dim1, obs_dim2])})
#                            Q2 = sess.run(targetQN.Qout, feed_dict={targetQN.observation:np.reshape(np.vstack(trainBatch[:,3]), [batch_size, obs_dim1, obs_dim2])})
#                            Q1 = sess.run(mainQN.predict,feed_dict={mainQN.scalarInput:np.vstack(trainBatch[:,3])}) # added for DQN version 1
#                            Q2 = sess.run(targetQN.Qout,feed_dict={targetQN.scalarInput:np.vstack(trainBatch[:,3])}) # added for DQN version 1
                            Q1 = sess.run(mainQN.predict,feed_dict={mainQN.scalarInput:np.vstack(trainBatch[:,3]),
                                                                    mainQN.goalInput:np.vstack(trainBatch[:,6])}) # added for DQN version 2
                            Q2 = sess.run(targetQN.Qout,feed_dict={targetQN.scalarInput:np.vstack(trainBatch[:,3]),
                                                                   targetQN.goalInput:np.vstack(trainBatch[:,6])}) # added for DQN version 2 
                            end_multiplier = -(trainBatch[:,4] - 1)  # 1 while the episode continues, 0 when done
                            doubleQ = Q2[range(batch_size), Q1]
                            targetQ = trainBatch[:,2] + (y*doubleQ * end_multiplier)
                            # Update the network with our target values
#                            _ = sess.run(mainQN.updateModel, feed_dict={ mainQN.observation:np.reshape(np.vstack(trainBatch[:,0]), [batch_size, obs_dim1, obs_dim2]),
#                                                                         mainQN.targetQ:targetQ,
#                                                                         mainQN.actions:trainBatch[:,1]})
#                            _ = sess.run(mainQN.updateModel, feed_dict={mainQN.scalarInput:np.vstack(trainBatch[:,0]),
#                                                                        mainQN.targetQ:targetQ, 
#                                                                        mainQN.actions:trainBatch[:,1]}) # added for DQN version 1
                            _ = sess.run(mainQN.updateModel, feed_dict={mainQN.scalarInput:np.vstack(trainBatch[:,0]),
                                                                        mainQN.goalInput:np.vstack(trainBatch[:,5]),
                                                                        mainQN.targetQ:targetQ, 
                                                                        mainQN.actions:trainBatch[:,1]}) # added for DQN version 2
                            # Update the target network toward the primary network
                            algorithm_DQN.updateTarget(targetOps, sess)
                            
                    rAll += r
                    s = s1
                    sg = sg1 # added for DQN version 2
                    
                    if d:  # End the episode if the destination is reached
                        break
                    
                    print("reward at step " + str(step+1) + ": " + str(r))
                    print("total steps: " + str(total_steps))
                    print(" "*10)
                # End of one episode ---------------------------------------
                
                rAve = rAll / (step + 1)                
                actual_step_time = (time.time() - time_mark) / (step + 1)

                show.epi_summary(caption, rep, epi, step+1, rAll, rAve, actual_step_time)                
                
                myBuffer.add(episodeBuffer.buffer)
                stepList.append(step+1)
                rAllList.append(rAll)
                rAveList.append(rAve)
                
                # Periodically save the model
                if epi % 1000 == 0:
                    saver.save(sess, path + '/model-' + str(epi) + '.ckpt')
                    print("Model saved")
            
            saver.save(sess, path + '/model-' + str(epi) + '.ckpt')
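The inner update in Code Example #4 is the standard Double DQN target: the online network (mainQN) picks the argmax action for the next observation and the target network (targetQN) evaluates it, with terminal transitions masked out. A stripped-down NumPy sketch of just that target computation, with fabricated arrays standing in for the two sess.run outputs:

import numpy as np

batch_size, action_size, y = 4, 5, 0.99
rng = np.random.default_rng(0)

rewards = rng.random(batch_size)                     # plays the role of trainBatch[:, 2]
dones = np.array([0, 0, 1, 0])                       # plays the role of trainBatch[:, 4]
Q1 = rng.integers(0, action_size, size=batch_size)   # argmax actions from the online net
Q2 = rng.random((batch_size, action_size))           # Q-values from the target net

end_multiplier = -(dones - 1)                        # 1 for ongoing steps, 0 for terminal ones
doubleQ = Q2[np.arange(batch_size), Q1]              # target net evaluates the online net's choice
targetQ = rewards + y * doubleQ * end_multiplier
print(targetQ)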
Code Example #5
import draw   # module providing clear(), line(), ontimer() and camera, used below
import maze   # module providing the Maze class, used below
import robot
from math import *

import threading

speed = 1           # robot.update() calls per redraw frame
target_frames = 30  # target frame rate of the redraw timer (frames per second)

maze = maze.Maze(5, 5)  # a 5x5 maze; note the name now shadows the maze module
maze.generate()
lines = []
# lines = [((0,0), (0,40)), ((0,0), (40,0)), ((40,0), (40,40)), ((0,40), (40,40))]
for l in maze.getWalls(10, 10):
    lines.append(l)

robot = robot.setup(100, 100, lines)  # setup() returns a robot instance; rebinding the name shadows the module


def redraw():
    draw.clear()

    for i in range(speed):
        robot.update()

    for l in lines:
        draw.line(*l)

    robot.draw()
    draw.camera = robot.getPos()                 # keep the camera centered on the robot
    draw.ontimer(redraw, 1000 // target_frames)  # reschedule itself to hit the target frame rate
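Nothing in Code Example #5 actually starts the animation; presumably the surrounding script calls redraw() once so it can reschedule itself through draw.ontimer. A hedged sketch of such an entry point (whether an explicit event-loop call is also needed depends on the unseen draw module):

if __name__ == "__main__":
    redraw()  # schedules itself again via draw.ontimer every 1000 // target_frames ms
    # Depending on how draw is implemented (e.g. a turtle-style wrapper),
    # an event-loop call may also be required to keep the window open.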