Example #1
if args.random_steps:
    # populate replay memory with random steps
    logger.info("Populating replay memory with %d random moves" % args.random_steps)
    stats.reset()
    agent.play_random(args.random_steps, args)
    stats.write(0, "random")

# loop over epochs
for epoch in range(args.start_epoch, args.epochs):
    logger.info("Epoch #%d" % (epoch + 1))

    if args.train_steps:
        logger.info(" Training for %d steps" % args.train_steps)
        # Set env mode to train so that loss of life is considered terminal
        env.setMode('train')
        stats.reset()
        agent.train(args.train_steps, epoch)
        stats.write(epoch + 1, "train")

        if args.save_weights_prefix:
            filename = args.save_weights_prefix + "_%d.prm" % (epoch + 1)
            logger.info("Saving weights to %s" % filename)
            net.save_weights(filename)

    if args.test_steps:
        logger.info(" Testing for %d steps" % args.test_steps)
        # Set env mode to test so that loss of life is not considered terminal
        env.setMode('test')
        stats.reset()
        agent.test(args.test_steps, epoch)
        stats.write(epoch + 1, "test")

stats.close()
logger.info("All done")
Example #2
class Driver(object):
    '''
    A driver object for the SCRC
    '''

    def __init__(self, args):
        '''Constructor'''
        self.WARM_UP = 0
        self.QUALIFYING = 1
        self.RACE = 2
        self.UNKNOWN = 3
        self.stage = args.stage
        
        self.parser = msgParser.MsgParser()
        self.state = carState.CarState()
        self.control = carControl.CarControl()

        self.steers = [-1.0, -0.8, -0.6, -0.5, -0.4, -0.3, -0.2, -0.15, -0.1, -0.05, 0.0, 0.05, 0.1, 0.15, 0.2, 0.3, 0.4, 0.5, 0.6, 0.8, 1.0]
        self.speeds = [-1.0, -0.5, 0.0, 0.5, 1.0]
        self.num_inputs = 19
        self.num_steers = len(self.steers)
        self.num_speeds = len(self.speeds)
        self.num_actions = self.num_steers + self.num_speeds
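        # the Q-network predicts num_steers + num_speeds values per state: the
        # first num_steers outputs score the discrete steering choices and the
        # last num_speeds outputs score the discrete throttle/brake choices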
        
        self.net = DeepQNetwork(self.num_inputs, self.num_steers, self.num_speeds, args)
        self.mem = ReplayMemory(args.replay_size, self.num_inputs, args)
        self.minibatch_size = args.batch_size

        if args.load_replay:
            self.mem.load(args.load_replay)
        if args.load_weights:
            self.net.load_weights(args.load_weights)
        self.save_weights_prefix = args.save_weights_prefix
        self.save_interval = args.save_interval
        self.save_replay = args.save_replay

        self.enable_training = args.enable_training
        self.enable_exploration = args.enable_exploration
        self.save_csv = args.save_csv
        if self.save_csv:
            self.csv_file = open(args.save_csv, "wb")
            self.csv_writer = csv.writer(self.csv_file)
            self.csv_writer.writerow(['episode', 'distFromStart', 'distRaced', 'curLapTime', 'lastLapTime', 'racePos', 'epsilon', 'replay_memory', 'train_steps'])

        self.total_train_steps = 0
        self.exploration_decay_steps = args.exploration_decay_steps
        self.exploration_rate_start = args.exploration_rate_start
        self.exploration_rate_end = args.exploration_rate_end
        self.skip = args.skip

        self.show_sensors = args.show_sensors
        self.show_qvalues = args.show_qvalues

        self.episode = 0
        self.distances = []
        self.onRestart()
        
        if self.show_sensors:
            from sensorstats import Stats
            self.stats = Stats(inevery=8)
        
        if self.show_qvalues:
            from plotq import PlotQ
            self.plotq = PlotQ(self.num_steers, self.num_speeds)

    def init(self):
        '''Return init string with rangefinder angles'''
        self.angles = [0 for x in range(19)]
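        # the outer five sensors on each side are spaced 15 degrees apart
        # (out to +/-90), the inner ones 5 degrees apart, so resolution is
        # finest straight ahead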
        
        for i in range(5):
            self.angles[i] = -90 + i * 15
            self.angles[18 - i] = 90 - i * 15
        
        for i in range(5, 9):
            self.angles[i] = -20 + (i-5) * 5
            self.angles[18 - i] = 20 - (i-5) * 5
        
        return self.parser.stringify({'init': self.angles})

    def getState(self):
        #state = np.array([self.state.getSpeedX() / 200.0, self.state.getAngle(), self.state.getTrackPos()])
        #state = np.array(self.state.getTrack() + [self.state.getSpeedX()]) / 200.0
        state = np.array(self.state.getTrack()) / 200.0
        assert state.shape == (self.num_inputs,)
        return state

    def getReward(self, terminal):
        if terminal:
            reward = -1000
        else:
            dist = self.state.getDistFromStart()
            if self.prev_dist is not None:
                reward = max(0, dist - self.prev_dist) * 10
                assert reward >= 0, "reward: %f" % reward
            else:
                reward = 0
            self.prev_dist = dist
            
            #reward -= self.state.getTrackPos()
            #print "reward:", reward
        
        return reward

    def getTerminal(self):
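        # all 19 track rangefinders report -1 once the car leaves the track,
        # which this driver treats as the terminal (episode-ending) condition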
        return np.all(np.array(self.state.getTrack()) == -1)

    def getEpsilon(self):
        # calculate decaying exploration rate
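        # epsilon falls linearly from exploration_rate_start to
        # exploration_rate_end over the first exploration_decay_steps training
        # steps; e.g. start=1.0, end=0.1, decay_steps=10000 gives 0.55 at step
        # 5000 (those numbers are only an illustration, not the defaults)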
        if self.total_train_steps < self.exploration_decay_steps:
            return self.exploration_rate_start - self.total_train_steps * (self.exploration_rate_start - self.exploration_rate_end) / self.exploration_decay_steps
        else:
            return self.exploration_rate_end
 
    def drive(self, msg):
        # parse incoming message
        self.state.setFromMsg(msg)
        
        # show sensors
        if self.show_sensors:
            self.stats.update(self.state)

        # training
        if self.enable_training and self.mem.count >= self.minibatch_size:
            minibatch = self.mem.getMinibatch()
            self.net.train(minibatch)
            self.total_train_steps += 1
            #print "total_train_steps:", self.total_train_steps

        # skip frame and use the same action as previously
        if self.skip > 0:
            self.frame = (self.frame + 1) % self.skip
            if self.frame != 0:
                return self.control.toMsg()

        # fetch state, calculate reward and terminal indicator  
        state = self.getState()
        terminal = self.getTerminal()
        reward = self.getReward(terminal)
        #print "reward:", reward

        # store new experience in replay memory
        if self.enable_training and self.prev_state is not None and self.prev_steer is not None and self.prev_speed is not None:
            self.mem.add(self.prev_state, self.prev_steer, self.prev_speed, reward, state, terminal)

        # if terminal state (out of track), then restart game
        if terminal:
            #print "terminal state, restarting"
            self.control.setMeta(1)
            return self.control.toMsg()
        else:
            self.control.setMeta(0)

        # choose actions for wheel and speed
        epsilon = self.getEpsilon()
        if self.enable_exploration and random.random() < epsilon:
            #print "random move"
            steer = random.randrange(self.num_steers)
            #speed = random.randrange(self.num_speeds)
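            # indices 2..num_speeds-1 of self.speeds are the non-negative
            # throttle values (0.0, 0.5, 1.0), so random exploration never
            # brakes or reverses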
            speed = random.randint(2, self.num_speeds-1)
        else:
            # use broadcasting to efficiently produce minibatch of desired size
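            # state has shape (num_inputs,); adding it to a zero array of shape
            # (minibatch_size, 1) broadcasts it to (minibatch_size, num_inputs),
            # so every row of the minibatch is a copy of the current state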
            minibatch = state + np.zeros((self.minibatch_size, 1))
            Q = self.net.predict(minibatch)
            assert Q.shape == (self.minibatch_size, self.num_actions), "Q.shape: %s" % str(Q.shape)
            #print "steer Q: ", Q[0,:self.num_steers]
            #print "speed Q:", Q[0,-self.num_speeds:]
            steer = np.argmax(Q[0, :self.num_steers])
            speed = np.argmax(Q[0, -self.num_speeds:])
            if self.show_qvalues:
                self.plotq.update(Q[0])
        #print "steer:", steer, "speed:", speed

        # gears are always automatic
        gear = self.gear()

        # set actions
        self.setSteerAction(steer)
        self.setGearAction(gear)
        self.setSpeedAction(speed)

        # remember state and actions 
        self.prev_state = state
        self.prev_steer = steer
        self.prev_speed = speed

        #print "total_train_steps:", self.total_train_steps, "mem_count:", self.mem.count

        #print "reward:", reward, "epsilon:", epsilon

        return self.control.toMsg()

    def gear(self):
        rpm = self.state.getRpm()
        gear = self.state.getGear()
        
        if self.prev_rpm is None:
            up = True
        else:
            # rpm is rising if it increased since the previous frame
            up = (self.prev_rpm - rpm) < 0
        
        if up and rpm > 7000 and gear < 6:
            gear += 1
        
        if not up and rpm < 3000 and gear > 0:
            gear -= 1
        
        return gear
        
    def setSteerAction(self, steer):
        assert 0 <= steer < self.num_steers
        self.control.setSteer(self.steers[steer])

    def setGearAction(self, gear):
        assert -1 <= gear <= 6
        self.control.setGear(gear)

    def setSpeedAction(self, speed):
        assert 0 <= speed < self.num_speeds
        accel = self.speeds[speed]
        if accel >= 0:
            #print "accel", accel
            self.control.setAccel(accel)
            self.control.setBrake(0)
        else:
            #print "brake", -accel
            self.control.setAccel(0)
            self.control.setBrake(-accel)
    
    def onShutDown(self):
        if self.save_weights_prefix:
            self.net.save_weights(self.save_weights_prefix + "_" + str(self.episode) + ".pkl")
        
        if self.save_replay:
            self.mem.save(self.save_replay)

        if self.save_csv:
            self.csv_file.close()

    def onRestart(self):
    
        self.prev_rpm = None
        self.prev_dist = None
        self.prev_state = None
        self.prev_steer = None
        self.prev_speed = None
        self.frame = -1

        if self.episode > 0:
            dist = self.state.getDistRaced()
            self.distances.append(dist)
            epsilon = self.getEpsilon()
            print "Episode:", self.episode, "\tDistance:", dist, "\tMax:", max(self.distances), "\tMedian10:", np.median(self.distances[-10:]), \
                "\tEpsilon:", epsilon, "\tReplay memory:", self.mem.count

            if self.save_weights_prefix and self.save_interval > 0 and self.episode % self.save_interval == 0:
                self.net.save_weights(self.save_weights_prefix + "_" + str(self.episode) + ".pkl")
                #self.mem.save(self.save_weights_prefix + "_" + str(self.episode) + "_replay.pkl")

            if self.save_csv:
                self.csv_writer.writerow([
                    self.episode, 
                    self.state.getDistFromStart(), 
                    self.state.getDistRaced(), 
                    self.state.getCurLapTime(), 
                    self.state.getLastLapTime(), 
                    self.state.getRacePos(), 
                    epsilon, 
                    self.mem.count,
                    self.total_train_steps
                ])
                self.csv_file.flush()

        self.episode += 1
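
In drive() above, a single forward pass produces one row of Q-values covering both action heads: the steering index is the argmax over the first num_steers entries and the speed index is the argmax over the last num_speeds entries. A small standalone illustration of that slicing (the Q row here is random, purely for demonstration):

import numpy as np

num_steers, num_speeds = 21, 5                    # sizes from the Driver above
q_row = np.random.rand(num_steers + num_speeds)   # stand-in for self.net.predict(...)[0]

steer = np.argmax(q_row[:num_steers])    # index into self.steers
speed = np.argmax(q_row[-num_speeds:])   # index into self.speeds
print("steer index %d, speed index %d" % (steer, speed))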
Example #3
if args.random_steps:
  # populate replay memory with random steps
  logger.info("Populating replay memory with %d random moves" % args.random_steps)
  stats.reset()
  agent.play_random(args.random_steps)
  stats.write(0, "random")

# loop over epochs
for epoch in xrange(args.epochs):
  logger.info("Epoch #%d" % (epoch + 1))

  if args.train_steps:
    logger.info(" Training for %d steps" % args.train_steps)
    stats.reset()
    agent.train(args.train_steps, epoch)
    stats.write(epoch + 1, "train")

    if args.save_weights_prefix:
      filename = args.save_weights_prefix + "_%d.prm" % (epoch + 1)
      logger.info("Saving weights to %s" % filename)
      net.save_weights(filename)

  if args.test_steps:
    logger.info(" Testing for %d steps" % args.test_steps)
    stats.reset()
    agent.test(args.test_steps, epoch)
    stats.write(epoch + 1, "test")

stats.close()
logger.info("All done")
Example #4
class Driver(object):
    '''
    A driver object for the SCRC
    '''

    def __init__(self, args):
        '''Constructor'''
        self.WARM_UP = 0
        self.QUALIFYING = 1
        self.RACE = 2
        self.UNKNOWN = 3
        self.stage = args.stage
        
        self.parser = msgParser.MsgParser()
        self.state = carState.CarState()
        self.control = carControl.CarControl()

        self.steers = [-1.0, -0.8, -0.6, -0.5, -0.4, -0.3, -0.2, -0.15, -0.1, -0.05, 0.0, 0.05, 0.1, 0.15, 0.2, 0.3, 0.4, 0.5, 0.6, 0.8, 1.0]
        self.speeds = [-1.0, -0.5, 0.0, 0.5, 1.0]
        self.num_inputs = 19
        self.num_steers = len(self.steers)
        self.num_speeds = len(self.speeds)
        self.num_actions = self.num_steers + self.num_speeds
        
        self.net = DeepQNetwork(self.num_inputs, self.num_steers, self.num_speeds, args)
        self.mem = ReplayMemory(args.replay_size, self.num_inputs, args)
        self.minibatch_size = args.batch_size

        if args.load_weights:
            self.net.load_weights(args.load_weights)
        self.save_weights_prefix = args.save_weights_prefix
        self.pretrained_network = args.pretrained_network

        self.steer_lock = 0.785398
        self.max_speed = 100

        self.algorithm = args.algorithm
        self.device = args.device
        self.mode = args.mode
        self.maxwheelsteps = args.maxwheelsteps
        
        self.enable_training = args.enable_training
        self.enable_exploration = args.enable_exploration

        self.total_train_steps = 0
        self.exploration_decay_steps = args.exploration_decay_steps
        self.exploration_rate_start = args.exploration_rate_start
        self.exploration_rate_end = args.exploration_rate_end

        self.show_sensors = args.show_sensors
        self.show_qvalues = args.show_qvalues

        self.episode = 0
        self.onRestart()
        
        if self.show_sensors:
            from sensorstats import Stats
            self.stats = Stats(inevery=8)
        
        if self.show_qvalues:
            from plotq import PlotQ
            self.plotq = PlotQ(self.num_steers, self.num_speeds)

        if self.device == 'wheel':
            from wheel import Wheel
            self.wheel = Wheel(args.joystick_nr, args.autocenter, args.gain, args.min_force, args.max_force)

    def init(self):
        '''Return init string with rangefinder angles'''
        self.angles = [0 for x in range(19)]
        
        for i in range(5):
            self.angles[i] = -90 + i * 15
            self.angles[18 - i] = 90 - i * 15
        
        for i in range(5, 9):
            self.angles[i] = -20 + (i-5) * 5
            self.angles[18 - i] = 20 - (i-5) * 5
        
        return self.parser.stringify({'init': self.angles})

    def getState(self):
        #state = np.array([self.state.getSpeedX() / 200.0, self.state.getAngle(), self.state.getTrackPos()])
        #state = np.array(self.state.getTrack() + [self.state.getSpeedX()]) / 200.0
        state = np.array(self.state.getTrack()) / 200.0
        assert state.shape == (self.num_inputs,)
        return state

    def getReward(self, terminal):
        if terminal:
            reward = -1000
        else:
            dist = self.state.getDistFromStart()
            if self.prev_dist is not None:
                reward = max(0, dist - self.prev_dist) * 10
                assert reward >= 0, "reward: %f" % reward
            else:
                reward = 0
            self.prev_dist = dist
            
            #reward -= self.state.getTrackPos()
            #print "reward:", reward
        
        return reward

    def getTerminal(self):
        return np.all(np.array(self.state.getTrack()) == -1)

    def getEpsilon(self):
        # calculate decaying exploration rate
        if self.total_train_steps < self.exploration_decay_steps:
            return self.exploration_rate_start - self.total_train_steps * (self.exploration_rate_start - self.exploration_rate_end) / self.exploration_decay_steps
        else:
            return self.exploration_rate_end
 
    def drive(self, msg):
        # parse incoming message
        self.state.setFromMsg(msg)
        
        # show sensors
        if self.show_sensors:
            self.stats.update(self.state)
        
        # fetch state, calculate reward and terminal indicator  
        state = self.getState()
        terminal = self.getTerminal()
        reward = self.getReward(terminal)
        #print "reward:", reward

        # store new experience in replay memory
        if self.enable_training and self.prev_state is not None and self.prev_steer is not None and self.prev_speed is not None:
            self.mem.add(self.prev_state, self.prev_steer, self.prev_speed, reward, state, terminal)

        # if terminal state (out of track), then restart game
        if terminal:
            print "terminal state, restarting"
            self.control.setMeta(1)
            return self.control.toMsg()
        else:
            self.control.setMeta(0)

        # choose actions for wheel and speed
        if self.enable_exploration and random.random() < self.getEpsilon():
            #print "random move"
            steer = random.randrange(self.num_steers)
            #speed = random.randrange(self.num_speeds)
            speed = random.randint(2, self.num_speeds-1)
        elif self.algorithm == 'network':
            # use broadcasting to efficiently produce minibatch of desired size
            minibatch = state + np.zeros((self.minibatch_size, 1))
            Q = self.net.predict(minibatch)
            assert Q.shape == (self.minibatch_size, self.num_actions), "Q.shape: %s" % str(Q.shape)
            #print "steer Q: ", Q[0,:21]
            #print "speed Q:", Q[0,-5:]
            steer = np.argmax(Q[0, :self.num_steers])
            speed = np.argmax(Q[0, -self.num_speeds:])
            if self.show_qvalues:
                self.plotq.update(Q[0])
        elif self.algorithm == 'hardcoded':
            steer = self.getSteerAction(self.steer())
            speed = self.getSpeedActionAccel(self.speed())
        else:
            assert False, "Unknown algorithm"
        #print "steer:", steer, "speed:", speed

        # gears are always automatic
        gear = self.gear()

        # check for manual override 
        # might be partial, so we always need to choose algorithmic actions first
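        # note: this block (and the button handling below) assumes
        # args.device == 'wheel'; otherwise self.wheel is never created in __init__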
        events = self.wheel.getEvents()
        if self.mode == 'override' and self.wheel.supportsDrive():
            # wheel
            for event in events:
                if self.wheel.isWheelMotion(event):
                    self.wheelsteps = self.maxwheelsteps

            if self.wheelsteps > 0:
                wheel = self.wheel.getWheel()
                steer = self.getSteerAction(wheel)
                self.wheelsteps -= 1

            # gas pedal
            accel = self.wheel.getAccel()
            if accel > 0:
                speed = self.getSpeedActionAccel(accel)
            
            # brake pedal
            brake = self.wheel.getBrake()
            if brake > 0:
                speed = self.getSpeedActionBrake(brake)

        # check for wheel buttons always, not only in override mode
        for event in events:
            if self.wheel.isButtonDown(event, 2):
                self.algorithm = 'network'
                self.mode = 'override'
                self.wheel.generateForce(0)
                print "Switched to network algorithm"
            elif self.wheel.isButtonDown(event, 3):
                self.net.load_weights(self.pretrained_network)
                self.algorithm = 'network'
                self.mode = 'ff'
                self.enable_training = False
                print "Switched to pretrained network"
            elif self.wheel.isButtonDown(event, 4):
                self.enable_training = not self.enable_training
                print "Switched training", "ON" if self.enable_training else "OFF"
            elif self.wheel.isButtonDown(event, 5):
                self.algorithm = 'hardcoded'
                self.mode = 'ff'
                print "Switched to hardcoded algorithm"
            elif self.wheel.isButtonDown(event, 6):
                self.enable_exploration = not self.enable_exploration
                self.mode = 'override'
                self.wheel.generateForce(0)
                print "Switched exploration", "ON" if self.enable_exploration else "OFF"
            elif self.wheel.isButtonDown(event, 7):
                self.mode = 'ff' if self.mode == 'override' else 'override'
                if self.mode == 'override':
                    self.wheel.generateForce(0)
                print "Switched force feedback", "ON" if self.mode == 'ff' else "OFF"
            elif self.wheel.isButtonDown(event, 0) or self.wheel.isButtonDown(event, 8):
                gear = max(-1, gear - 1)
            elif self.wheel.isButtonDown(event, 1) or self.wheel.isButtonDown(event, 9):
                gear = min(6, gear + 1)

        # set actions
        self.setSteerAction(steer)
        self.setGearAction(gear)
        self.setSpeedAction(speed)

        # turn wheel using force feedback
        if self.mode == 'ff' and self.wheel.supportsForceFeedback():
            wheel = self.wheel.getWheel()
            self.wheel.generateForce(self.control.getSteer()-wheel)

        # remember state and actions 
        self.prev_state = state
        self.prev_steer = steer
        self.prev_speed = speed

        # training
        if self.enable_training and self.mem.count >= self.minibatch_size:
            minibatch = self.mem.getMinibatch()
            self.net.train(minibatch)
            self.total_train_steps += 1
            #print "total_train_steps:", self.total_train_steps

        #print "total_train_steps:", self.total_train_steps, "mem_count:", self.mem.count

        return self.control.toMsg()

    def setSteerAction(self, steer):
        self.control.setSteer(self.steers[steer])

    def setGearAction(self, gear):
        assert -1 <= gear <= 6
        self.control.setGear(gear)

    def setSpeedAction(self, speed):
        accel = self.speeds[speed]
        if accel >= 0:
            #print "accel", accel
            self.control.setAccel(accel)
            self.control.setBrake(0)
        else:
            #print "brake", -accel
            self.control.setAccel(0)
            self.control.setBrake(-accel)

    def getSteerAction(self, wheel):
        steer = np.argmin(np.abs(np.array(self.steers) - wheel))
        return steer

    def getSpeedActionAccel(self, accel):
        speed = np.argmin(np.abs(np.array(self.speeds) - accel))
        return speed

    def getSpeedActionBrake(self, brake):
        speed = np.argmin(np.abs(np.array(self.speeds) + brake))
        return speed

    def steer(self):
        angle = self.state.angle
        dist = self.state.trackPos
        
        # simple proportional rule: steer toward the track axis, correcting the
        # heading angle and half the lateral offset, scaled by the steering
        # lock (0.785398 rad = 45 degrees)
        steer = (angle - dist*0.5)/self.steer_lock
        return steer
    
    def gear(self):
        rpm = self.state.getRpm()
        gear = self.state.getGear()
        
        if self.prev_rpm is None:
            up = True
        else:
            up = (self.prev_rpm - rpm) < 0
        
        if up and rpm > 7000:
            gear += 1
        
        if not up and rpm < 3000:
            gear -= 1
        
        return gear

    def speed(self):
        speed = self.state.getSpeedX()
        accel = self.prev_accel
        
        if speed < self.max_speed:
            accel += 0.1
            if accel > 1:
                accel = 1.0
        else:
            accel -= 0.1
            if accel < 0:
                accel = 0.0
        
        self.prev_accel = accel
        return accel
        
    def onShutDown(self):
        pass
    
    def onRestart(self):
        if self.mode == 'ff':
            self.wheel.generateForce(0)
    
        self.prev_rpm = None
        self.prev_accel = 0
        self.prev_dist = None
        self.prev_state = None
        self.prev_steer = None
        self.prev_speed = None
        self.wheelsteps = 0

        if self.save_weights_prefix and self.episode > 0:
            self.net.save_weights(self.save_weights_prefix + "_" + str(self.episode) + ".pkl")

        self.episode += 1
        print "Episode", self.episode