def __init__(self, args):
    '''Constructor'''
    self.WARM_UP = 0
    self.QUALIFYING = 1
    self.RACE = 2
    self.UNKNOWN = 3
    self.stage = args.stage

    self.parser = msgParser.MsgParser()
    self.state = carState.CarState()
    self.control = carControl.CarControl()

    self.steers = [-1.0, -0.8, -0.6, -0.5, -0.4, -0.3, -0.2, -0.15, -0.1, -0.05, 0.0, 0.05, 0.1, 0.15, 0.2, 0.3, 0.4, 0.5, 0.6, 0.8, 1.0]
    self.speeds = [-1.0, -0.5, 0.0, 0.5, 1.0]
    self.num_inputs = 19
    self.num_steers = len(self.steers)
    self.num_speeds = len(self.speeds)
    self.num_actions = self.num_steers + self.num_speeds

    self.net = DeepQNetwork(self.num_inputs, self.num_steers, self.num_speeds, args)
    self.mem = ReplayMemory(args.replay_size, self.num_inputs, args)
    self.minibatch_size = args.batch_size

    if args.load_replay:
        self.mem.load(args.load_replay)
    if args.load_weights:
        self.net.load_weights(args.load_weights)
    self.save_weights_prefix = args.save_weights_prefix
    self.save_interval = args.save_interval
    self.save_replay = args.save_replay

    self.enable_training = args.enable_training
    self.enable_exploration = args.enable_exploration

    self.save_csv = args.save_csv
    if self.save_csv:
        self.csv_file = open(args.save_csv, "wb")
        self.csv_writer = csv.writer(self.csv_file)
        self.csv_writer.writerow(['episode', 'distFromStart', 'distRaced', 'curLapTime', 'lastLapTime',
                'racePos', 'epsilon', 'replay_memory', 'train_steps'])

    self.total_train_steps = 0
    self.exploration_decay_steps = args.exploration_decay_steps
    self.exploration_rate_start = args.exploration_rate_start
    self.exploration_rate_end = args.exploration_rate_end
    self.skip = args.skip
    self.show_sensors = args.show_sensors
    self.show_qvalues = args.show_qvalues

    self.episode = 0
    self.distances = []
    self.onRestart()

    if self.show_sensors:
        from sensorstats import Stats
        self.stats = Stats(inevery=8)
    if self.show_qvalues:
        from plotq import PlotQ
        self.plotq = PlotQ(self.num_steers, self.num_speeds)
class Driver(object):
    ''' A driver object for the SCRC '''

    def __init__(self, args):
        '''Constructor'''
        self.WARM_UP = 0
        self.QUALIFYING = 1
        self.RACE = 2
        self.UNKNOWN = 3
        self.stage = args.stage

        self.parser = msgParser.MsgParser()
        self.state = carState.CarState()
        self.control = carControl.CarControl()

        self.steers = [-1.0, -0.8, -0.6, -0.5, -0.4, -0.3, -0.2, -0.15, -0.1, -0.05, 0.0, 0.05, 0.1, 0.15, 0.2, 0.3, 0.4, 0.5, 0.6, 0.8, 1.0]
        #self.speeds = [-1.0, -0.5, 0.0, 0.5, 1.0]
        self.speeds = [0.0, 0.5, 1.0]
        self.num_inputs = 20
        self.num_steers = len(self.steers)
        self.num_speeds = len(self.speeds)

        self.learn = args.learn
        if self.learn == 'both':
            self.net = DeepQNetwork(self.num_inputs, (self.num_steers, self.num_speeds), args)
            self.num_actions = 2
        elif self.learn == 'steer':
            self.net = DeepQNetwork(self.num_inputs, (self.num_steers,), args)
            self.num_actions = 1
        elif self.learn == 'speed':
            self.net = DeepQNetwork(self.num_inputs, (self.num_speeds,), args)
            self.num_actions = 1
        else:
            assert False

        self.mem = ReplayMemory(args.replay_size, self.num_inputs, self.num_actions)
        self.minibatch_size = args.batch_size

        if args.load_replay:
            self.mem.load(args.load_replay)
        if args.load_weights:
            self.net.load_weights(args.load_weights)
        self.save_weights_prefix = args.save_weights_prefix
        self.save_interval = args.save_interval
        self.save_replay = args.save_replay

        self.enable_training = args.enable_training
        self.enable_exploration = args.enable_exploration

        self.save_csv = args.save_csv
        if self.save_csv:
            self.csv_file = open(args.save_csv + '.csv', "wb")
            self.csv_writer = csv.writer(self.csv_file)
            self.csv_writer.writerow(['episode', 'distFromStart', 'distRaced', 'curLapTime', 'lastLapTime',
                    'racePos', 'epsilon', 'replay_memory', 'train_steps', 'avgmaxQ', 'avgloss'])

        self.total_train_steps = 0
        self.exploration_decay_steps = args.exploration_decay_steps
        self.exploration_rate_start = args.exploration_rate_start
        self.exploration_rate_end = args.exploration_rate_end
        self.skip = args.skip
        self.repeat_train = args.repeat_train
        self.show_sensors = args.show_sensors
        self.show_qvalues = args.show_qvalues

        self.steer_lock = 0.785398
        self.max_speed = 100

        self.loss_sum = self.loss_steps = 0
        self.maxQ_sum = self.maxQ_steps = 0

        self.episode = 0
        self.distances = []
        self.onRestart()

        if self.show_sensors:
            from sensorstats import Stats
            self.stats = Stats(inevery=8)
        if self.show_qvalues:
            from plotq import PlotQ
            self.plotq = PlotQ(self.num_steers, self.num_speeds, args.update_qvalues_interval)

    def init(self):
        '''Return init string with rangefinder angles'''
        self.angles = [0 for x in range(19)]
        for i in range(5):
            self.angles[i] = -90 + i * 15
            self.angles[18 - i] = 90 - i * 15
        for i in range(5, 9):
            self.angles[i] = -20 + (i - 5) * 5
            self.angles[18 - i] = 20 - (i - 5) * 5
        return self.parser.stringify({'init': self.angles})

    def getState(self):
        #state = np.array([self.state.getSpeedX() / 200.0, self.state.getAngle(), self.state.getTrackPos()])
        #state = np.array(self.state.getTrack() + [self.state.getSpeedX()]) / 200.0
        #state = np.array(self.state.getTrack()) / 200.0
        state = np.array(self.state.getTrack() + [self.state.getSpeedX()])
        assert state.shape == (self.num_inputs,)
        return state

    def getReward(self, terminal):
        if terminal:
            reward = -1000
        else:
            dist = self.state.getDistFromStart()
            if self.prev_dist is not None:
                reward = max(0, dist - self.prev_dist) * 10
                assert reward >= 0, "reward: %f" % reward
            else:
                reward = 0
            self.prev_dist = dist

            reward -= abs(10 * self.state.getTrackPos())
            #print "reward:", reward

        return reward

    def getTerminal(self):
        return np.all(np.array(self.state.getTrack()) == -1)

    def getEpsilon(self):
        # calculate decaying exploration rate
        if self.total_train_steps < self.exploration_decay_steps:
            return self.exploration_rate_start - self.total_train_steps * (self.exploration_rate_start - self.exploration_rate_end) / self.exploration_decay_steps
        else:
            return self.exploration_rate_end

    def drive(self, msg):
        # parse incoming message
        self.state.setFromMsg(msg)

        # show sensors
        if self.show_sensors:
            self.stats.update(self.state)

        # training
        if self.enable_training and self.mem.count > 0:
            for i in xrange(self.repeat_train):
                minibatch = self.mem.getMinibatch(self.minibatch_size)
                self.loss_sum += self.net.train(minibatch)
                self.loss_steps += 1

        # skip frame and use the same action as previously
        if self.skip > 0:
            self.frame = (self.frame + 1) % self.skip
            if self.frame != 0:
                return self.control.toMsg()

        # fetch state, calculate reward and terminal indicator
        state = self.getState()
        terminal = self.getTerminal()
        reward = self.getReward(terminal)
        #print "reward:", reward

        # store new experience in replay memory
        if self.enable_training and self.prev_state is not None and self.prev_action is not None:
            self.mem.add(self.prev_state, self.prev_action, reward, state, terminal)

        # if terminal state (out of track), then restart game
        if self.enable_training and terminal:
            #print "terminal state, restarting"
            self.control.setMeta(1)
            return self.control.toMsg()
        else:
            self.control.setMeta(0)

        # use broadcasting to efficiently produce minibatch of desired size
        minibatch = state + np.zeros((self.minibatch_size, 1))
        Q = self.net.predict(minibatch)
        #print "Q:", Q[0]
        if self.show_qvalues:
            self.plotq.update(Q[0])
        self.maxQ_sum += np.max(Q[0])
        self.maxQ_steps += 1

        # choose actions for wheel and speed
        epsilon = self.getEpsilon()
        #print "epsilon:", epsilon
        if self.learn == 'steer' or self.learn == 'both':
            if self.enable_exploration and random.random() < epsilon:
                #print "random steer"
                steer = random.randrange(self.num_steers)
            else:
                #print "steer Q: ", Q[0,:self.num_steers]
                steer = np.argmax(Q[0, :self.num_steers])
                #steer = np.argmax(Q[0])
            self.setSteerAction(steer)
        else:
            self.steer()

        if self.learn == 'speed' or self.learn == 'both':
            if self.enable_exploration and random.random() < epsilon:
                #speed = random.randrange(self.num_speeds)
                # don't do braking
                speed = random.randint(2, self.num_speeds - 1)
            else:
                #print "speed Q:", Q[0,-self.num_speeds:]
                speed = np.argmax(Q[0, -self.num_speeds:])
                #speed = np.argmax(Q[0])
            self.setSpeedAction(speed)
        else:
            self.speed()

        #print "steer:", steer, "speed:", speed
        #print "speed:", speed
        #print "steer:", steer

        # gears are always automatic
        gear = self.gear()
        self.setGearAction(gear)

        # remember state and actions
        self.prev_state = state
        if self.learn == 'both':
            self.prev_action = np.array([steer, speed])
        elif self.learn == 'steer':
            self.prev_action = np.array([steer])
        elif self.learn == 'speed':
            self.prev_action = np.array([speed])
        else:
            assert False

        self.total_train_steps += 1
        #print "total_train_steps:", self.total_train_steps
        #print "total_train_steps:", self.total_train_steps, "mem_count:", self.mem.count
        #print "reward:", reward, "epsilon:", epsilon

        return self.control.toMsg()

    def steer(self):
        angle = self.state.angle
        dist = self.state.trackPos
        self.control.setSteer((angle - dist * 0.5) / self.steer_lock)

    def gear(self):
        speed = self.state.getSpeedX()
        gear = self.state.getGear()
        if speed < 25:
            gear = 1
        elif 30 < speed < 55:
            gear = 2
        elif 60 < speed < 85:
            gear = 3
        elif 90 < speed < 115:
            gear = 4
        elif 120 < speed < 145:
            gear = 5
        elif speed > 150:
            gear = 6
        return gear

    def speed(self):
        speed = self.state.getSpeedX()
        accel = self.control.getAccel()
        if speed < self.max_speed:
            accel += 0.1
            if accel > 1:
                accel = 1.0
        else:
            accel -= 0.1
            if accel < 0:
                accel = 0.0
        self.control.setAccel(accel)

    def setSteerAction(self, steer):
        assert 0 <= steer < self.num_steers
        self.control.setSteer(self.steers[steer])

    def setGearAction(self, gear):
        assert -1 <= gear <= 6
        self.control.setGear(gear)

    def setSpeedAction(self, speed):
        assert 0 <= speed < self.num_speeds
        accel = self.speeds[speed]
        if accel >= 0:
            #print "accel", accel
            self.control.setAccel(accel)
            self.control.setBrake(0)
        else:
            #print "brake", -accel
            self.control.setAccel(0)
            self.control.setBrake(-accel)

    def onShutDown(self):
        #if self.save_weights_prefix:
        #    self.net.save_weights(self.save_weights_prefix + "_" + str(self.episode - 1) + ".pkl")
        if self.save_replay:
            self.mem.save(self.save_replay)
        if self.save_csv:
            self.csv_file.close()

    def onRestart(self):
        self.prev_rpm = None
        self.prev_dist = None
        self.prev_state = None
        self.prev_action = None
        self.frame = -1

        if self.episode > 0:
            dist = self.state.getDistRaced()
            self.distances.append(dist)
            epsilon = self.getEpsilon()
            avgloss = self.loss_sum / max(self.loss_steps, 1)
            self.loss_sum = self.loss_steps = 0
            avgmaxQ = self.maxQ_sum / max(self.maxQ_steps, 1)
            self.maxQ_sum = self.maxQ_steps = 0
            print "Episode:", self.episode, "\tDistance:", dist, "\tMax:", max(self.distances), "\tMedian10:", np.median(self.distances[-10:]), \
                "\tEpsilon:", epsilon, "\tReplay memory:", self.mem.count, "\tAverage loss:", avgloss, "\tAverage maxQ", avgmaxQ

            if self.save_weights_prefix and self.save_interval > 0 and self.episode % self.save_interval == 0:
                self.net.save_weights(self.save_weights_prefix + "_" + str(self.episode) + ".pkl")
                #self.mem.save(self.save_weights_prefix + "_" + str(self.episode) + "_replay.pkl")

            if self.save_csv:
                self.csv_writer.writerow([
                    self.episode,
                    self.state.getDistFromStart(),
                    self.state.getDistRaced(),
                    self.state.getCurLapTime(),
                    self.state.getLastLapTime(),
                    self.state.getRacePos(),
                    epsilon,
                    self.mem.count,
                    self.total_train_steps,
                    avgmaxQ,
                    avgloss
                ])
                self.csv_file.flush()

        self.episode += 1
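The exploration schedule in Driver.getEpsilon() above is a plain linear anneal from exploration_rate_start to exploration_rate_end over exploration_decay_steps training steps, after which it stays at the end value. The standalone sketch below mirrors that formula with made-up parameter values (10000 steps, 1.0 to 0.1) purely to illustrate the shape of the decay; the project's actual settings come from the command-line args.

# Illustrative sketch of the linear epsilon decay used in Driver.getEpsilon().
# The default values here are examples, not the project's configuration.
def linear_epsilon(step, decay_steps=10000, rate_start=1.0, rate_end=0.1):
    """Linearly anneal epsilon from rate_start to rate_end over decay_steps."""
    if step < decay_steps:
        return rate_start - step * (rate_start - rate_end) / float(decay_steps)
    return rate_end

if __name__ == '__main__':
    # epsilon falls linearly until decay_steps, then stays at rate_end
    for step in (0, 2500, 5000, 10000, 20000):
        print("step %6d -> epsilon %.3f" % (step, linear_epsilon(step)))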
def __init__(self, args):
    '''Constructor'''
    self.WARM_UP = 0
    self.QUALIFYING = 1
    self.RACE = 2
    self.UNKNOWN = 3
    self.stage = args.stage

    self.parser = msgParser.MsgParser()
    self.state = carState.CarState()
    self.control = carControl.CarControl()

    self.steers = [-1.0, -0.8, -0.6, -0.5, -0.4, -0.3, -0.2, -0.15, -0.1, -0.05, 0.0, 0.05, 0.1, 0.15, 0.2, 0.3, 0.4, 0.5, 0.6, 0.8, 1.0]
    #self.speeds = [-1.0, -0.5, 0.0, 0.5, 1.0]
    self.speeds = [0.0, 0.5, 1.0]
    self.num_inputs = 3
    self.num_steers = len(self.steers)
    self.num_speeds = len(self.speeds)

    self.learn = args.learn
    if self.learn == 'both':
        self.net = LinearModel(self.num_inputs, (self.num_steers, self.num_speeds))
        self.num_actions = 2
    elif self.learn == 'steer':
        self.net = LinearModel(self.num_inputs, (self.num_steers,))
        self.num_actions = 1
    elif self.learn == 'speed':
        self.net = LinearModel(self.num_inputs, (self.num_speeds,))
        self.num_actions = 1
    else:
        assert False

    self.mem = ReplayMemory(args.replay_size, self.num_inputs, self.num_actions)

    if args.load_replay:
        self.mem.load(args.load_replay)
    if args.load_weights:
        self.net.load_weights(args.load_weights)
    self.save_weights_prefix = args.save_weights_prefix
    self.save_interval = args.save_interval
    self.save_replay = args.save_replay

    self.enable_training = args.enable_training
    self.enable_exploration = args.enable_exploration

    self.save_csv = args.save_csv
    if self.save_csv:
        self.csv_file = open(args.save_csv + '.csv', "wb")
        self.csv_writer = csv.writer(self.csv_file)
        self.csv_writer.writerow(['episode', 'distFromStart', 'distRaced', 'curLapTime', 'lastLapTime',
                'racePos', 'epsilon', 'replay_memory', 'train_steps', 'avgmaxQ', 'avgloss'])

    self.total_train_steps = 0
    self.exploration_decay_steps = args.exploration_decay_steps
    self.exploration_rate_start = args.exploration_rate_start
    self.exploration_rate_end = args.exploration_rate_end
    self.skip = args.skip
    self.repeat_train = args.repeat_train
    self.show_sensors = args.show_sensors
    self.show_qvalues = args.show_qvalues

    self.steer_lock = 0.785398
    self.max_speed = 100

    self.loss_sum = self.loss_steps = 0
    self.maxQ_sum = self.maxQ_steps = 0

    self.episode = 0
    self.distances = []
    self.onRestart()

    if self.show_sensors:
        from sensorstats import Stats
        self.stats = Stats(inevery=8)
    if self.show_qvalues:
        from plotq import PlotQ
        self.plotq = PlotQ(self.num_steers, self.num_speeds, args.update_qvalues_interval)
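The LinearModel class used in this variant is not shown in this section; the constructor only tells us it takes the input size and a tuple of per-head output sizes, and that the driver later calls load_weights and save_weights on it. The sketch below is an assumption about that minimal interface: a linear Q-function with one weight matrix per action head and pickle-based weight persistence. The Q-learning update (train) is deliberately omitted, so this should be read as an interface illustration, not the project's implementation.

# Hypothetical sketch of the interface the constructor above assumes.
import pickle
import numpy as np

class LinearModelSketch(object):
    def __init__(self, num_inputs, output_sizes):
        # one linear head per action dimension, e.g. (num_steers, num_speeds)
        self.weights = [np.zeros((num_inputs, n)) for n in output_sizes]
        self.biases = [np.zeros(n) for n in output_sizes]

    def predict(self, states):
        # states: (batch, num_inputs); Q-values of all heads concatenated column-wise
        qs = [np.dot(states, W) + b for W, b in zip(self.weights, self.biases)]
        return np.concatenate(qs, axis=1)

    def save_weights(self, filename):
        with open(filename, 'wb') as f:
            pickle.dump((self.weights, self.biases), f)

    def load_weights(self, filename):
        with open(filename, 'rb') as f:
            self.weights, self.biases = pickle.load(f)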
class Driver(object):
    ''' A driver object for the SCRC '''

    def __init__(self, args):
        '''Constructor'''
        self.WARM_UP = 0
        self.QUALIFYING = 1
        self.RACE = 2
        self.UNKNOWN = 3
        self.stage = args.stage

        self.parser = msgParser.MsgParser()
        self.state = carState.CarState()
        self.control = carControl.CarControl()

        self.steers = [-1.0, -0.8, -0.6, -0.5, -0.4, -0.3, -0.2, -0.15, -0.1, -0.05, 0.0, 0.05, 0.1, 0.15, 0.2, 0.3, 0.4, 0.5, 0.6, 0.8, 1.0]
        self.speeds = [-1.0, -0.5, 0.0, 0.5, 1.0]
        self.num_inputs = 19
        self.num_steers = len(self.steers)
        self.num_speeds = len(self.speeds)
        self.num_actions = self.num_steers + self.num_speeds

        self.net = DeepQNetwork(self.num_inputs, self.num_steers, self.num_speeds, args)
        self.mem = ReplayMemory(args.replay_size, self.num_inputs, args)
        self.minibatch_size = args.batch_size

        if args.load_weights:
            self.net.load_weights(args.load_weights)
        self.save_weights_prefix = args.save_weights_prefix
        self.pretrained_network = args.pretrained_network

        self.steer_lock = 0.785398
        self.max_speed = 100

        self.algorithm = args.algorithm
        self.device = args.device
        self.mode = args.mode
        self.maxwheelsteps = args.maxwheelsteps

        self.enable_training = args.enable_training
        self.enable_exploration = args.enable_exploration

        self.total_train_steps = 0
        self.exploration_decay_steps = args.exploration_decay_steps
        self.exploration_rate_start = args.exploration_rate_start
        self.exploration_rate_end = args.exploration_rate_end

        self.show_sensors = args.show_sensors
        self.show_qvalues = args.show_qvalues

        self.episode = 0
        self.onRestart()

        if self.show_sensors:
            from sensorstats import Stats
            self.stats = Stats(inevery=8)
        if self.show_qvalues:
            from plotq import PlotQ
            self.plotq = PlotQ(self.num_steers, self.num_speeds)
        if self.device == 'wheel':
            from wheel import Wheel
            self.wheel = Wheel(args.joystick_nr, args.autocenter, args.gain, args.min_force, args.max_force)

    def init(self):
        '''Return init string with rangefinder angles'''
        self.angles = [0 for x in range(19)]
        for i in range(5):
            self.angles[i] = -90 + i * 15
            self.angles[18 - i] = 90 - i * 15
        for i in range(5, 9):
            self.angles[i] = -20 + (i - 5) * 5
            self.angles[18 - i] = 20 - (i - 5) * 5
        return self.parser.stringify({'init': self.angles})

    def getState(self):
        #state = np.array([self.state.getSpeedX() / 200.0, self.state.getAngle(), self.state.getTrackPos()])
        #state = np.array(self.state.getTrack() + [self.state.getSpeedX()]) / 200.0
        state = np.array(self.state.getTrack()) / 200.0
        assert state.shape == (self.num_inputs,)
        return state

    def getReward(self, terminal):
        if terminal:
            reward = -1000
        else:
            dist = self.state.getDistFromStart()
            if self.prev_dist is not None:
                reward = max(0, dist - self.prev_dist) * 10
                assert reward >= 0, "reward: %f" % reward
            else:
                reward = 0
            self.prev_dist = dist
            #reward -= self.state.getTrackPos()
            #print "reward:", reward

        return reward

    def getTerminal(self):
        return np.all(np.array(self.state.getTrack()) == -1)

    def getEpsilon(self):
        # calculate decaying exploration rate
        if self.total_train_steps < self.exploration_decay_steps:
            return self.exploration_rate_start - self.total_train_steps * (self.exploration_rate_start - self.exploration_rate_end) / self.exploration_decay_steps
        else:
            return self.exploration_rate_end

    def drive(self, msg):
        # parse incoming message
        self.state.setFromMsg(msg)

        # show sensors
        if self.show_sensors:
            self.stats.update(self.state)

        # fetch state, calculate reward and terminal indicator
        state = self.getState()
        terminal = self.getTerminal()
        reward = self.getReward(terminal)
        #print "reward:", reward

        # store new experience in replay memory
        if self.enable_training and self.prev_state is not None and self.prev_steer is not None and self.prev_speed is not None:
            self.mem.add(self.prev_state, self.prev_steer, self.prev_speed, reward, state, terminal)

        # if terminal state (out of track), then restart game
        if terminal:
            print "terminal state, restarting"
            self.control.setMeta(1)
            return self.control.toMsg()
        else:
            self.control.setMeta(0)

        # choose actions for wheel and speed
        if self.enable_exploration and random.random() < self.getEpsilon():
            #print "random move"
            steer = random.randrange(self.num_steers)
            #speed = random.randrange(self.num_speeds)
            speed = random.randint(2, self.num_speeds - 1)
        elif self.algorithm == 'network':
            # use broadcasting to efficiently produce minibatch of desired size
            minibatch = state + np.zeros((self.minibatch_size, 1))
            Q = self.net.predict(minibatch)
            assert Q.shape == (self.minibatch_size, self.num_actions), "Q.shape: %s" % str(Q.shape)
            #print "steer Q: ", Q[0,:21]
            #print "speed Q:", Q[0,-5:]
            steer = np.argmax(Q[0, :self.num_steers])
            speed = np.argmax(Q[0, -self.num_speeds:])
            if self.show_qvalues:
                self.plotq.update(Q[0])
        elif self.algorithm == 'hardcoded':
            steer = self.getSteerAction(self.steer())
            speed = self.getSpeedActionAccel(self.speed())
        else:
            assert False, "Unknown algorithm"
        #print "steer:", steer, "speed:", speed

        # gears are always automatic
        gear = self.gear()

        # check for manual override
        # might be partial, so we always need to choose algorithmic actions first
        events = self.wheel.getEvents()
        if self.mode == 'override' and self.wheel.supportsDrive():
            # wheel
            for event in events:
                if self.wheel.isWheelMotion(event):
                    self.wheelsteps = self.maxwheelsteps

            if self.wheelsteps > 0:
                wheel = self.wheel.getWheel()
                steer = self.getSteerAction(wheel)
                self.wheelsteps -= 1

            # gas pedal
            accel = self.wheel.getAccel()
            if accel > 0:
                speed = self.getSpeedActionAccel(accel)

            # brake pedal
            brake = self.wheel.getBrake()
            if brake > 0:
                speed = self.getSpeedActionBrake(brake)

        # check for wheel buttons always, not only in override mode
        for event in events:
            if self.wheel.isButtonDown(event, 2):
                self.algorithm = 'network'
                self.mode = 'override'
                self.wheel.generateForce(0)
                print "Switched to network algorithm"
            elif self.wheel.isButtonDown(event, 3):
                self.net.load_weights(self.pretrained_network)
                self.algorithm = 'network'
                self.mode = 'ff'
                self.enable_training = False
                print "Switched to pretrained network"
            elif self.wheel.isButtonDown(event, 4):
                self.enable_training = not self.enable_training
                print "Switched training", "ON" if self.enable_training else "OFF"
            elif self.wheel.isButtonDown(event, 5):
                self.algorithm = 'hardcoded'
                self.mode = 'ff'
                print "Switched to hardcoded algorithm"
            elif self.wheel.isButtonDown(event, 6):
                self.enable_exploration = not self.enable_exploration
                self.mode = 'override'
                self.wheel.generateForce(0)
                print "Switched exploration", "ON" if self.enable_exploration else "OFF"
            elif self.wheel.isButtonDown(event, 7):
                self.mode = 'ff' if self.mode == 'override' else 'override'
                if self.mode == 'override':
                    self.wheel.generateForce(0)
                print "Switched force feedback", "ON" if self.mode == 'ff' else "OFF"
            elif self.wheel.isButtonDown(event, 0) or self.wheel.isButtonDown(event, 8):
                gear = max(-1, gear - 1)
            elif self.wheel.isButtonDown(event, 1) or self.wheel.isButtonDown(event, 9):
                gear = min(6, gear + 1)

        # set actions
        self.setSteerAction(steer)
        self.setGearAction(gear)
        self.setSpeedAction(speed)

        # turn wheel using force feedback
        if self.mode == 'ff' and self.wheel.supportsForceFeedback():
            wheel = self.wheel.getWheel()
            self.wheel.generateForce(self.control.getSteer() - wheel)

        # remember state and actions
        self.prev_state = state
        self.prev_steer = steer
        self.prev_speed = speed

        # training
        if self.enable_training and self.mem.count >= self.minibatch_size:
            minibatch = self.mem.getMinibatch()
            self.net.train(minibatch)
            self.total_train_steps += 1
            #print "total_train_steps:", self.total_train_steps
            #print "total_train_steps:", self.total_train_steps, "mem_count:", self.mem.count

        return self.control.toMsg()

    def setSteerAction(self, steer):
        self.control.setSteer(self.steers[steer])

    def setGearAction(self, gear):
        assert -1 <= gear <= 6
        self.control.setGear(gear)

    def setSpeedAction(self, speed):
        accel = self.speeds[speed]
        if accel >= 0:
            #print "accel", accel
            self.control.setAccel(accel)
            self.control.setBrake(0)
        else:
            #print "brake", -accel
            self.control.setAccel(0)
            self.control.setBrake(-accel)

    def getSteerAction(self, wheel):
        steer = np.argmin(np.abs(np.array(self.steers) - wheel))
        return steer

    def getSpeedActionAccel(self, accel):
        speed = np.argmin(np.abs(np.array(self.speeds) - accel))
        return speed

    def getSpeedActionBrake(self, brake):
        speed = np.argmin(np.abs(np.array(self.speeds) + brake))
        return speed

    def steer(self):
        angle = self.state.angle
        dist = self.state.trackPos
        steer = (angle - dist * 0.5) / self.steer_lock
        return steer

    def gear(self):
        rpm = self.state.getRpm()
        gear = self.state.getGear()

        if self.prev_rpm is None:
            up = True
        else:
            if (self.prev_rpm - rpm) < 0:
                up = True
            else:
                up = False

        if up and rpm > 7000:
            gear += 1
        if not up and rpm < 3000:
            gear -= 1

        return gear

    def speed(self):
        speed = self.state.getSpeedX()
        accel = self.prev_accel

        if speed < self.max_speed:
            accel += 0.1
            if accel > 1:
                accel = 1.0
        else:
            accel -= 0.1
            if accel < 0:
                accel = 0.0

        self.prev_accel = accel
        return accel

    def onShutDown(self):
        pass

    def onRestart(self):
        if self.mode == 'ff':
            self.wheel.generateForce(0)

        self.prev_rpm = None
        self.prev_accel = 0
        self.prev_dist = None
        self.prev_state = None
        self.prev_steer = None
        self.prev_speed = None
        self.wheelsteps = 0

        if self.save_weights_prefix and self.episode > 0:
            self.net.save_weights(self.save_weights_prefix + "_" + str(self.episode) + ".pkl")

        self.episode += 1
        print "Episode", self.episode
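In the manual-override path above, getSteerAction, getSpeedActionAccel, and getSpeedActionBrake quantize a continuous wheel or pedal reading onto the discrete steers/speeds grids by picking the index of the nearest grid entry (np.argmin of the absolute difference). The self-contained snippet below reproduces that mapping; the sample wheel readings are made up purely for illustration.

# Standalone illustration of the nearest-neighbour quantization used by
# getSteerAction() and friends above.
import numpy as np

steers = [-1.0, -0.8, -0.6, -0.5, -0.4, -0.3, -0.2, -0.15, -0.1, -0.05,
          0.0, 0.05, 0.1, 0.15, 0.2, 0.3, 0.4, 0.5, 0.6, 0.8, 1.0]

def nearest_action(value, grid):
    # index of the grid entry closest to the continuous reading
    return int(np.argmin(np.abs(np.array(grid) - value)))

if __name__ == '__main__':
    for wheel in (-0.72, -0.07, 0.0, 0.33):
        idx = nearest_action(wheel, steers)
        print("wheel %+.2f -> action %d (steer %+.2f)" % (wheel, idx, steers[idx]))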
def __init__(self, args):
    '''Constructor'''
    self.WARM_UP = 0
    self.QUALIFYING = 1
    self.RACE = 2
    self.UNKNOWN = 3
    self.stage = args.stage

    self.parser = msgParser.MsgParser()
    self.state = carState.CarState()
    self.control = carControl.CarControl()

    self.learn = args.learn
    self.num_actions = 2 if self.learn == 'both' else 1
    self.num_inputs = 20

    self.net = DeepQNetwork(self.num_inputs, self.num_actions, args)
    self.mem = ReplayMemory(args.replay_size, self.num_inputs, self.num_actions, action_dtype=np.float)
    self.minibatch_size = args.batch_size

    if args.load_replay:
        self.mem.load(args.load_replay)
    if args.load_weights:
        self.net.load_weights(args.load_weights)
    self.save_weights_prefix = args.save_weights_prefix
    self.save_interval = args.save_interval
    self.save_replay = args.save_replay

    self.enable_training = args.enable_training
    self.enable_exploration = args.enable_exploration

    self.save_csv = args.save_csv
    if self.save_csv:
        self.csv_file = open(args.save_csv + '.csv', "wb")
        self.csv_writer = csv.writer(self.csv_file)
        self.csv_writer.writerow(['episode', 'distFromStart', 'distRaced', 'curLapTime', 'lastLapTime',
                'racePos', 'replay_memory', 'train_steps', 'avgmaxQ', 'avgloss'])

    self.skip = args.skip
    self.repeat_train = args.repeat_train
    self.show_sensors = args.show_sensors
    self.show_qvalues = args.show_qvalues

    self.steer_lock = 0.785398
    self.max_speed = 100

    self.loss_sum = self.loss_steps = 0
    self.maxQ_sum = self.maxQ_steps = 0

    self.episode = 0
    self.distances = []
    self.onRestart()

    if self.show_sensors:
        from sensorstats import Stats
        self.stats = Stats(inevery=8)
    if self.show_qvalues:
        from plotq import PlotQ
        self.plotq = PlotQ(self.num_steers, self.num_speeds, args.update_qvalues_interval)
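ReplayMemory itself is defined elsewhere in the repository; this continuous-action variant only reveals how it is called (positional size/num_inputs/num_actions arguments plus an action_dtype keyword, and add, getMinibatch, count, save, load used by the other variants above). Under those assumptions, a minimal circular-buffer sketch satisfying that interface could look roughly like the following. It is an illustration of the idea, not the project's implementation, and details such as the default action dtype are guesses.

# Hypothetical circular-buffer sketch matching the usage seen in this file.
import pickle
import numpy as np

class ReplayMemorySketch(object):
    def __init__(self, size, num_inputs, num_actions, action_dtype=np.int64):
        self.size = size
        self.states = np.zeros((size, num_inputs), dtype=np.float32)
        self.actions = np.zeros((size, num_actions), dtype=action_dtype)
        self.rewards = np.zeros(size, dtype=np.float32)
        self.poststates = np.zeros((size, num_inputs), dtype=np.float32)
        self.terminals = np.zeros(size, dtype=np.bool_)
        self.count = 0        # number of valid entries
        self.current = 0      # next write position (wraps around)

    def add(self, state, action, reward, poststate, terminal):
        self.states[self.current] = state
        self.actions[self.current] = action
        self.rewards[self.current] = reward
        self.poststates[self.current] = poststate
        self.terminals[self.current] = terminal
        self.current = (self.current + 1) % self.size
        self.count = min(self.count + 1, self.size)

    def getMinibatch(self, batch_size):
        # sample uniformly from the filled part of the buffer (count must be > 0)
        idx = np.random.randint(0, self.count, size=batch_size)
        return (self.states[idx], self.actions[idx], self.rewards[idx],
                self.poststates[idx], self.terminals[idx])

    def save(self, filename):
        with open(filename, 'wb') as f:
            pickle.dump(self.__dict__, f)

    def load(self, filename):
        with open(filename, 'rb') as f:
            self.__dict__.update(pickle.load(f))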