def __init__(self): # action space self.actions = np.arange(7) ''' 0 = sharp left, 1 = left, 2 = slightly left, 3 = forward, 4 = slightly right, 5 = right, 6 = sharp right, (not in array: 7 = stop) ''' self.stop_action = 7 # state space self.states = np.arange(8) ''' 0 = line is far left, 1 = line is left, 2 = line is slightly left, 3 = line is in the middle, 4 = line is slightly right, 5 = line is right, 6 = line is far right, 7 = line is lost ''' self.lost_line = 7 # q-matrix (empty in the beginning) self.Q = np.zeros(shape=[len(self.states), len(self.actions)]) # image helper self.img_helper = mi.MyImage()
def __init__(self): '''--------------Adjust before running program-----------------''' # increment if you wish to save a new version of the network model # or set to specific model version if you wish to use an existing # model self.path_nr = 0 # set to False if you wish to run program with existing model self.learn = True '''---------------------Hyperparameters------------------------''' # hyperparameters to experiment with # number of learning episodes # self.max_episodes = 475 self.max_episodes = 1000 self.max_steps_per_episode = 400 # speed of the robot's wheels self.speed = 12.0 # replay buffer capacity self.rb_capacity = 10000 # number of examples that will be extracted at once from # the memory self.batch_size = 100 # number of memory samples that will be processed together in # one execution of the neural network self.mini_batch_size = 1 # variables for Bellman equation self.gamma = 0.95 self.alpha = 0.95 # update rate for target network self.update_r_targets = 5 # integer variable after how many episodes exploiting is possible self.start_decaying = (self.max_episodes / 5) # self.start_decaying = 0 '''------------------------Class objects-----------------------''' # helper classes self.bot = bt.Bot() # reward + q-matrix self.imgHelper = mi.MyImage() # image processing self.memory = Memory.Memory(self.rb_capacity) # replay buffer '''--------------------------Images----------------------------''' # current image self.my_img = np.zeros(50) # image stack self.image_stack = np.zeros(shape=[self.mini_batch_size, 50]) self.image_stack_help = RingBuffer(self.mini_batch_size, dtype=(np.uint8, (1, 50))) # flattened current multiple images self.my_mult_img = np.zeros(shape=[1, self.mini_batch_size * 50]) # last images self.last_imgs = np.copy(self.my_mult_img) # number of images received in total self.img_cnt = 0 '''-------------------------Episodes---------------------------''' # episodes self.explorationMode = False # exploring or exploiting? self.episode_counter = 0 # number of done episodes self.step_counter = 0 # counts every while iteration self.steps_in_episode = 0 # counts steps inside current episode # variables deciding whether to explore or to exploit # old --> these are currently used self.exploration_prob = 0.99 # self.exploration_prob = 0.8 self.decay_rate = 0.01 self.min_exploration_rate = 0.01 self.max_exploration_rate = 1 self.decay_per_episode = self.calc_decay_per_episode() # new self.epsilon = 1 self.epsilon_min = 0.001 self.epsilon_decay = 0.999 '''-------------------------Strings----------------------------''' # strings to display actions self.action_strings = { 0: "sharp left", 1: "left", 2: "slightly left", 3: "forward", 4: "slightly right", 5: "right", 6: "sharp right", 7: "stop" } # strings to display states self.state_strings = { 0: "far left", 1: "left", 2: "slightly left", 3: "middle", 4: "slightly right", 5: "right", 6: "far right", 7: "lost" } '''----------------------Neural network------------------------''' # neural network # tensorflow session object self.sess = tf.compat.v1.Session() # policy network input_shape = np.shape(self.my_mult_img) self.policy_net = Network.Network(mini_batch_size=self.mini_batch_size) # target array (for simple way) self.targets = np.zeros(shape=[1, (len(self.action_strings) - 1)]) # target network to calculate 'optimal' q-values self.target_net = Network.Network(mini_batch_size=self.mini_batch_size) # copy weights and layers from the policy net into the target net self.target_net = self.policy_net.copy(self.target_net) '''--------------------------Driving---------------------------''' # terminal states self.lost_line = 7 self.stop_action = 7 # current state and action (initially negative) self.curr_state = -1 self.curr_action = -1 self.last_state = self.curr_state # starting coordinates of the robot self.x_position = -0.9032014349 self.y_position = -6.22487658223 self.z_position = -0.0298790967155 # deviation from speed to turn the robot to the left or right # sharp curve => big difference self.sharp = self.speed * (1.0 / 7.0) # middle curve => middle difference self.middle = self.speed * (1.0 / 8.5) # slight curve => slight difference self.slightly = self.speed * (1.0 / 10.0) # flag to indicate end of learning self.first_time = True '''----------------------------ROS-----------------------------''' # message to post on topic /cmd_vel self.vel_msg = Twist() # ROS variables # publisher to publish on topic /cmd_vel self.velocity_publisher = rospy.Publisher('/cmd_vel', Twist, queue_size=100) # initializing ROS-node rospy.init_node('reinf_matrix_driving', anonymous=True) # subscriber for topic '/camera/image_raw' self.sub = rospy.Subscriber('/camera/image_raw', Image, self.cam_im_raw_callback) '''------------------------statistics--------------------------''' self.step_array = np.zeros(shape=[self.max_episodes + 1]) self.actions = 0 self.pics_since_action = 0