예제 #1
0
  def __init__(self):
    # action space
    self.actions = np.arange(7)
    '''
    0 = sharp left, 1 = left, 2 = slightly left, 3 = forward, 
    4 = slightly right, 5 = right, 6 = sharp right, (not in array: 7 = stop)
    '''
    self.stop_action = 7

    # state space
    self.states = np.arange(8)
    '''
    0 = line is far left, 1 = line is left, 2 = line is slightly left, 3 = line is in the middle, 
    4 = line is slightly right, 5 = line is right, 6 = line is far right, 7 = line is lost 
    '''
    self.lost_line = 7

    # q-matrix (empty in the beginning)
    self.Q = np.zeros(shape=[len(self.states), len(self.actions)])

    # image helper
    self.img_helper = mi.MyImage()
예제 #2
0
파일: main.py 프로젝트: Lizzylizard/RL_Bot
    def __init__(self):
        '''--------------Adjust before running program-----------------'''
        # increment if you wish to save a new version of the network model
        # or set to specific model version if you wish to use an existing
        # model
        self.path_nr = 0
        # set to False if you wish to run program with existing model
        self.learn = True
        '''---------------------Hyperparameters------------------------'''
        # hyperparameters to experiment with
        # number of learning episodes
        # self.max_episodes = 475
        self.max_episodes = 1000
        self.max_steps_per_episode = 400
        # speed of the robot's wheels
        self.speed = 12.0
        # replay buffer capacity
        self.rb_capacity = 10000
        # number of examples that will be extracted at once from
        # the memory
        self.batch_size = 100
        # number of memory samples that will be processed together in
        # one execution of the neural network
        self.mini_batch_size = 1
        # variables for Bellman equation
        self.gamma = 0.95
        self.alpha = 0.95
        # update rate for target network
        self.update_r_targets = 5
        # integer variable after how many episodes exploiting is possible
        self.start_decaying = (self.max_episodes / 5)
        # self.start_decaying = 0
        '''------------------------Class objects-----------------------'''
        # helper classes
        self.bot = bt.Bot()  # reward + q-matrix
        self.imgHelper = mi.MyImage()  # image processing
        self.memory = Memory.Memory(self.rb_capacity)  # replay buffer
        '''--------------------------Images----------------------------'''
        # current image
        self.my_img = np.zeros(50)
        # image stack
        self.image_stack = np.zeros(shape=[self.mini_batch_size, 50])
        self.image_stack_help = RingBuffer(self.mini_batch_size,
                                           dtype=(np.uint8, (1, 50)))
        # flattened current multiple images
        self.my_mult_img = np.zeros(shape=[1, self.mini_batch_size * 50])
        # last images
        self.last_imgs = np.copy(self.my_mult_img)
        # number of images received in total
        self.img_cnt = 0
        '''-------------------------Episodes---------------------------'''
        # episodes
        self.explorationMode = False  # exploring or exploiting?
        self.episode_counter = 0  # number of done episodes
        self.step_counter = 0  # counts every while iteration
        self.steps_in_episode = 0  # counts steps inside current episode
        # variables deciding whether to explore or to exploit
        # old --> these are currently used
        self.exploration_prob = 0.99
        # self.exploration_prob = 0.8
        self.decay_rate = 0.01
        self.min_exploration_rate = 0.01
        self.max_exploration_rate = 1
        self.decay_per_episode = self.calc_decay_per_episode()
        # new
        self.epsilon = 1
        self.epsilon_min = 0.001
        self.epsilon_decay = 0.999
        '''-------------------------Strings----------------------------'''
        # strings to display actions
        self.action_strings = {
            0: "sharp left",
            1: "left",
            2: "slightly left",
            3: "forward",
            4: "slightly right",
            5: "right",
            6: "sharp right",
            7: "stop"
        }

        # strings to display states
        self.state_strings = {
            0: "far left",
            1: "left",
            2: "slightly left",
            3: "middle",
            4: "slightly right",
            5: "right",
            6: "far right",
            7: "lost"
        }
        '''----------------------Neural network------------------------'''
        # neural network
        # tensorflow session object
        self.sess = tf.compat.v1.Session()
        # policy network
        input_shape = np.shape(self.my_mult_img)
        self.policy_net = Network.Network(mini_batch_size=self.mini_batch_size)
        # target array (for simple way)
        self.targets = np.zeros(shape=[1, (len(self.action_strings) - 1)])
        # target network to calculate 'optimal' q-values
        self.target_net = Network.Network(mini_batch_size=self.mini_batch_size)
        # copy weights and layers from the policy net into the target net
        self.target_net = self.policy_net.copy(self.target_net)
        '''--------------------------Driving---------------------------'''
        # terminal states
        self.lost_line = 7
        self.stop_action = 7

        # current state and action (initially negative)
        self.curr_state = -1
        self.curr_action = -1
        self.last_state = self.curr_state

        # starting coordinates of the robot
        self.x_position = -0.9032014349
        self.y_position = -6.22487658223
        self.z_position = -0.0298790967155

        # deviation from speed to turn the robot to the left or right
        # sharp curve => big difference
        self.sharp = self.speed * (1.0 / 7.0)
        # middle curve => middle difference
        self.middle = self.speed * (1.0 / 8.5)
        # slight curve => slight difference
        self.slightly = self.speed * (1.0 / 10.0)

        # flag to indicate end of learning
        self.first_time = True
        '''----------------------------ROS-----------------------------'''
        # message to post on topic /cmd_vel
        self.vel_msg = Twist()

        # ROS variables
        # publisher to publish on topic /cmd_vel
        self.velocity_publisher = rospy.Publisher('/cmd_vel',
                                                  Twist,
                                                  queue_size=100)
        # initializing ROS-node
        rospy.init_node('reinf_matrix_driving', anonymous=True)
        # subscriber for topic '/camera/image_raw'
        self.sub = rospy.Subscriber('/camera/image_raw', Image,
                                    self.cam_im_raw_callback)
        '''------------------------statistics--------------------------'''
        self.step_array = np.zeros(shape=[self.max_episodes + 1])
        self.actions = 0
        self.pics_since_action = 0