Python experience_buffer Exemples, DQN.experience_buffer Python Exemples

Exemple #1

0

Afficher le fichier

Fichier : Agent.py Projet : syleLim/DRQN_car

    def __init__(self, flag):
        """Set hyperparameters and build networks, buffer, simulator and tracker.

        Args:
            flag: Stored as ``self.is_Train``. When it is falsy, ``Play`` and
                ``Train`` additionally run the camera detection/tracking code.
        """
        self.batch_size = 64  #How many experiences to use for each training step.
        self.update_freq = 4  #How often to perform a training step.
        self.y = .99  #Discount factor on the target Q-values
        self.startE = 1  #Starting chance of random action
        self.endE = 0.1  #Final chance of random action
        self.annealing_steps = 10000.  #How many steps of training to reduce startE to endE.
        self.num_episodes = 10000  #How many episodes of game environment to train network with.
        self.pre_train_steps = 10000  #How many steps of random actions before training begins.
        self.max_epLength = 300  #The max allowed length of our episode.
        self.load_model = False  #Whether to load a saved model.
        self.path = "./dqn"  #The path to save our model to.
        self.h_size = 512  #The size of the final convolutional layer before splitting it into Advantage and Value streams.
        self.tau = 0.001  #Rate to update target network toward primary network
        self.action_num = 5

        # Both networks are created in a fresh default graph; the initializer,
        # saver and trainable-variable list are captured after both exist.
        tf.reset_default_graph()
        self.mainQN = DQN(self.h_size, self.action_num)
        self.targetQN = DQN(self.h_size, self.action_num)

        self.init = tf.global_variables_initializer()

        self.saver = tf.train.Saver()

        self.trainables = tf.trainable_variables()

        self.targetOps = self.updateTargetGraph(self.trainables, self.tau)

        self.myBuffer = experience_buffer()

        # Set the rate of random action decrease.
        self.e = self.startE
        self.stepDrop = (self.startE - self.endE) / self.annealing_steps

        # create lists to contain total rewards and steps per episode
        self.jList = []
        self.rList = []
        self.total_steps = 0

        self.game = sim(200, True)

        self.is_Train = flag

        # for Tracking
        self.cap = None
        self.col = -1
        self.width = -1
        self.row = -1
        self.height = -1
        self.frame = None
        self.frame2 = None
        self.inputmode = False
        self.rectangle = False
        self.trackWindow = None
        self.roi_hist = None
        self.roi = None
        self.caffe_model_path = './MobileNetSSD_deploy.caffemodel'
        self.prorotxt_path = './MobileNetSSD_deploy.prototxt.txt'
        self.net = None
        self.obstacle_points = []
        self.target_point = None
        self.obstacle_box_color = (0, 0, 255)
        self.tracker_types = [
            'BOOSTING', 'MIL', 'KCF', 'TLD', 'MEDIANFLOW', 'GOTURN'
        ]
        self.tracker_type = self.tracker_types[2]

        # The branches must be mutually exclusive: with independent `if`s the
        # trailing `else` belonged only to the 'TLD' test and replaced every
        # non-TLD choice (including the configured 'KCF') with MedianFlow.
        if self.tracker_type == 'BOOSTING':
            self.tracker = cv2.TrackerBoosting_create()
        elif self.tracker_type == 'MIL':
            self.tracker = cv2.TrackerMIL_create()
        elif self.tracker_type == 'KCF':
            self.tracker = cv2.TrackerKCF_create()
        elif self.tracker_type == 'TLD':
            self.tracker = cv2.TrackerTLD_create()
        else:
            # 'MEDIANFLOW' and any type without its own branch ('GOTURN').
            self.tracker = cv2.TrackerMedianFlow_create()

Exemple #2

0

Afficher le fichier

Fichier : Agent.py Projet : syleLim/DRQN_car

    def Play(self):
        """Run ``mainQN``'s predicted actions for ``num_episodes`` episodes.

        No training happens here: every step feeds the current state to
        ``self.mainQN.predict`` and applies the result with ``self.game.Step``.
        An episode ends only when the game reports ``dead`` (there is no step
        cap). Per-episode step counts and rewards are appended to
        ``self.jList`` / ``self.rList``, a line is appended to
        ``./play_graph.txt``, and a reward curve is plotted at the end.

        When ``self.is_Train`` is falsy, each step also reads a camera frame,
        runs the Caffe detector to collect obstacle boxes, tracks the
        user-selected target with mean-shift, and pushes both into the
        simulator. Game steps are skipped until a target is being tracked.

        Note: weights are restored from ``self.path`` only when
        ``self.load_model`` is set; otherwise the freshly initialised network
        is used.
        """
        if not self.is_Train:
            print('load image_model ...')
            self.net = cv2.dnn.readNetFromCaffe(self.prorotxt_path,
                                                self.caffe_model_path)

        with tf.Session() as sess:
            sess.run(self.init)
            if self.load_model:
                print('load_model ...')
                # Fixed: these were bare `path` / `saver`, which are not
                # defined in this scope.
                ckpt = tf.train.get_checkpoint_state(self.path)
                self.saver.restore(sess, ckpt.model_checkpoint_path)

            for i in range(self.num_episodes):
                if not self.is_Train:
                    # Class list noted by the original author for the detector:
                    #["background", "aeroplane", "bicycle", "bird", "boat","bottle", "bus", "car", "cat",
                    # "chair", "cow", "diningtable","dog", "horse", "motorbike", "person", "pottedplant",
                    # "sheep","sofa", "train", "tvmonitor"

                    # Reset the selection/tracking state for the new episode.
                    self.col = -1
                    self.width = -1
                    self.row = -1
                    self.height = -1
                    self.frame = None
                    self.frame2 = None
                    self.inputmode = False
                    self.rectangle = False
                    self.trackWindow = None
                    self.roi_hist = None

                    self.cap = VideoStream(src=0).start()
                    time.sleep(2.0)  # give the camera time to warm up
                    fps = FPS().start()

                    cv2.namedWindow('frame')
                    cv2.setMouseCallback('frame',
                                         self.onMouse,
                                         param=(self.frame, self.frame2))

                    termination = (cv2.TERM_CRITERIA_EPS
                                   | cv2.TERM_CRITERIA_COUNT, 10, 1)

                state = self.game.Reset()
                state = self.processState(state)

                dead = False
                reward_all = 0
                epi = 0  # game steps taken this episode (was undefined before)

                while True:
                    if not self.is_Train:
                        is_game_start = False
                        self.frame = self.cap.read()

                        self.frame = imutils.resize(self.frame,
                                                    width=200,
                                                    height=200)

                        (h, w) = self.frame.shape[:2]
                        blob = cv2.dnn.blobFromImage(
                            cv2.resize(self.frame, (300, 300)), 0.007843,
                            (300, 300), 127.5)

                        self.net.setInput(blob)
                        detections = self.net.forward()

                        # Every sufficiently confident detection is treated as
                        # an obstacle, whatever its class.
                        self.obstacle_points = []
                        box_scale = np.array([w, h, w, h])
                        for det in range(detections.shape[2]):
                            confidence = detections[0, 0, det, 2]

                            if confidence > 0.2:  ### set for changing
                                box = detections[0, 0, det, 3:7] * box_scale
                                (startX, startY, endX,
                                 endY) = box.astype('int')

                                cv2.rectangle(self.frame, (startX, startY),
                                              (endX, endY),
                                              self.obstacle_box_color, 2)
                                self.obstacle_points.append({
                                    'row': startY,
                                    'col': startX,
                                    'row_size': endY - startY,
                                    'col_size': endX - startX
                                })

                        if self.trackWindow is not None:
                            hsv = cv2.cvtColor(self.frame, cv2.COLOR_BGR2HSV)
                            dst = cv2.calcBackProject([hsv], [0],
                                                      self.roi_hist, [0, 180],
                                                      1)
                            ## Maybe this window is track size
                            _, self.trackWindow = cv2.meanShift(
                                dst, self.trackWindow, termination)

                            tx, ty, tw, th = self.trackWindow
                            # Centre of the tracked window.
                            self.target_point = {
                                'row': int((2 * ty + th) / 2),
                                'col': int((2 * tx + tw) / 2)
                            }
                            # Fixed: the bottom edge used `y + w` (width)
                            # instead of `y + h` (height).
                            cv2.rectangle(self.frame, (tx, ty),
                                          (tx + tw, ty + th), (0, 255, 0), 3)
                            is_game_start = True
                        else:
                            self.target_point = {
                                'row': -1,
                                'col': -1
                            }  #in Sim m_row == -1 is_show = False

                        show_frame = cv2.resize(self.frame, None, fx=2, fy=2)

                        cv2.imshow('frame', show_frame)

                        print(self.target_point)

                        key = cv2.waitKey(60) & 0xFF

                        if key == ord('i'):
                            print('select target')
                            self.inputmode = True
                            self.frame2 = self.frame.copy()

                            # Blocks here until inputmode is cleared elsewhere
                            # (presumably by the onMouse callback).
                            while self.inputmode:
                                cv2.imshow('frame', self.frame)
                                cv2.waitKey(0)

                        fps.update()

                        if not is_game_start:
                            # No target tracked yet: keep polling the camera.
                            continue
                        self.game.Update_ob_points(self.target_point,
                                                   self.obstacle_points)

                    action = sess.run(
                        self.mainQN.predict,
                        feed_dict={self.mainQN.scalarInput: [state]})[0]

                    state_1, reward, dead = self.game.Step(action)

                    # Fixed: the reward was never accumulated and the state was
                    # never advanced, so the network saw the reset state forever.
                    epi += 1
                    reward_all += reward
                    state = self.processState(state_1)

                    if dead == True:
                        break

                self.jList.append(epi)
                self.rList.append(reward_all)

                with open('./play_graph.txt', 'a') as f:
                    f.write(
                        str(i) + '_th Game_End = Reward : ' + str(reward_all))
                    f.write('\n')
                self.game.Print_action_log()
                print(str(i) + '_th Game_End = Reward : ' + str(reward_all))

                if not self.is_Train:
                    # Stop this episode's capture thread before the next
                    # episode starts a new VideoStream.
                    self.cap.stop()
                    cv2.destroyAllWindows()

        print("Percent of succesful episodes: " +
              str(sum(self.rList) / self.num_episodes) + "%")

        # Average the rewards in groups of 100 episodes for plotting.
        rMat = np.resize(np.array(self.rList), [len(self.rList) // 100, 100])
        rMean = np.average(rMat, 1)
        plt.plot(rMean)

Exemple #3

0

Afficher le fichier

Fichier : Agent.py Projet : syleLim/DRQN_car

    def Train(self):
        """Train ``mainQN`` on the simulator with epsilon-greedy exploration.

        For each of ``num_episodes`` episodes (at most ``max_epLength`` game
        steps each):

        * the action is random (``self.game.Get_action()``) while
          ``total_steps < pre_train_steps`` or with probability ``self.e``,
          otherwise it comes from ``self.mainQN.predict``;
        * the transition is stored in a per-episode buffer that is appended to
          ``self.myBuffer`` when the episode ends;
        * after the pre-training phase ``self.e`` is lowered by
          ``self.stepDrop`` per step until it reaches ``self.endE``, and every
          ``update_freq`` steps a batch is sampled, a target is built from
          ``mainQN.predict`` (action choice) and ``targetQN.Qout`` (action
          value), ``mainQN.updateModel`` is run and the target network is
          nudged via ``self.updateTarget``.

        Side effects: appends to ``./graph.txt`` and ``./reward_mean.txt``,
        saves a checkpoint under ``self.path`` every 100 episodes and once at
        the end, and plots the reward curve.

        When ``self.is_Train`` is falsy the camera detection/tracking code
        also runs each step, and game steps are skipped (without counting
        toward the episode length) until the tracker reports a target.
        """
        import os

        # TF1's Saver.save refuses to write into a missing directory.
        if not os.path.exists(self.path):
            os.makedirs(self.path)

        if not self.is_Train:
            print('load image_model ...')
            self.net = cv2.dnn.readNetFromCaffe(self.prorotxt_path,
                                                self.caffe_model_path)

        with tf.Session() as sess:
            sess.run(self.init)
            if self.load_model:
                print('load_model ...')
                # Fixed: these were bare `path` / `saver`, which are not
                # defined in this scope.
                ckpt = tf.train.get_checkpoint_state(self.path)
                self.saver.restore(sess, ckpt.model_checkpoint_path)

            for i in range(self.num_episodes):
                if not self.is_Train:
                    # Class list noted by the original author for the detector:
                    #["background", "aeroplane", "bicycle", "bird", "boat","bottle", "bus", "car", "cat",
                    # "chair", "cow", "diningtable","dog", "horse", "motorbike", "person", "pottedplant",
                    # "sheep","sofa", "train", "tvmonitor"

                    # Reset the selection/tracking state for the new episode.
                    self.col = -1
                    self.width = -1
                    self.row = -1
                    self.height = -1
                    self.frame = None
                    self.frame2 = None
                    self.inputmode = False
                    self.rectangle = False
                    self.trackWindow = None
                    self.roi_hist = None
                    self.roi = None

                    self.cap = VideoStream(src=0).start()
                    time.sleep(2.0)  # give the camera time to warm up
                    fps = FPS().start()

                    cv2.namedWindow('frame')
                    cv2.setMouseCallback('frame',
                                         self.onMouse,
                                         param=(self.frame, self.frame2))

                episode_buffer = experience_buffer()

                state = self.game.Reset()
                state = self.processState(state)

                dead = False
                reward_all = 0
                epi = 0

                while epi < self.max_epLength:
                    epi += 1

                    if not self.is_Train:
                        is_game_start = False
                        self.frame = self.cap.read()

                        self.frame = imutils.resize(self.frame,
                                                    width=200,
                                                    height=200)

                        (h, w) = self.frame.shape[:2]
                        blob = cv2.dnn.blobFromImage(
                            cv2.resize(self.frame, (300, 300)), 0.007843,
                            (300, 300), 127.5)

                        self.net.setInput(blob)
                        detections = self.net.forward()

                        # Every sufficiently confident detection is treated as
                        # an obstacle, whatever its class.
                        self.obstacle_points = []
                        box_scale = np.array([w, h, w, h])
                        for det in range(detections.shape[2]):
                            confidence = detections[0, 0, det, 2]

                            if confidence > 0.2:  ### set for changing
                                box = detections[0, 0, det, 3:7] * box_scale
                                (startX, startY, endX,
                                 endY) = box.astype('int')

                                cv2.rectangle(self.frame, (startX, startY),
                                              (endX, endY),
                                              self.obstacle_box_color, 2)
                                self.obstacle_points.append({
                                    'row': startY,
                                    'col': startX,
                                    'row_size': endY - startY,
                                    'col_size': endX - startX
                                })

                        if self.trackWindow is not None:
                            ok, self.trackWindow = self.tracker.update(
                                self.frame)

                            if ok:
                                tx, ty, tw, th = (int(v)
                                                  for v in self.trackWindow)
                                # Centre of the tracked window.
                                self.target_point = {
                                    'row': int((2 * ty + th) / 2),
                                    'col': int((2 * tx + tw) / 2)
                                }
                                # Fixed: the bottom edge used `y + w` (width)
                                # instead of `y + h` (height).
                                cv2.rectangle(self.frame, (tx, ty),
                                              (tx + tw, ty + th),
                                              (0, 255, 0), 3)
                                is_game_start = True
                            else:
                                cv2.putText(self.frame,
                                            "Tracking failure detected",
                                            (100, 80),
                                            cv2.FONT_HERSHEY_SIMPLEX, 0.75,
                                            (0, 0, 255), 2)
                                self.target_point = {'row': -1, 'col': -1}

                        show_frame = cv2.resize(self.frame, None, fx=2, fy=2)

                        cv2.imshow('frame', show_frame)

                        print(self.target_point)

                        key = cv2.waitKey(60) & 0xFF

                        if key == ord('i'):
                            print('select target')
                            self.inputmode = True
                            self.frame2 = self.frame.copy()

                            # Blocks here until inputmode is cleared elsewhere
                            # (presumably by the onMouse callback).
                            while self.inputmode:
                                cv2.imshow('frame', self.frame)
                                cv2.waitKey(0)

                        fps.update()

                        if not is_game_start:
                            # No game step was taken: undo the increment so
                            # waiting for a target does not use up the episode.
                            epi -= 1
                            continue
                        self.game.Update_ob_points(self.target_point,
                                                   self.obstacle_points)

                    # Epsilon-greedy, forced random during pre-training.
                    explore = (np.random.rand(1) < self.e
                               or self.total_steps < self.pre_train_steps)
                    if explore:
                        action = self.game.Get_action()
                    else:
                        action = sess.run(
                            self.mainQN.predict,
                            feed_dict={self.mainQN.scalarInput: [state]})[0]

                    state_1, reward, dead = self.game.Step(action)
                    state_1 = self.processState(state_1)
                    self.total_steps += 1
                    # dtype=object: the row mixes arrays and scalars, which
                    # recent NumPy versions refuse to pack implicitly.
                    transition = np.array(
                        [state, action, reward, state_1, dead], dtype=object)
                    episode_buffer.add(np.reshape(transition, [1, 5]))

                    if self.total_steps > self.pre_train_steps:
                        if self.e > self.endE:
                            self.e -= self.stepDrop

                        if self.total_steps % (self.update_freq) == 0:
                            # Columns: 0 state, 1 action, 2 reward,
                            # 3 next state, 4 dead flag.
                            train_batch = self.myBuffer.sample(self.batch_size)
                            next_states = np.vstack(train_batch[:, 3])

                            # Main network picks the next action, target
                            # network supplies that action's value.
                            Q_1 = sess.run(self.mainQN.predict,
                                           feed_dict={
                                               self.mainQN.scalarInput:
                                               next_states
                                           })
                            Q_2 = sess.run(self.targetQN.Qout,
                                           feed_dict={
                                               self.targetQN.scalarInput:
                                               next_states
                                           })
                            # 0 where the dead flag is set, 1 otherwise, so
                            # terminal transitions get no bootstrapped value.
                            end_multiplier = -(train_batch[:, 4] - 1)
                            doubleQ = Q_2[range(self.batch_size), Q_1]
                            targetQ = train_batch[:, 2] + (self.y * doubleQ *
                                                           end_multiplier)

                            _ = sess.run(self.mainQN.updateModel,
                                         feed_dict={
                                             self.mainQN.scalarInput:
                                             np.vstack(train_batch[:, 0]),
                                             self.mainQN.targetQ:
                                             targetQ,
                                             self.mainQN.actions:
                                             train_batch[:, 1]
                                         })

                            self.updateTarget(self.targetOps, sess)

                    reward_all += reward
                    state = state_1

                    if dead == True:
                        break

                self.myBuffer.add(episode_buffer.buffer)
                self.jList.append(epi)
                self.rList.append(reward_all)

                episode_summary = (str(i) + '_th Game_End = Reward : ' +
                                   str(reward_all) + '/ Episode : ' + str(epi))
                with open('./graph.txt', 'a') as f:
                    f.write(episode_summary)
                    f.write('\n')

                self.game.Print_action_log()
                print(episode_summary)

                if i % 100 == 0:
                    self.saver.save(sess,
                                    self.path + '/model-' + str(i) + '.ckpt')
                    print('save model')

                if len(self.rList) % 10 == 0:
                    recent_mean = np.mean(self.rList[-10:])
                    print(self.rList)
                    print(len(self.rList))
                    print(self.total_steps)
                    print(recent_mean)
                    print(self.e)
                    with open('./reward_mean.txt', 'a') as f_2:
                        f_2.write(str(i) + 'th : ' + str(recent_mean))
                        f_2.write('\n')

                if not self.is_Train:
                    # Stop this episode's capture thread before the next
                    # episode starts a new VideoStream.
                    self.cap.stop()
                    cv2.destroyAllWindows()

            self.saver.save(sess, self.path + '/model-' + str(i) + '.ckpt')

        print("Percent of succesful episodes: " +
              str(sum(self.rList) / self.num_episodes) + "%")

        # Average the rewards in groups of 100 episodes for plotting.
        rMat = np.resize(np.array(self.rList), [len(self.rList) // 100, 100])
        rMean = np.average(rMat, 1)
        plt.plot(rMean)