Esempio n. 1
0
def _save_stats_csv(filename, stats):
    """Write collected rewards and input actions to
    ``data/<timestamp>-<filename>.csv``.

    Parameters
    ----------
    filename : str
        Base name supplied via ``--output-stats-filename``.
    stats : dict
        Accumulator populated by ``collect_stat``; must contain
        ``stats['rewards']`` and ``stats['input']['actions']``.
    """
    df = pd.DataFrame({
        'rewards': stats['rewards'],
        'actions-input': stats['input']['actions'],
    })
    stamp = int(datetime.now().timestamp())
    # ``with`` closes the file; the old explicit f.close() was redundant.
    with open('data/{}-{}.csv'.format(stamp, filename), 'w') as f:
        f.write(df.to_csv())
    print('Statistics file saved ({}.csv)!'.format(filename))


def main(argv):
    """Command-line entry point.

    Dispatches on the parsed arguments to one of: print help, generate an
    environment file, list registered gym environments, or run episodes in
    a gym environment (optionally driven by a DQN agent).

    Parameters
    ----------
    argv : list[str]
        Full argv vector; ``argv[0]`` (the program name) is skipped.

    Raises
    ------
    ValueError
        If ``pre_defined_state_size`` names an unknown preset.  The
        original code silently fell through and later crashed with a
        ``NameError`` on the unbound ``pre_state_size``.
    """
    args = parser.parse_args(argv[1:])

    if args.usage == 'help':
        return parser.print_help()

    if is_environments_gen(args):
        _write_env_file(args)
    elif is_environments_list(args):
        # Print the trimmed spec name of every registered environment.
        for env_spec in registry.all():
            print(trim_env_spec_name(repr(env_spec)))
    elif is_environments_act(args):
        env = gym.make(args.environment_name)
        if is_action_type('dqn', args):
            # Fixed network-input sizes for the supported env families;
            # 'gym' derives the size from the live observation space.
            preset_state_sizes = {
                'nesgym': 172032,
                'gym-atari': 100800,
                'gym-atari-extend': 120000,
                'gym-atari-small': 100800,
                'gym-gomoku': 361,
            }
            if args.pre_defined_state_size == 'gym':
                pre_state_size = env.observation_space.shape[0]
            elif args.pre_defined_state_size in preset_state_sizes:
                pre_state_size = preset_state_sizes[
                    args.pre_defined_state_size]
            else:
                # Fail fast instead of the original NameError further down.
                raise ValueError('Unknown pre_defined_state_size: {}'.format(
                    args.pre_defined_state_size))
            state_size = pre_state_size
            action_size = env.action_space.n
            agent = DQNAgent(state_size, action_size)
            done = False
            batch_size = 64
        i_episodes = args.i_episodes
        timesteps = args.timesteps
        factor = args.seed_factor
        for i_episode in range(i_episodes):
            state = env.reset()
            if is_action_type('dqn', args):
                state = np.reshape(state, [1, pre_state_size])
            for t in range(timesteps):
                try:
                    if args.render == 'present':
                        env.render()
                    if args.render == 'presented':
                        env.render(args.render)
                    # Select an action according to the configured strategy.
                    if args.action_type == 'alternate':
                        action = random_action_space_sample_choice(
                            i_episodes * 2, env, factor)
                    elif args.action_type == 'specific':
                        action = env.action_space.sample()
                    elif args.action_type == 'conditional':
                        action = random_action_space_sample_choice(
                            i_episodes, env, factor)
                    elif args.action_type == 'numerical':
                        action = env.action_space.n
                    elif is_action_type('dqn', args) and len(state) == 5:
                        action = agent.act(state)
                    elif is_action_type('dqn', args) and len(state) != 5:
                        action = env.action_space.sample()
                    collect_stat(action, ['input', 'actions'], stats)
                    observation, reward, done, info = env.step(action)
                    if is_action_type('dqn', args):
                        # Penalize termination so the agent learns to keep
                        # the episode alive.
                        reward = reward if not done else -10
                        observation = np.reshape(observation,
                                                 [1, pre_state_size])
                        agent.remember(state, action, reward, observation,
                                       done)
                        state = observation
                    collect_stat(reward, ['rewards'], stats)
                    if done:
                        # Iteration limit used to decide whether this was
                        # the final episode worth dumping stats for.
                        episode_limit = i_episodes - 2
                        is_latest_episode = is_filled_latest_episode_with_iteration(
                            i_episode, episode_limit)
                        increased_timestep = increase_timestep(t)
                        print('i_episode {}'.format(i_episode))
                        print('Episode finished after {} timesteps'.format(
                            increased_timestep))
                        if is_action_type('dqn', args):
                            print('Episode: {}/{}, score: {}, e: {:.2}'.format(
                                i_episode, i_episodes, t, agent.epsilon))
                        collect_stat(t, ['output', 'timestep', 'iteration'],
                                     stats)
                        collect_stat(increased_timestep,
                                     ['output', 'timestep', 'increased'],
                                     stats)
                        # Plain condition replaces the old one-shot lambda.
                        if is_latest_episode and args.output_stats_filename:
                            _save_stats_csv(args.output_stats_filename, stats)
                        print(check_output_env_label())
                        break
                except Exception as e:
                    # Rendering/stepping problems are reported but do not
                    # abort the episode loop (best-effort, as originally).
                    print('Rendering execution ({})'.format(e))
                finally:
                    print('Execution of timestep done')
            if is_action_type('dqn',
                              args) and (len(agent.memory) > batch_size):
                agent.replay(batch_size)
    else:
        parser.print_help()
Esempio n. 2
0
class VideoStreamingTest(object):
    """Receive an MJPEG stream over TCP, track four colored markers with
    OpenCV, derive a 3-feature state for a DQN agent, and drive a car by
    sending steering ('S…') / throttle ('T…') commands.

    NOTE(review): relies on module-level globals defined elsewhere in the
    file: ``socket_tcp`` (the command socket written to by ``act_move`` and
    ``streaming``) and ``start_time`` (epoch reference used in
    ``streaming``).  ``finnal_result`` is a (misspelled) attribute name
    used consistently throughout — renaming would break this class.
    """

    def __init__(self, host, port):
        """Bind a listening socket on (host, port), set up the DQN agent
        and car-command constants, then immediately enter ``streaming()``
        (this constructor blocks until streaming ends)."""
        # DQN setup: 3-feature state (alpha_big, alpha_small, r), 7 actions.
        self.state_size = 3
        self.action_size = 7
        self.done = False
        self.batch_size = 32
        self.agent = DQNAgent(self.state_size, self.action_size)
        # Seed both state slots with an arbitrary fixed observation so the
        # first transitions have a well-formed shape (1, state_size).
        self.state_now = np.reshape([0.10606659, -0.52737298, 0.47917915],
                                    [1, self.state_size])
        self.state_last = np.reshape([0.10606659, -0.52737298, 0.47917915],
                                     [1, self.state_size])
        self.action_for_next = 0
        self.action_for_now = 0
        self.reward = 0
        # Car command strings: 'T…' = throttle, 'S…' = steering servo.
        self.forward = "T394"
        self.left = "S450"
        self.right = "S270"
        self.backward = "T330"
        self.stop = "T370"
        self.middle = "S360"
        # dqn parameters / video-stream server socket
        self.server_socket = socket.socket()
        self.server_socket.bind((host, port))
        self.server_socket.listen(0)
        # Blocks until the camera client connects; the connection is then
        # wrapped as a binary file object for chunked reads.
        self.connection, self.client_address = self.server_socket.accept()
        self.connection = self.connection.makefile("rb")
        self.host_name = socket.gethostname()
        self.host_ip = socket.gethostbyname(self.host_name)
        self.temp_result = None       # raw per-color marker positions
        self.finnal_result = None     # derived per-car geometry ('me'/'enemy')
        self.RANGE = 350              # normalization radius for r
        self.WIDTH = 720              # frame width; WIDTH/2 is the arena center
        self.time_now = 0
        self.count = 0
        self.streaming()

    def dqn_loop(self):
        """One DQN iteration per processed frame: refresh state/action,
        compute reward, move the car, store the transition, train."""
        # Episode is 'done' when our car drifts outside the unit radius.
        if self.finnal_result['me']['r'] > 1:
            self.done = True
        else:
            self.done = False
        # NOTE(review): 'if True' always runs — likely a leftover from a
        # removed condition.
        if True:
            self.prepare_state()  # shift previous state, capture current one
            self.prepare_action()  # shift previous action, pick the next one
            # Reward/remember are skipped on the very first frame (count
            # starts at 0) so that state_last holds a real observation.
            if self.count == 1:
                self.prepare_reward()  # reward for the previous action
            else:
                self.count += 1
            self.act_move()  # send motion commands to the car
            if self.count == 1:
                self.remember_step()  # store this transition
            if len(self.agent.memory) > self.batch_size:
                self.agent.replay(self.batch_size)

    def prepare_state(self):
        """Rotate ``state_now`` into ``state_last`` and rebuild
        ``state_now`` from the latest 'me' geometry."""
        self.state_last = self.state_now
        state_now_ = [self.finnal_result['me']['alpha_big'], \
        self.finnal_result['me']['alpha_small'], \
        self.finnal_result['me']['r']]
        self.state_now = np.reshape(state_now_, [1, self.state_size])
        #self.state_now = state_now_

    def prepare_action(self):
        """Rotate the pending action into ``action_for_now`` and ask the
        agent for the next action given the current state."""
        self.action_for_now = self.action_for_next
        self.action_for_next = self.agent.act(self.state_now)

    def prepare_reward(self):  # precondition: state_last has been populated
        """Set ``reward``: -10 on termination, otherwise 100x the decrease
        in distance-from-center (index [0][2] is the 'r' feature)."""
        if self.done:
            self.reward = -10
        else:
            self.reward = (self.state_last[0][2] - self.state_now[0][2]) * 100
            #self.reward = (self.state_last[2] - self.state_now[2])*100
    def remember_step(self):
        """Store the (state_last, action_for_now, reward, state_now, done)
        transition in the agent's replay memory."""
        self.agent.remember(self.state_last, self.action_for_now, self.reward,
                            self.state_now, self.done)

    def act_move(self):
        """Translate ``action_for_next`` (0-6) into steering + throttle
        command strings and send them over the module-level ``socket_tcp``.

        When the episode is done the action is forced to 0 (stop).
        """
        if self.done:
            self.action_for_next = 0

        if self.action_for_next == 0:  # stop
            str_S = self.middle
            str_T = self.stop
            str_S = str_S.encode("utf-8")
            str_T = str_T.encode("utf-8")
            socket_tcp.send(str_S)
            socket_tcp.send(str_T)

        elif self.action_for_next == 1:  # forward
            str_S = self.middle
            str_T = self.forward
            str_S = str_S.encode("utf-8")
            str_T = str_T.encode("utf-8")
            socket_tcp.send(str_S)
            socket_tcp.send(str_T)

        elif self.action_for_next == 2:  # forward turning left
            str_S = self.left
            str_T = self.forward
            str_S = str_S.encode("utf-8")
            str_T = str_T.encode("utf-8")
            socket_tcp.send(str_S)
            socket_tcp.send(str_T)

        elif self.action_for_next == 3:  # forward turning right
            str_S = self.right
            str_T = self.forward
            str_S = str_S.encode("utf-8")
            str_T = str_T.encode("utf-8")
            socket_tcp.send(str_S)
            socket_tcp.send(str_T)

        elif self.action_for_next == 4:  # backward
            # Backward is sent as a back / stop / back pulse sequence,
            # presumably to engage reverse on the ESC — confirm with the
            # car firmware.
            str_S = self.middle
            str_T = self.backward
            str_S = str_S.encode("utf-8")
            str_T = str_T.encode("utf-8")
            socket_tcp.send(str_S)
            socket_tcp.send(str_T)
            str_S = self.middle
            str_T = self.stop
            str_S = str_S.encode("utf-8")
            str_T = str_T.encode("utf-8")
            socket_tcp.send(str_S)
            socket_tcp.send(str_T)
            str_S = self.middle
            str_T = self.backward
            str_S = str_S.encode("utf-8")
            str_T = str_T.encode("utf-8")
            socket_tcp.send(str_S)
            socket_tcp.send(str_T)

        elif self.action_for_next == 5:  # backward turning left
            str_S = self.left
            str_T = self.backward
            str_S = str_S.encode("utf-8")
            str_T = str_T.encode("utf-8")
            socket_tcp.send(str_S)
            socket_tcp.send(str_T)
            # NOTE(review): unlike action 6, the stop/backward pulses below
            # are encoded but never sent — the socket_tcp.send calls are
            # missing.  Looks like a bug; confirm intended behavior.
            str_S = self.left
            str_T = self.stop
            str_S = str_S.encode("utf-8")
            str_T = str_T.encode("utf-8")
            str_S = self.left
            str_T = self.backward
            str_S = str_S.encode("utf-8")
            str_T = str_T.encode("utf-8")

        elif self.action_for_next == 6:  # backward turning right
            str_S = self.right
            str_T = self.backward
            str_S = str_S.encode("utf-8")
            str_T = str_T.encode("utf-8")
            socket_tcp.send(str_S)
            socket_tcp.send(str_T)
            str_S = self.right
            str_T = self.stop
            str_S = str_S.encode("utf-8")
            str_T = str_T.encode("utf-8")
            socket_tcp.send(str_S)
            socket_tcp.send(str_T)
            str_S = self.right
            str_T = self.backward
            str_S = str_S.encode("utf-8")
            str_T = str_T.encode("utf-8")
            socket_tcp.send(str_S)
            socket_tcp.send(str_T)

    def get_one_car(self, x1, y1, x2, y2):
        """Compute one car's pose from its two marker centers.

        (x1, y1) and (x2, y2) are the pixel coordinates of the car's two
        colored markers.  Returns a dict with:
          - alpha_small: marker-pair heading angle, normalized to [-1, 1)
            (atan result divided by pi, shifted by -0.5)
          - alpha_big: angle of the car center relative to the arena
            center (WIDTH/2, WIDTH/2), normalized the same way
          - r: distance from the arena center, normalized by RANGE
          - x0, y0: car center in pixels
        """
        x0 = (x1 + x2) / 2
        y0 = (y1 + y2) / 2
        detx = x1 - x2
        dety = y1 - y2
        temp_x0 = x0 - self.WIDTH / 2
        temp_y0 = y0 - self.WIDTH / 2
        # atan(dy/dx) with quadrant correction; vertical case handled
        # separately to avoid division by zero.
        if detx > 0:
            alpha_small = math.atan(dety / detx)
        elif detx < 0:
            alpha_small = math.atan(dety / detx) + math.pi
        else:
            if dety > 0:
                alpha_small = math.pi / 2
            else:
                alpha_small = 0 - math.pi / 2

        if temp_x0 > 0:
            alpha_big = math.atan(temp_y0 / temp_x0)
        elif temp_x0 < 0:
            alpha_big = math.atan(temp_y0 / temp_x0) + math.pi
        else:
            if temp_y0 > 0:
                alpha_big = math.pi / 2
            else:
                alpha_big = 0 - math.pi / 2

        alpha_small = alpha_small / math.pi - 0.5
        alpha_big = alpha_big / math.pi - 0.5
        r = math.sqrt(temp_x0**2 + temp_y0**2) / self.RANGE
        return {
            "alpha_big": alpha_big,
            "alpha_small": alpha_small,
            "r": r,
            "x0": x0,
            "y0": y0
        }

    def get_finnal_result(self):
        """Derive both cars' geometry from ``temp_result`` marker positions:
        'me' = red+green markers, 'enemy' = blue+yellow markers."""
        red_x = self.temp_result["red"]["x"]
        red_y = self.temp_result["red"]["y"]
        green_x = self.temp_result["green"]["x"]
        green_y = self.temp_result["green"]["y"]
        blue_x = self.temp_result["blue"]["x"]
        blue_y = self.temp_result["blue"]["y"]
        yellow_x = self.temp_result["yellow"]["x"]
        yellow_y = self.temp_result["yellow"]["y"]
        finnal_temp = {}
        me_temp = self.get_one_car(red_x, red_y, green_x, green_y)
        enemy_temp = self.get_one_car(blue_x, blue_y, yellow_x, yellow_y)
        finnal_temp["me"] = me_temp
        finnal_temp["enemy"] = enemy_temp
        self.finnal_result = finnal_temp

    def draw(self, frame, lowerRGB, upperRGB, word):
        """Find the largest blob within the HSV range [lowerRGB, upperRGB],
        annotate ``frame`` in place, and return {'x':…, 'y':…} for its
        enclosing-circle center.

        Returns None when no contour is found or its radius is <= 10 px.
        """
        hsv = cv2.cvtColor(frame, cv2.COLOR_BGR2HSV)
        # Build a mask from the color thresholds.
        mask = cv2.inRange(hsv, lowerRGB, upperRGB)
        # Erode...
        mask = cv2.erode(mask, None, iterations=2)
        # ...then dilate: a morphological opening that removes noise.
        mask = cv2.dilate(mask, None, iterations=2)
        cnts = cv2.findContours(mask.copy(), cv2.RETR_EXTERNAL,
                                cv2.CHAIN_APPROX_SIMPLE)[-2]
        # Centroid of the largest contour (stays None if none found).
        center = None
        # If any contour exists:
        if len(cnts) > 0:
            # Pick the largest-area contour.
            c = max(cnts, key=cv2.contourArea)
            # Minimum enclosing circle of that contour.
            ((x, y), radius) = cv2.minEnclosingCircle(c)
            # Contour moments...
            M = cv2.moments(c)
            # ...give the centroid.
            center = (int(M["m10"] / M["m00"]), int(M["m01"] / M["m00"]))
            # Only draw (and report a hit) when radius exceeds 10 px.
            if radius > 10:
                cv2.circle(frame, (int(x), int(y)), int(radius), (0, 255, 255),
                           2)
                cv2.circle(frame, center, 5, (0, 0, 255), -1)

                font = cv2.FONT_HERSHEY_SIMPLEX
                cv2.putText(frame, word, (int(x), int(y)), font, 1.2,
                            (255, 255, 255), 2)
                result = {}
                result["x"] = x
                result["y"] = y

                return result

    def streaming(self):
        """Main loop: read the MJPEG TCP stream, decode each frame, detect
        the four markers, run the DQN step when all are visible (otherwise
        command a stop), and display the annotated frame until 'q'.

        Sockets are closed in the ``finally`` block on any exit.
        """
        try:
            print("Host: ", self.host_name + " " + self.host_ip)
            print("Connection from: ", self.client_address)
            print("Streaming...")
            print("Press 'q' to exit")

            # HSV detection ranges for the four marker colors.
            redLower = np.array([170, 100, 200])
            redUpper = np.array([179, 255, 255])

            greenLower = np.array([65, 100, 100])
            greenUpper = np.array([85, 255, 255])

            #blueLower = np.array([0, 0, 150])
            #blueUpper = np.array([100, 100, 255])
            blueLower = np.array([95, 100, 100])
            blueUpper = np.array([115, 255, 255])
            yellowLower = np.array([5, 100, 100])
            yellowUpper = np.array([20, 255, 255])
            # need bytes here
            stream_bytes = b" "
            while True:
                stream_bytes += self.connection.read(1024)
                # JPEG frames are delimited by SOI (ffd8) / EOI (ffd9).
                first = stream_bytes.find(b"\xff\xd8")
                last = stream_bytes.find(b"\xff\xd9")
                #str_ = 'S270'
                #str_ = str_.encode("utf-8")
                #socket_tcp.send(str_)

                #f = open('record_' + str(self.count) + '.json', 'w')
                #json.dump(dic_dump, f)
                #f.close()

                if first != -1 and last != -1:
                    # Slice out one complete JPEG and keep the remainder.
                    jpg = stream_bytes[first:last + 2]
                    stream_bytes = stream_bytes[last + 2:]
                    image = cv2.imdecode(np.frombuffer(jpg, dtype=np.uint8),
                                         cv2.IMREAD_COLOR)
                    frame = image
                    result_red = self.draw(frame, redLower, redUpper, "RED")
                    result_green = self.draw(frame, greenLower, greenUpper,
                                             "GREEN")
                    result_blue = self.draw(frame, blueLower, blueUpper,
                                            "blue")
                    result_yellow = self.draw(frame, yellowLower, yellowUpper,
                                              "YELLOW")
                    result = {}
                    result["red"] = result_red
                    result["green"] = result_green
                    result["blue"] = result_blue
                    result["yellow"] = result_yellow

                    self.temp_result = result
                    # flag == all four markers detected in this frame.
                    flag = True
                    if not result_red:
                        flag = False
                    if not result_green:
                        flag = False
                    if not result_blue:
                        flag = False
                    if not result_yellow:
                        flag = False
                    if flag:
                        self.get_finnal_result()
                        # start_time is a module-level global epoch reference.
                        self.time_now = int((time.time() - start_time) * 1000)
                        self.dqn_loop()
                        '''
                        dic_dump = {'data': self.finnal_result, 'time' : self.time_now}
                        f = open('./test_1/record_' + str(self.count) + '.json', 'w')
                        json.dump(dic_dump, f)
                        f.close()
                        self.count +=1
                        '''
                        # Debug overlay: marker pair lines, center rays, and
                        # the current angle readouts.
                        cv2.line(frame, (int(self.temp_result["red"]["x"]),
                                         int(self.temp_result["red"]["y"])),
                                 (int(self.temp_result["green"]["x"]),
                                  int(self.temp_result["green"]["y"])),
                                 (0, 255, 0), 1, 4)
                        cv2.line(frame, (int(self.temp_result["blue"]["x"]),
                                         int(self.temp_result["blue"]["y"])),
                                 (int(self.temp_result["yellow"]["x"]),
                                  int(self.temp_result["yellow"]["y"])),
                                 (0, 255, 0), 1, 4)
                        cv2.line(frame, (int(self.finnal_result["me"]["x0"]),
                                         int(self.finnal_result["me"]["y0"])),
                                 (int(self.WIDTH / 2), int(self.WIDTH / 2)),
                                 (0, 0, 255), 4, 4)
                        cv2.line(frame,
                                 (int(self.finnal_result["enemy"]["x0"]),
                                  int(self.finnal_result["enemy"]["y0"])),
                                 (int(self.WIDTH / 2), int(self.WIDTH / 2)),
                                 (255, 0, 0), 4, 4)
                        font = cv2.FONT_HERSHEY_SIMPLEX
                        cv2.putText(frame,
                                    str(self.finnal_result["me"]["alpha_big"]),
                                    (int(self.finnal_result["me"]["x0"]),
                                     int(self.finnal_result["me"]["y0"])),
                                    font, 1, (0, 255, 0), 1)
                        cv2.putText(
                            frame,
                            str(self.finnal_result["enemy"]["alpha_small"]),
                            (int(self.finnal_result["enemy"]["x0"]),
                             int(self.finnal_result["enemy"]["y0"])), font, 1,
                            (0, 255, 0), 1)
                    else:
                        # Missing marker: command center steering + stop.
                        str_S = "S360"
                        str_T = "T370"
                        str_S = str_S.encode("utf-8")
                        str_T = str_T.encode("utf-8")
                        socket_tcp.send(str_S)
                        socket_tcp.send(str_T)
                    #print(self.finnal_result)
                    cv2.imshow("Frame", frame)

                    if cv2.waitKey(1) & 0xFF == ord("q"):
                        break
        finally:
            self.connection.close()
            self.server_socket.close()
Esempio n. 3
0
        next_state, reward, done, _ = env.step2(commands)
        # reward = reward if not done else -10
        if (time == 0):
            last_reward = reward - 1

        if (reward <= last_reward and done == False):
            # print('bad reward')
            last_reward = reward
            reward = -1000
        else:
            # print('god reward')
            last_reward = reward

        next_state = np.reshape(next_state, [1, state_size])
        agent.remember(state, action, reward, next_state, done)
        state = next_state
        if done:
            print("done     => episode: {}/{}, score: {:.2f}, e: {:.2}".format(
                e, EPISODES, reward, agent.epsilon))
            agent.replay(len(agent.memory))
            break
        if len(agent.memory) > batch_size:
            # print('replay')
            agent.replay(batch_size)
        if (time == 999):
            # print(reward)
            # print(state)
            print("not done => episode: {}/{}, score: {:.2f}, e: {:.2}".format(
                e, EPISODES, reward, agent.epsilon))
    # if e % 1 == 0:
    #     agent.save("./save/example_dqn.h5")
Esempio n. 4
0
    mdp.simulator.hyst.reset()

    #  We reinitialize the memory of the flow
    state = mdp.initializeMDP(hdg0, WH)
    loss_sim_list = []
    for time in range(80):
        WH = w.generateWind()
        action = agent.act(state)
        next_state, reward = mdp.transition(action, WH)
        agent.remember(
            state, action, reward,
            next_state)  # store the transition + the state flow in the
        # final state !!
        state = next_state
        if len(agent.memory) >= batch_size:
            loss_sim_list.append(agent.replay(batch_size))
            # For data visualisation
            i.append(mdp.s[0, -1])
            v.append(mdp.s[1, -1])
            r.append(mdp.reward)

    loss_over_simulation_time = np.sum(np.array([loss_sim_list])[0]) / len(
        np.array([loss_sim_list])[0])
    loss_of_episode.append(loss_over_simulation_time)
    print("Initial Heading : {}".format(hdg0_rand))
    print("----------------------------")
    print("episode: {}/{}, Mean Loss = {}".format(e, EPISODES,
                                                  loss_over_simulation_time))
    print("----------------------------")
agent.save("../Networks/dqn-test")
Esempio n. 5
0
            state = np.reshape(state, [1, state_size])
            for time in range(500):
                # env.render()
                action = agent.act(state)
                next_state, reward, done, _ = env.step(action)
                # reward = reward if not done else -10
                next_state = np.reshape(next_state, [1, state_size])
                agent.remember(state, action, reward, next_state, done)
                state = next_state
                # print(action, reward)
                if done:
                    print("episode: {}/{}, score: {}, e: {:.5}".format(
                        e, EPISODES, time, agent.epsilon))
                    break
                if len(agent.memory) > batch_size:
                    agent.replay(batch_size)
            if e % 10 == 0:
                save_string = './save/' + stock_name + '_weights_with_fees.h5'
                agent.save(save_string)

# # serialize model to JSON
# model_json = model.to_json()
# with open("model.json", "w") as json_file:
#     json_file.write(model_json)
# # serialize weights to HDF5
# model.save_weights("model.h5")
# print("Saved model to disk")

# # later...

# # load json and create model