def test_palette_support():
    terms = (
        ('dumb', TermLevel.DUMB),
        ('linux', TermLevel.ANSI_BASIC),
        ('xterm-color', TermLevel.ANSI_BASIC),
        ('xterm-256color', TermLevel.ANSI_EXTENDED),
    )
    for name, result in terms:
        detection.env = Environment(environ=dict(TERM=name))
        assert detection.detect_terminal_level() == result

    detection.env = Environment(environ=dict(COLORTERM='24bit'))
    assert detection.detect_terminal_level() == TermLevel.ANSI_DIRECT

    from . import windows  # try win implementation
    terms = (
        ('dumb', TermLevel.DUMB),
        ('xterm-color', TermLevel.ANSI_BASIC),
        ('xterm-256color', TermLevel.ANSI_EXTENDED),
        ('cygwin', TermLevel.ANSI_DIRECT),  # ?
    )
    for name, result in terms:
        windows.env = Environment(environ=dict(TERM=name))
        assert windows.detect_terminal_level() == result

    windows.env = Environment(environ=dict(ANSICON='1'))
    assert windows.detect_terminal_level() == TermLevel.ANSI_EXTENDED
def play_before_step_n(p_net, p_net_op, sl_net, sl_net_op, n):
    # Play the first N moves with the SL network; after that, play with the Policy network.
    # Save the black-side board state at move N, then let the Policy nets play each other
    # to decide the winner.
    env = Environment(sl_net_op, "black")
    for i in range(n):
        probs = sl_net(torch.from_numpy(env.get_state()).double())
        act = select_legal_hand(probs, env.othello.legal_hands("black"))
        _, _, done = env.step(act)
        if done:
            return False

    state = env.get_state()  # save the state after the N-th move
    env2 = Environment(p_net_op, "black")
    env2.import_othello(env.othello)  # hand the game over to env2
    while True:
        probs = p_net(torch.from_numpy(env2.get_state()).double())
        act = select_legal_hand(probs, env2.othello.legal_hands("black"))
        _, rew, done = env2.step(act)
        if done:
            if rew == 1.0:
                color = "black"
            elif rew == -1.0:
                color = "white"
            else:
                color = "none"
            ans = train_data(state, color)
            global data_queue
            data_queue = np.append(data_queue, ans)
            return True
def test_terminal_level_detection_override():
    terms = (
        ('dumb', (TermLevel.DUMB, '@')),
        ('linux', (TermLevel.ANSI_BASIC, '@')),
        ('xterm-color', (TermLevel.ANSI_BASIC, '@')),
        ('xterm-256color', (TermLevel.ANSI_EXTENDED, '@')),
        ('xterm-direct', (TermLevel.ANSI_DIRECT, '@')),
        ('fbterm', (TermLevel.ANSI_EXTENDED, '@')),
    )
    for name, expected in terms:
        detection.env = Environment(environ=dict(TERM=name, PY_CONSOLE_COLOR_SEP='@'))
        if name == 'fbterm':  # :-/
            detection.is_fbterm = True
        assert detection.detect_terminal_level() == expected

    from . import windows  # try win implementation
    terms = (
        ('dumb', (TermLevel.DUMB, '@')),
        ('xterm-color', (TermLevel.ANSI_BASIC, '@')),
        ('xterm-256color', (TermLevel.ANSI_EXTENDED, '@')),
        ('cygwin', (TermLevel.ANSI_DIRECT, '@')),  # ?
    )
    for name, expected in terms:
        windows.env = Environment(environ=dict(TERM=name, PY_CONSOLE_COLOR_SEP='@'))
        assert windows.detect_terminal_level() == expected

    windows.env = Environment(environ=dict(ANSICON='1'))
    assert windows.detect_terminal_level() == (TermLevel.ANSI_EXTENDED, ';')
def test_color_allowed():
    detection.env = Environment(environ={})
    assert detection.color_is_allowed() is True

    detection.env = Environment(environ=dict(CLICOLOR='0'))
    assert detection.color_is_allowed() is False

    detection.env = Environment(environ=dict(NO_COLOR=''))
    assert detection.color_is_allowed() is False
def test_color_disabled_none_false():
    detection.env = Environment(environ={})
    assert detection.color_is_disabled() is None

    detection.env = Environment(environ=dict(CLICOLOR=''))
    assert detection.color_is_disabled() is None

    detection.env = Environment(environ=dict(CLICOLOR='1'))
    assert detection.color_is_disabled() is False
def test_color_forced():
    detection.env = Environment(environ={})
    assert detection.color_is_forced().value is None

    detection.env = Environment(environ=dict(CLICOLOR_FORCE='0'))
    assert detection.color_is_forced() is False

    detection.env = Environment(environ=dict(CLICOLOR_FORCE='foo'))
    assert detection.color_is_forced() is True

    detection.env = Environment(environ=dict(CLICOLOR_FORCE='1'))
    assert detection.color_is_forced() is True
def train_iteratively(args):
    # iteration 1
    team_blue = [PGAgent(idx, "blue") for idx in range(args.n_friends)]
    team_red = [Agent(args.n_friends + idx, "red") for idx in range(args.n_enemies)]
    training_agents = team_blue
    agents = team_blue + team_red

    env = Environment(agents)
    args.n_actions = 6 + args.n_enemies
    args.n_inputs = 4 + 3 * (args.n_friends - 1) + 3 * args.n_enemies
    model = ForwardModel(input_shape=args.n_inputs, n_actions=args.n_actions)
    for agent in training_agents:
        agent.set_model(model)
    training_agents = train_agents(env, training_agents, args)
    trained_model = copy.deepcopy(training_agents[0].model)

    for iteration in range(args.n_iterations):
        args.n_steps = 10000 * (iteration + 2)
        team_blue = [PGAgent(idx, "blue") for idx in range(args.n_friends)]
        team_red = [PGAgent(args.n_friends + idx, "red") for idx in range(args.n_enemies)]
        training_agents = team_blue
        agents = team_blue + team_red

        env = Environment(agents, args)

        # the red team plays with a frozen copy of the previously trained model
        model = ForwardModel(input_shape=args.n_inputs, n_actions=args.n_actions)
        model.load_state_dict(trained_model.state_dict())
        model.eval()
        for agent in team_red:
            agent.set_model(model)

        # the blue team trains a fresh model
        model = ForwardModel(input_shape=args.n_inputs, n_actions=args.n_actions)
        for agent in team_blue:
            agent.set_model(model)

        training_agents = train_agents(env, training_agents, args)
        trained_model = copy.deepcopy(training_agents[0].model)

    torch.save(trained_model.state_dict(), args.path + f'RUN_{get_run_id()}.torch')
def __init__(self):
    self.state_dim = STATE_DIM    # m x n grid
    self.action_dim = ACTION_DIM  # actions: up, down, right, left
    self.env = Environment(self.state_dim, self.action_dim)
    self.agent = Agent(self.state_dim, self.action_dim)
    self.episode = 0
    self.batch_size = BATCH_SIZE
    self.isTraining = TRAINING
    self.isPlaying = PLAYING

    # ====== OpenGL setup ======
    argv = sys.argv
    glutInit(argv)
    glutInitWindowPosition(0, 0)
    glutInitWindowSize(WIN_WIDTH, WIN_HEIGHT)
    glutInitDisplayMode(GLUT_DOUBLE | GLUT_RGB | GLUT_DEPTH)
    glutCreateWindow("DQN example")
    glutDisplayFunc(self.display)
    glutReshapeFunc(self.reshape)
    glutKeyboardFunc(self.keyCB)
    # ==========================

    if self.isTraining:
        self.training()
    elif self.isPlaying:
        self.agent.load(saved_weight)
        self.playing()

    glutMainLoop()
def main(_):
    # CPU only
    tf.config.experimental.set_visible_devices([], 'GPU')
    tf.get_logger().setLevel('INFO')
    #tf.debugging.set_log_device_placement(True)

    config = get_config(FLAGS) or FLAGS
    env = Environment(config, is_training=True)
    game = CFRRL_Game(config, env)

    model_weights_queues = []
    experience_queues = []
    if FLAGS.num_agents == 0 or FLAGS.num_agents >= mp.cpu_count():
        FLAGS.num_agents = mp.cpu_count() - 1
    print('Agent num: %d, iter num: %d\n' % (FLAGS.num_agents + 1, FLAGS.num_iter))
    for _ in range(FLAGS.num_agents):
        model_weights_queues.append(mp.Queue(1))
        experience_queues.append(mp.Queue(1))

    tm_subsets = np.array_split(game.tm_indexes, FLAGS.num_agents)

    coordinator = mp.Process(target=central_agent,
                             args=(config, game, model_weights_queues, experience_queues))
    coordinator.start()

    agents = []
    for i in range(FLAGS.num_agents):
        agents.append(mp.Process(target=agent,
                                 args=(i, config, game, tm_subsets[i],
                                       model_weights_queues[i], experience_queues[i])))
    for i in range(FLAGS.num_agents):
        agents[i].start()

    coordinator.join()
def scenario_one():
    """
    This scenario consists of a single agent type with constant c and state variables x, y, z.
    The dynamics of this system are defined as:
        x = y + z + c
        y = 2z
        z = sin(x - y)
    """
    env = Environment()
    agent = Agent(['c'], ['x', 'y', 'z'], 'agent1')
    env.register_agents(agent)
    env.compile()

    constants = pd.DataFrame({'c': np.random.rand(1000)})
    states = pd.DataFrame({var: np.random.random(1000) for var in ['x', 'y', 'z']})
    data_input = {agent: {'constants': constants, 'states': states}}

    agent_output = pd.DataFrame({
        'x': states['y'] + states['z'] + constants['c'],
        'y': 2 * states['z'],
        'z': np.sin(states['x'] - states['y'])
    })
    data_output = {agent: agent_output}

    env.solo_train(data_input, data_output)
    env.solo_test(data_input, data_output)
    # print(env.derivativeMatrix(agent, 'x', 100))
    print(env.correlation_matrix(agent))
def work(self, encoder_network):
    while not self.coord.should_stop() and self.epoch < self.num_epochs:
        step = 0
        env = Environment(self.img_path)  # start an episode
        while step < self.train_batches_per_epoch:
            images_np = env.load_image(self.img_offset)
            self.img_offset += 1
            raw_img, target_img, act_list, mse = env.take_action(images_np)
            encoder_network.update_state_predicter(raw_img, act_list, target_img, self.lr)
            encoder_network.update_action_predicter(raw_img, act_list, target_img, self.lr)
            # the autoencoder loss is still missing here
            encoder_network.pull_all_params()
            step += 1
        self.epoch += 1
def main():
    start_date = datetime.datetime(2002, 1, 1)
    end_date = datetime.datetime(2021, 1, 30)
    stocks = web.DataReader('SPY', 'yahoo', start_date, end_date)

    env = Environment(stocks, LOOK_BACK, stocks.shape[1])
    in_dim = env.observation_shape[0]
    out_dim = env.action_space.n
    agent = Agent(in_dim, out_dim)
    agent.reset()
    optimizer = optim.RMSprop(agent.parameters(), lr=0.05)

    for epi in range(EPISODES):
        state = env.set_state()
        for t in range(env.total_days - env.look_back):
            action = agent.act(state)
            state, reward, done = env.step(action)
            agent.rewards.append(reward)
            if done:
                break
        loss = agent.fit(optimizer, GAMMA)
        total_reward = sum(agent.rewards)
        reward_records = agent.rewards
        agent.reset()  # clear memory after training
        print(f'Episode {epi}, Loss: {loss}, Profit: {total_reward}')

    print("----Training Over----")
    cum_rewards = np.cumsum(reward_records)
    plt.title("Cumulative Profit on Last Episode")
    plt.xlabel("Days of Trading")
    plt.ylabel("Price ($)")
    plt.plot(cum_rewards)
    plt.show()
def __init__(self, sim_number, init=True, _seed=None, ATNE=True):
    super(EnvironmentListener, self).__init__()
    seed(_seed)
    self.ATNE = ATNE
    self.sim_number = sim_number
    self.post_process = PostGraph(self.sim_number, columns=[
        "sim_number", "sim_step", "veh_id", "edge_id",
        "speed", "capacity", "budget", "prev_poi"])

    self.t = 0
    self.break_condition = False  # becomes True once all vehicles have arrived

    file_dir = os.path.dirname(GraphSetting.sumo_config)
    map_list = glob.glob(os.path.join(file_dir, r"*.map"))
    try:
        self.sim_env = self.read(map_list[0])
        print(f"loaded map from {map_list[0]}")
    except IndexError:
        print(f".map file generating for {GraphSetting.sumo_config}")
        self.sim_env = Environment()
        self.save(GraphSetting.sumo_config, self.sim_env)

    if init:
        self.initial_reward_random(GraphSetting.reward_numbers)
        self.initial_route_random(GraphSetting.car_numbers)

    self.junction_sub()
def main():
    # Define argument requirements
    exact_count = -1  # Set to -1 to disable exact arg checking
    min_count = 1     # Set to -1 to disable min arg checking
    max_count = -1    # Set to -1 to disable max arg checking
    summary = "IBM Homework test script"
    usage = "usage: %prog [options] <test_name> <args>"

    # Set the add custom options callback if necessary
    add_custom_options_cb = 0  # custom_options_cb

    # Allocate and initialize the script's environment
    env = Environment(os.path.basename(__file__), usage, summary, exact_count,
                      min_count, max_count, add_custom_options_cb)

    # Execute script-specific functionality
    if env.error == False and env.complete == False:
        env.error = execute(env)

    # Exit the program
    env.exit()
def envConfig():
    if request.method == 'POST':
        global marseEnv
        mydict = request.get_json()
        marseEnv = Environment(temperature=mydict.get('temperature'),
                               humidity=mydict.get('humidity'),
                               solar_flare=mydict.get('solar-flare'),
                               storm=mydict.get('storm'),
                               area_map=mydict.get('area-map'))
        if marseEnv.solar_flare:
            myrover.battery = 11
        if marseEnv.storm:
            shield_index = None
            for index, item in enumerate(myrover.inventory):
                if item.type == 'storm-shield':
                    shield_index = index
            if shield_index is not None:
                if myrover.inventory[shield_index].qty == 1:
                    myrover.inventory.pop(shield_index)
                else:
                    myrover.inventory[shield_index].qty = myrover.inventory[shield_index].qty - 1
    else:
        func = request.environ.get('werkzeug.server.shutdown')
        if func is None:
            raise RuntimeError('Not running with the Werkzeug Server')
        func()
    return Response(status=200)
def main(cfg: DictConfig) -> None:
    "The entry point for parsing user-provided texts"
    assert cfg.model_path is not None, "Need to specify model_path for testing."
    assert cfg.input is not None
    assert cfg.language in ("english", "chinese")
    log.info("\n" + OmegaConf.to_yaml(cfg))

    # load the model checkpoint
    model_path = hydra.utils.to_absolute_path(cfg.model_path)
    log.info("Loading the model from %s" % model_path)
    checkpoint = load_model(model_path)
    restore_hyperparams(checkpoint["cfg"], cfg)
    vocabs = checkpoint["vocabs"]
    model = Parser(vocabs, cfg)
    model.load_state_dict(checkpoint["model_state"])
    device, _ = get_device()
    model.to(device)
    log.info("\n" + str(model))
    log.info("#parameters = %d" % sum([p.numel() for p in model.parameters()]))

    input_file = hydra.utils.to_absolute_path(cfg.input)
    ds = UserProvidedTexts(input_file, cfg.language, vocabs, cfg.encoder)
    loader = DataLoader(
        ds,
        batch_size=cfg.eval_batch_size,
        collate_fn=form_batch,
        num_workers=cfg.num_workers,
        pin_memory=torch.cuda.is_available(),
    )

    env = Environment(loader, model.encoder, subbatch_max_tokens=9999999)
    state = env.reset()
    oup = (sys.stdout if cfg.output is None
           else open(hydra.utils.to_absolute_path(cfg.output), "wt"))
    time_start = time()

    with torch.no_grad():  # type: ignore
        while True:
            with torch.cuda.amp.autocast(cfg.amp):  # type: ignore
                actions, _ = model(state)
            state, done = env.step(actions)
            if done:
                for tree in env.pred_trees:
                    assert tree is not None
                    print(tree.linearize(), file=oup)
                # pred_trees.extend(env.pred_trees)
                # load the next batch
                try:
                    with torch.cuda.amp.autocast(cfg.amp):  # type: ignore
                        state = env.reset()
                except EpochEnd:
                    # no next batch available (complete)
                    log.info("Time elapsed: %f" % (time() - time_start))
                    break

    if cfg.output is not None:
        log.info("Parse trees saved to %s" % cfg.output)
def train_multiple(summary_interval):
    models = [QNetwork, Small, Large, Dropout]
    with Environment() as env:
        summary_scores = {}
        for model in models:
            summary = train(model, env, summary_interval)
            summary_scores[model.__name__] = summary
    plot_summary(summary_scores, summary_interval)
def __init__(self):
    super(EnvironmentListener, self).__init__()
    self.sim_env = Environment()
    self.initial_reward_random(GraphSetting.reward_numbers)
    self.initial_route_random(GraphSetting.car_numbers)
    self.junction_sub()
def test_lava_is_terminal():
    env = Environment(CONFIG, add_agent_value=False)

    _, r, done = env.step('R')
    assert r == 0
    assert not done

    _, r, done = env.step('R')
    assert r == -10
    assert done
def test_env_reset_position_after_few_steps():
    env = Environment(CONFIG, add_agent_value=False)
    env.reset()
    env.step('R')
    env.step('U')
    assert env.agent_position != env.start_position

    env.reset()
    assert env.agent_position == env.start_position
def test_palette_support():
    terms = (
        ('dumb', None),
        ('linux', 'basic'),
        ('xterm-color', 'basic'),
        ('xterm-256color', 'extended'),  # ?
    )
    pal = (1,)  # dummy palette, tests true
    for name, result in terms:
        detection.env = Environment(environ=dict(TERM=name))
        assert detection.detect_palette_support(basic_palette=pal) == (result, pal)

    detection.env = Environment(environ=dict(ANSICON='1'))
    assert detection.detect_palette_support(basic_palette=pal) == ('extended', pal)

    detection.env = Environment(environ=dict(COLORTERM='24bit'))
    assert detection.detect_palette_support(basic_palette=pal) == ('truecolor', pal)
def __init__(self, alpha=0.1, gamma=0.9, epsilon=0.6, verbose=True):
    self.env = Environment()
    self.alpha = alpha
    self.gamma = gamma
    self.epsilon = epsilon
    self.verbose = verbose
    self.num_actions = 8
    self.qtable = {}
    self.current_state = self.env.detect_nearby()
def main(args):
    if args.examine:
        with Environment(no_graphics=True) as env:
            examine(env)
    if args.random:
        with Environment(no_graphics=False) as env:
            for i in range(5):
                random(env)
    if args.train:
        with Environment(no_graphics=True) as env:
            experiment = _setup_experiment(disabled=(not args.log))
            if experiment:
                with experiment.train():
                    train(env, experiment)
            else:
                train(env, experiment)
    if args.test:
        with Environment(no_graphics=False) as env:
            test(env)
def produce_env(total_time=600, punish_flag=True, valid_flag=False):
    environment = Environment(remain_dict=remain_dict,
                              edge_sequence=edge_sequence,
                              node_num=node_num,
                              path_dict=path_dict,
                              request_type_list=request_type_list,
                              total_time=total_time,
                              punish_flag=punish_flag,
                              valid_flag=valid_flag)
    return environment
def __init__(self, config):
    self.sess = tf.Session()
    self.sess.__enter__()
    env = Environment(config)
    policy_net = PolicyNet(config, env)
    super().__init__(config, env, policy_net)
    self.update_policy = PPOUpdater(policy_net, config, self.logger)
    self.sess.run(tf.global_variables_initializer())
    # warm up agent.scalar
    self._run_policy(batch_size=1000)
def test_reward_for_bridge_resets_correctly():
    env = Environment(CONFIG, add_agent_value=False)
    steps = ['R', 'U', 'R', 'R', 'R']
    for step in steps:
        _, r, _ = env.step(step)

    env.reset()
    for step in steps:
        _, r, _ = env.step(step)
    assert r == 1
def test_successful_trajectory():
    env = Environment(CONFIG, add_agent_value=False)
    steps = ['R', 'U', 'R', 'R', 'R', 'R', 'R', 'U']
    for step in steps:
        s, r, done = env.step(step)

    assert s.coordinate == (1, 6)
    assert np.all(s.observation == np.array([[2, 2, 2], [0, 0, 2], [0, 0, 2]]))
    assert r == 100
    assert done
def test_step():
    env = Environment(CONFIG, add_agent_value=False)

    env.step('R')
    assert env.agent_position == (3, 2)

    env.step('U')
    assert env.agent_position == (2, 2)

    env.step('L')
    assert env.agent_position == (2, 1)
def train(self, my_lambda, true_star_value):
    environment = Environment()
    terminated = False
    eligibility = np.zeros([36])

    # initial state: both player and dealer draw a black card
    card = round(random.uniform(1, 10))
    dealer_card = round(random.uniform(1, 10))
    states = []
    actions = []
    current_state = [dealer_card, card]
    current_action = self.epsilon_action(current_state)

    while not terminated:
        current_action_index = 1 if current_action == 'hit' else 0
        states.append(current_state)
        actions.append(current_action_index)
        old_card = card
        state_prime, reward = environment.step(current_state, current_action)
        card = state_prime[1]

        # reward is 2 while the state is non-terminal, because 0 would overlap
        # with the terminal draw reward; in the terminal state future action
        # values are taken to be 0
        current_reward = 0
        action_prime = None
        if reward != 2:
            current_reward = reward
            delta = current_reward + (0 - self.do_approximation(
                [dealer_card, old_card], current_action_index))
        else:
            action_prime = self.epsilon_action([dealer_card, card])
            action_prime_index = 1 if action_prime == 'hit' else 0
            delta = current_reward + (self.do_approximation(
                [dealer_card, card], action_prime_index) - self.do_approximation(
                [dealer_card, old_card], current_action_index))

        eligibility = my_lambda * eligibility + self.feature_vector.flatten()
        self.weights += self.step_size * delta * eligibility

        current_state = [dealer_card, card]
        current_action = action_prime
        if reward == -1 or reward == 1 or reward == 0:
            terminated = True

    mse = self.calculate_mse(true_star_value)
    return mse
def create_environment(self, seed):
    return Environment(
        rom_file=self.cfg['ENV']['game'],
        frame_skip=self.cfg['ENV']['frame_skip'],
        num_frames=self.cfg['ENV']['history_size'],
        no_op_start=self.cfg['ENV']['max_no_op_frames'],
        seed=seed,
        obs_height=self.cfg['ENV']['obs_height'],
        obs_width=self.cfg['ENV']['obs_width'],
        dead_as_end=self.cfg['ENV']['dead_as_end'],
        max_episode_steps=self.cfg['ENV']['max_episode_steps'])