Example #1
    def close(self):
        """Make sure to manually close, or else you'll leak the encoder process"""
        if not self.enabled:
            return

        if self.encoder:
            logger.debug('Closing video encoder: path=%s', self.path)
            self.encoder.close()
            self.encoder = None
        else:
            # No frames captured. Set metadata, and remove the empty output file.
            os.remove(self.path)

            if self.metadata is None:
                self.metadata = {}
            self.metadata['empty'] = True

        # If broken, get rid of the output file, otherwise we'd leak it.
        if self.broken:
            logger.info('Cleaning up paths for broken video recorder: path=%s metadata_path=%s', self.path, self.metadata_path)

            # Might have crashed before even starting the output file, don't try to remove in that case.
            if os.path.exists(self.path):
                os.remove(self.path)

            if self.metadata is None:
                self.metadata = {}
            self.metadata['broken'] = True

        self.write_metadata()
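
A minimal usage sketch of the recorder above, assuming gym's VideoRecorder class (in gym it lives in gym.wrappers.monitoring.video_recorder) and an environment that renders 'rgb_array'; wrapping close() in a finally block avoids leaking the encoder process:

# Hedged usage sketch, not part of the example above.
import gym
from gym.wrappers.monitoring.video_recorder import VideoRecorder

env = gym.make('CartPole-v1')
recorder = VideoRecorder(env, base_path='/tmp/cartpole-demo')
try:
    env.reset()
    for _ in range(100):
        recorder.capture_frame()
        _, _, done, _ = env.step(env.action_space.sample())
        if done:
            break
finally:
    recorder.close()  # ensures the ffmpeg encoder process is shut down
    env.close()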
Example #2
    def _start(self, directory, video_callable=None, force=False, resume=False,
              write_upon_reset=False, uid=None, mode=None):
        """Start monitoring.

        Args:
            directory (str): A per-training run directory where to record stats.
            video_callable (Optional[function, False]): function that takes in the index of the episode and outputs a boolean, indicating whether we should record a video on this episode. The default (for video_callable is None) is to take perfect cubes, capped at 1000. False disables video recording.
            force (bool): Clear out existing training data from this directory (by deleting every file prefixed with "openaigym.").
            resume (bool): Retain the training data already in this directory, which will be merged with our new data
            write_upon_reset (bool): Write the manifest file on each reset. (This is currently a JSON file, so writing it is somewhat expensive.)
            uid (Optional[str]): A unique id used as part of the suffix for the file. By default, uses os.getpid().
            mode (['evaluation', 'training']): Whether this is an evaluation or training episode.
        """
        if self.env.spec is None:
            logger.warn("Trying to monitor an environment which has no 'spec' set. This usually means you did not create it via 'gym.make', and is recommended only for advanced users.")
            env_id = '(unknown)'
        else:
            env_id = self.env.spec.id

        if not os.path.exists(directory):
            logger.info('Creating monitor directory %s', directory)
            if six.PY3:
                os.makedirs(directory, exist_ok=True)
            else:
                os.makedirs(directory)

        if video_callable is None:
            video_callable = capped_cubic_video_schedule
        elif video_callable == False:
            video_callable = disable_videos
        elif not callable(video_callable):
            raise error.Error('You must provide a function, None, or False for video_callable, not {}: {}'.format(type(video_callable), video_callable))
        self.video_callable = video_callable

        # Check on whether we need to clear anything
        if force:
            clear_monitor_files(directory)
        elif not resume:
            training_manifests = detect_training_manifests(directory)
            if len(training_manifests) > 0:
                raise error.Error('''Trying to write to monitor directory {} with existing monitor files: {}.

 You should use a unique directory for each training run, or use 'force=True' to automatically clear previous monitor files.'''.format(directory, ', '.join(training_manifests[:5])))

        self._monitor_id = monitor_closer.register(self)

        self.enabled = True
        self.directory = os.path.abspath(directory)
        # We use the 'openai-gym' prefix to determine if a file is
        # ours
        self.file_prefix = FILE_PREFIX
        self.file_infix = '{}.{}'.format(self._monitor_id, uid if uid else os.getpid())

        self.stats_recorder = stats_recorder.StatsRecorder(directory, '{}.episode_batch.{}'.format(self.file_prefix, self.file_infix), autoreset=self.env_semantics_autoreset, env_id=env_id)

        if not os.path.exists(directory): os.mkdir(directory)
        self.write_upon_reset = write_upon_reset

        if mode is not None:
            self._set_mode(mode)
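
The default video schedule named in the docstring (perfect cubes, capped at 1000) is short enough to sketch here; this mirrors the documented behavior, not necessarily gym's exact source:

def capped_cubic_video_schedule(episode_id):
    # Record episodes 0, 1, 8, 27, 64, ... up to 1000, then every 1000th episode.
    if episode_id < 1000:
        return round(episode_id ** (1. / 3)) ** 3 == episode_id
    return episode_id % 1000 == 0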
Example #3
def test_env_semantics(spec):
	logger.warn("Skipping this test. Existing hashes were generated in a bad way")	
	return
	with open(ROLLOUT_FILE) as data_file:
		rollout_dict = json.load(data_file)

	if spec.id not in rollout_dict:
		if not spec.nondeterministic:
			logger.warn("Rollout does not exist for {}, run generate_json.py to generate rollouts for new envs".format(spec.id))
		return

	logger.info("Testing rollout for {} environment...".format(spec.id))

	observations_now, actions_now, rewards_now, dones_now = generate_rollout_hash(spec)

	errors = []
	if rollout_dict[spec.id]['observations'] != observations_now:
		errors.append('Observations not equal for {} -- expected {} but got {}'.format(spec.id, rollout_dict[spec.id]['observations'], observations_now))
	if rollout_dict[spec.id]['actions'] != actions_now:
		errors.append('Actions not equal for {} -- expected {} but got {}'.format(spec.id, rollout_dict[spec.id]['actions'], actions_now))
	if rollout_dict[spec.id]['rewards'] != rewards_now:
		errors.append('Rewards not equal for {} -- expected {} but got {}'.format(spec.id, rollout_dict[spec.id]['rewards'], rewards_now))
	if rollout_dict[spec.id]['dones'] != dones_now:
		errors.append('Dones not equal for {} -- expected {} but got {}'.format(spec.id, rollout_dict[spec.id]['dones'], dones_now))
	if len(errors):
		for error in errors:
			logger.warn(error)
		raise ValueError(errors)
Example #4
def clear_monitor_files(training_dir):
    files = detect_monitor_files(training_dir)
    if len(files) == 0:
        return

    logger.info('Clearing %d monitor files from previous run (because force=True was provided)', len(files))
    for file in files:
        os.unlink(file)
Example #5
    def __init__(self, env, path=None, metadata=None, enabled=True, base_path=None):
        modes = env.metadata.get('render.modes', [])
        self._async = env.metadata.get('semantics.async')
        self.enabled = enabled

        # Don't bother setting anything else if not enabled
        if not self.enabled:
            return

        self.ansi_mode = False
        if 'rgb_array' not in modes:
            if 'ansi' in modes:
                self.ansi_mode = True
            else:
                logger.info('Disabling video recorder because {} neither supports video mode "rgb_array" nor "ansi".'.format(env))
                # Whoops, turns out we shouldn't be enabled after all
                self.enabled = False
                return

        if path is not None and base_path is not None:
            raise error.Error("You can pass at most one of `path` or `base_path`.")

        self.last_frame = None
        self.env = env

        required_ext = '.json' if self.ansi_mode else '.mp4'
        if path is None:
            if base_path is not None:
                # Base path given, append ext
                path = base_path + required_ext
            else:
                # Otherwise, just generate a unique filename
                with tempfile.NamedTemporaryFile(suffix=required_ext, delete=False) as f:
                    path = f.name
        self.path = path

        path_base, actual_ext = os.path.splitext(self.path)

        if actual_ext != required_ext:
            hint = " HINT: The environment is text-only, therefore we're recording its text output in a structured JSON format." if self.ansi_mode else ''
            raise error.Error("Invalid path given: {} -- must have file extension {}.{}".format(self.path, required_ext, hint))
        # Touch the file in any case, so we know it's present. (This
        # corrects for platform differences: using ffmpeg on
        # OS X, the file is precreated, but not on Linux.)
        touch(path)

        self.frames_per_sec = env.metadata.get('video.frames_per_second', 30)
        self.encoder = None # lazily start the process
        self.broken = False

        # Dump metadata
        self.metadata = metadata or {}
        self.metadata['content_type'] = 'video/vnd.openai.ansivid' if self.ansi_mode else 'video/mp4'
        self.metadata_path = '{}.meta.json'.format(path_base)
        self.write_metadata()

        logger.info('Starting new video recorder writing to %s', self.path)
        self.empty = True
Example #6
 def make(self, id):
     logger.info('Making new env: %s', id)
     spec = self.spec(id)
     env = spec.make()
     if (env.spec.timestep_limit is not None) and not spec.tags.get('vnc'):
         from gym.wrappers.time_limit import TimeLimit
         env = TimeLimit(env,
                         max_episode_steps=env.spec.max_episode_steps,
                         max_episode_seconds=env.spec.max_episode_seconds)
     return env
Example #7
    def close(self):
        """Flush all monitor data to disk and close any open rending windows."""
        if not self.enabled:
            return
        self.stats_recorder.close()
        if self.video_recorder is not None:
            self._close_video_recorder()
        self._flush(force=True)

        # Stop tracking this for autoclose
        monitor_closer.unregister(self._monitor_id)
        self.enabled = False

        logger.info('''Finished writing results. You can upload them to the scoreboard via gym.upload(%r)''', self.directory)
Example #8
 def make(self, id):
     logger.info('Making new env: %s', id)
     spec = self.spec(id)
     env = spec.make()
     # We used to have people override _reset/_step rather than
     # reset/step. Set _gym_disable_underscore_compat = True on
     # your environment if you use these methods and don't want
     # compatibility code to be invoked.
     if hasattr(env, "_reset") and hasattr(env, "_step") and not getattr(env, "_gym_disable_underscore_compat", False):
         patch_deprecated_methods(env)
     if (env.spec.timestep_limit is not None) and not spec.tags.get('vnc'):
         from gym.wrappers.time_limit import TimeLimit
         env = TimeLimit(env,
                         max_episode_steps=env.spec.max_episode_steps,
                         max_episode_seconds=env.spec.max_episode_seconds)
     return env
Example #9
 def make(self, path, **kwargs):
     if len(kwargs) > 0:
         logger.info('Making new env: %s (%s)', path, kwargs)
     else:
         logger.info('Making new env: %s', path)
     spec = self.spec(path)
     env = spec.make(**kwargs)
     # We used to have people override _reset/_step rather than
     # reset/step. Set _gym_disable_underscore_compat = True on
     # your environment if you use these methods and don't want
     # compatibility code to be invoked.
     if hasattr(env, "_reset") and hasattr(env, "_step") and not getattr(
             env, "_gym_disable_underscore_compat", False):
         patch_deprecated_methods(env)
     if (env.spec.max_episode_steps
             is not None) and not spec.tags.get('vnc'):
         from gym.wrappers.time_limit import TimeLimit
         env = TimeLimit(env, max_episode_steps=env.spec.max_episode_steps)
     return env
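
Both make() variants above delegate the step limit to the TimeLimit wrapper; the same wrapping can be reproduced by hand (sketch, assuming an old-style gym where env.unwrapped drops the wrappers gym.make added):

import gym
from gym.wrappers.time_limit import TimeLimit

env = gym.make('CartPole-v1').unwrapped       # raw environment, no wrappers
env = TimeLimit(env, max_episode_steps=500)   # re-impose a per-episode step limit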
Example #10
    def do_episode(self, config):
        """

        :param config:
        :return:
        """

        # Initial values
        done = False
        score_e = 0
        step_e = 0

        # Reset environment
        self.env.reset()

        # Continue while not crashed
        while not done:

            # Show on screen
            if config['VERBOSE'] > 1:
                self.env.render()

            # Act
            action = self.act()
            _, reward, done, _ = self.env.step(action)

            # Increment score and steps
            score_e += reward
            step_e += 1
            self.step += 1

        # Append score
        self.score.append(score_e)
        self.score_100.append(score_e)
        mean_score = np.mean(self.score_100)

        # Increment episode
        self.episode += 1

        if config['VERBOSE'] > 0:
            logger.info(
                f'[Episode {self.episode}] - score: {score_e:.2f}, steps: {step_e}, '
                f'100-score: {mean_score:.2f}.')
Example #11
    def close(self):
        """Flush all monitor data to disk and close any open rending windows."""
        super().close()

        if not self.enabled:
            return
        self.stats_recorder.close()
        if self.video_recorder is not None:
            self._close_video_recorder()
        self._flush(force=True)

        # Stop tracking this for autoclose
        monitor_closer.unregister(self._monitor_id)
        self.enabled = False

        logger.info(
            """Finished writing results. You can upload them to the scoreboard via gym.upload(%r)""",
            self.directory,
        )
Example #12
 def sample_minibatch(self):
     """
         Sample a batch of transitions from memory.
         This only happens
             - when the memory is full
             - at some intermediate memory lengths
         Otherwise, the returned batch is empty
     :return: a batch of the whole memory
     """
     if self.memory.is_full():
         logger.info("Memory is full, switching to evaluation mode.")
         self.eval()
         transitions = self.memory.sample(len(self.memory))
         return Transition(*zip(*transitions))
     elif len(self.memory) % self.config["batch_size"] == 0:
         transitions = self.memory.sample(len(self.memory))
         return Transition(*zip(*transitions))
     else:
         return None
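
The Transition(*zip(*transitions)) idiom above converts a list of per-step namedtuples into a single namedtuple of per-field tuples, which is convenient for building batched tensors; a self-contained illustration:

from collections import namedtuple

Transition = namedtuple('Transition', ['state', 'action', 'reward', 'next_state', 'done'])

steps = [Transition(0, 1, 0.0, 1, False), Transition(1, 0, 1.0, 2, True)]
batch = Transition(*zip(*steps))
print(batch.state)   # (0, 1)
print(batch.reward)  # (0.0, 1.0)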
Example #13
    def learn(self):
        batch_s, batch_a, batch_r, batch_t, batch_s_ = self.buffer.sample_batch(
            BATCH_SIZE)
        self.optimizer.zero_grad()

        batch_s = torch.FloatTensor(batch_s).to(self.device)
        batch_a = torch.LongTensor(batch_a).to(self.device)
        batch_r = torch.FloatTensor(batch_r).to(self.device)
        batch_s_ = torch.FloatTensor(batch_s_).to(self.device)

        q_eval = self.eval_net(batch_s).gather(1, batch_a.view((-1, 1)))
        # print(f"q_eval {q_eval.shape}", )
        q_next = self.target_net(batch_s_).detach()
        # print(f"q_next {q_next.shape}", )

        # use double Q
        if self.use_double_q:
            q_action = self.eval_net(batch_s_).max(1)[1]
            # print(f"q_action {q_action.shape}")
            q_target = batch_r.view((-1, 1)) + self.gamma * \
                q_next.gather(1, q_action.view((-1, 1)))
            # print(f"batch_r {batch_r.shape}")
            # print(f"q_target {q_target.shape}")

        else:
            q_target = batch_r + self.gamma * q_next.max(1)[0]

        loss = self.loss_func(q_eval, q_target)

        self.optimizer.zero_grad()
        loss.backward()
        nn.utils.clip_grad_value_(self.eval_net.parameters(), 1.0)
        self.optimizer.step()

        if self.epsilon > EPSILON_FINAL:
            self.epsilon = self.epsilon * EPSILON_DECAY

        # target parameter update
        if self.learn_iterations % TARGET_UPDATE_ITER == 0:
            self.target_net.load_state_dict(self.eval_net.state_dict())
            logger.info(f" == update targe network")
        self.learn_iterations += 1
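
The double-Q branch above picks the greedy action with the online (eval) network but evaluates it with the target network; isolated as a helper, the target computation looks like this (sketch, assuming rewards of shape (N,) and Q tensors of shape (N, n_actions)):

import torch

def double_q_target(rewards, q_next_online, q_next_target, gamma):
    # Action selected by the online network, value read from the target network.
    best_actions = q_next_online.argmax(dim=1, keepdim=True)        # (N, 1)
    next_values = q_next_target.gather(1, best_actions).squeeze(1)  # (N,)
    return rewards + gamma * next_values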
Example #14
    def __init__(self):
        self.__version__ = VERSION
        logger.set_level(logger.INFO)
        logger.info("carmunk {}".format(self.__version__))

        screen = pygame.display.set_mode((width, height))
        # Turn off alpha since we don't use it.
        screen.set_alpha(None)

        # Carmunk game object
        self.__game = _GameState(screen)

        # Define the action space: move left or right
        self.action_space = spaces.Discrete(2)

        # Observation space
        self.observation_space = spaces.Box(low=0,
                                            high=39,
                                            shape=(3, ),
                                            dtype=int)
Example #15
def test_env_semantics(spec):
    logger.warn(
        "Skipping this test. Existing hashes were generated in a bad way")
    return
    with open(ROLLOUT_FILE) as data_file:
        rollout_dict = json.load(data_file)

    if spec.id not in rollout_dict:
        if not spec.nondeterministic:
            logger.warn(
                "Rollout does not exist for {}, run generate_json.py to generate rollouts for new envs"
                .format(spec.id))
        return

    logger.info("Testing rollout for {} environment...".format(spec.id))

    observations_now, actions_now, rewards_now, dones_now = generate_rollout_hash(
        spec)

    errors = []
    if rollout_dict[spec.id]['observations'] != observations_now:
        errors.append(
            'Observations not equal for {} -- expected {} but got {}'.format(
                spec.id, rollout_dict[spec.id]['observations'],
                observations_now))
    if rollout_dict[spec.id]['actions'] != actions_now:
        errors.append(
            'Actions not equal for {} -- expected {} but got {}'.format(
                spec.id, rollout_dict[spec.id]['actions'], actions_now))
    if rollout_dict[spec.id]['rewards'] != rewards_now:
        errors.append(
            'Rewards not equal for {} -- expected {} but got {}'.format(
                spec.id, rollout_dict[spec.id]['rewards'], rewards_now))
    if rollout_dict[spec.id]['dones'] != dones_now:
        errors.append(
            'Dones not equal for {} -- expected {} but got {}'.format(
                spec.id, rollout_dict[spec.id]['dones'], dones_now))
    if len(errors):
        for error in errors:
            logger.warn(error)
        raise ValueError(errors)
Example #16
    def step(self, action):
        assert self.action_space.contains(action), "%r (%s) invalid"%(action, type(action))
        state = self.state

        # Create a new prey if there is currently none on the board
        if not self.prey.tolist():
            px = np.float(random.randint(-.5*self.world_width, .5*self.world_width - 1))
            py = np.float(random.randint(-.5*self.world_height, .5*self.world_height - 1))
            while [px, py] in self.snake.blocks.tolist():
                px = np.float(random.randint(-.5*self.world_width, .5*self.world_width - 1))
                py = np.float(random.randint(-.5*self.world_height, .5*self.world_height - 1))

            self.prey = np.array([px, py])
            logger.info("[INFO] -- New Prey at {}, {} ".format(px,py))
            

        # print(self.snake.blocks[0].tolist()) 
        if self.snake.blocks[0].tolist() in [self.prey.tolist()]:
            self.snake.eat_and_move(action)
            self.state = np.array([self.get_state()])
            self.prey = np.array([])
            logger.info("[INFO] -- Manger")
            reward = 500.
        else:
            self.snake.move(action)
            reward = -.5
            self.state = np.array([self.get_state()])
        
        done = self.snake.is_dead or self.oob(*self.snake.blocks[0])

        if done:
            logger.warn("DONE")
            if self.steps_beyond_done is None:
                self.steps_beyond_done = 0
                reward = -1000
            else:
                if self.steps_beyond_done == 0:
                    logger.warn("You are calling 'step()' but it's already done !")
                self.steps_beyond_done += 1
        return self.state, reward, done, {}
Example #17
    def _preprocess(self):
        data = pd.read_csv(Generator.dataset_path)
        message = 'Columns found in the dataset {}'.format(data.columns)
        logger.info(message)
        data = data.dropna()
        start_time_stamp = data['Timestamp'][0]
        timestamps = data['Timestamp'].apply(lambda x:
                                             (x - start_time_stamp) / 60)
        timestamps = timestamps - range(timestamps.shape[0])
        data.insert(0, 'blocks', timestamps)
        blocks = data.groupby('blocks')
        message = 'Number of blocks of continuous prices found are {}'.format(
            len(blocks))
        logger.info(message)

        self._data_blocks = []
        distinct_episodes = 0

        for name, indices in blocks.indices.items():
            ''' 
            Length of the block should exceed the history length and horizon by 1.
            Extra 1 is required to normalize each price block by the previous time stamp
            '''
            if len(indices) > (self.history_length + self.horizon + 1):

                self._data_blocks.append(blocks.get_group(name))
                # similarly, we subtract an extra 1 to calculate the number of distinct episodes
                distinct_episodes = distinct_episodes + (
                    len(indices) -
                    (self.history_length + self.horizon) + 1 + 1)

        data = None
        message_list = [
            'Number of usable blocks obtained from the dataset are {}'.format(
                len(self._data_blocks))
        ]
        message_list.append(
            'Number of distinct episodes for the current configuration are {}'.
            format(distinct_episodes))
        for message in message_list:  # map() is lazy in Python 3, so loop explicitly
            logger.info(message)
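
The 'blocks' column above labels runs of contiguous one-minute timestamps: after converting to minutes and subtracting the row index, rows that follow each other one minute apart share the same label and therefore group together. A tiny illustration with hypothetical values:

import pandas as pd

minutes = pd.Series([0, 1, 2, 10, 11])      # minutes 0-2 contiguous, then a gap
labels = minutes - range(len(minutes))
print(labels.tolist())                      # [0, 0, 0, 7, 7] -> two blocks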
Example #18
def main():
    parser = argparse.ArgumentParser(description=None)
    parser.add_argument('-b', '--base-dir', default='blackjack-1', help='Set base dir.')
    parser.add_argument('-v', '--verbose', action='count', dest='verbosity', default=0, help='Set verbosity.')    
    args = parser.parse_args()

    if args.verbosity == 0:
        logger.setLevel(logging.INFO)
    elif args.verbosity >= 1:
        logger.setLevel(logging.DEBUG)
    
    num_episodes = 100000
    epsilon_decay = 8000

    policy, Q = learn(args.base_dir, num_episodes, epsilon_decay)

    final_average_return = score(policy)
    logger.info("final average returns: {}".format(final_average_return))

    plot_policy(policy, "diag_{}_{}_{}.png".format(num_episodes, epsilon_decay, final_average_return))
    
    return 0
Example #19
    def record(self, state, action, reward, next_state, done, info):
        """
            Record a transition by performing a Fitted-Q iteration

            - push the transition into memory
            - when enough experience is acquired, sample a batch
            - perform N value iteration steps Qk -> Qk+1, ie:
                - compute the Bellman residual loss over the batch
                - Minimize it through M gradient descent steps
        :param state: a state
        :param action: an action
        :param reward: a reward
        :param next_state: a next state
        :param done: whether state is terminal
        :param info: information about the environment
        """
        if not self.training:
            return
        # Store transition to memory
        self.memory.push(state, action, reward, next_state, done, info)
        batch = self.sample_minibatch()
        if not batch:
            return
        batch = self._add_constraint_penalty(batch)
        # Optimize model on batch
        value_iteration_epochs = self.config["value_iteration_epochs"] or int(
            3 / (1 - self.config["gamma"]))
        self.initialize_model()
        for epoch in range(value_iteration_epochs):
            self.update_target_network()
            delta, target = self.compute_bellman_residual(batch)
            self.initialize_model()
            logger.info(
                "Bellman residual at iteration {} on batch {} is {}".format(
                    epoch, len(batch.reward), delta))
            for _ in range(self.config["regression_epochs"]):
                loss, _ = self.compute_bellman_residual(batch, target)
                self.step_optimizer(loss)
Example #20
    def move(self, a):
        """Move the snake one step in direction 'a'."""
        assert a in [0, 1, 2]

        if not self.is_dead:
            diff = np.array(self.blocks[0] - self.old_head)

            if a == 0:
                dxdy = diff
            elif a == 1:
                dxdy = np.array(rotate(*diff, .5*math.pi))
            else:
                dxdy = np.array(rotate(*diff, -.5*math.pi))

            self.blocks = np.roll(self.blocks, 2)
            self.blocks[0] = self.blocks[1] + dxdy
            self.old_head = self.blocks[1]
        else:
            return

        if self.blocks[0].tolist() in self.blocks[1:].tolist():
            self.is_dead = True
        logger.info("[INFO] -- Head moved to {}".format(self.blocks[0]))
Example #21
    def step(self, action):
        err_msg = "%r (%s) invalid" % (action, type(action))
        assert self.action_space.contains(action), err_msg

        logger.info("\n\n#####action: %s", action)
        logger.info("step: %s", self.number_of_steps)
        logger.info("window bounds: %s",
                    (self.window_x_bounds, self.window_y_bounds))
        logger.info("ball center coordinate: %s",
                    (self.ball_center_x, self.ball_center_y))
        logger.info("velocity: %s", (self.velocity_x, self.velocity_y))
        self.number_of_steps += 1

        self.ball_center_x += int(self.velocity_x * self.dt)
        self.ball_center_y += int(self.velocity_y * self.dt +
                                  1 / 2 * self.gravity * self.dt**2)
        self.velocity_y = self.velocity_y + self.gravity * self.dt
        self.check_wall_collision_and_update_state()

        reward = self.apply_force(action)
        self.state = (self.ball_center_x, self.ball_center_y, self.ball_radius)

        return np.array(self.state), reward, False, {}
Example #22
def add_new_rollouts(spec_ids, overwrite):
    environments = [
        spec for spec in envs.registry.all() if spec.entry_point is not None
    ]
    if spec_ids:
        environments = [spec for spec in environments if spec.id in spec_ids]
        assert len(environments) == len(spec_ids), "Some specs not found"
    with open(ROLLOUT_FILE) as data_file:
        rollout_dict = json.load(data_file)
    modified = False
    for spec in environments:
        if not overwrite and spec.id in rollout_dict:
            logger.debug("Rollout already exists for {}. Skipping.".format(
                spec.id))
        else:
            modified = update_rollout_dict(spec, rollout_dict) or modified

    if modified:
        logger.info("Writing new rollout file to {}".format(ROLLOUT_FILE))
        with open(ROLLOUT_FILE, "w") as outfile:
            json.dump(rollout_dict, outfile, indent=2, sort_keys=True)
    else:
        logger.info("No modifications needed.")
Example #23
    def close(self):
        """Flush all data to disk and close any open frame encoders."""
        if not self.enabled or self._closed:
            return

        if self.encoder:
            logger.debug("Closing video encoder: path=%s", self.path)
            self.encoder.close()
            self.encoder = None
        else:
            # No frames captured. Set metadata, and remove the empty output file.
            os.remove(self.path)

            if self.metadata is None:
                self.metadata = {}
            self.metadata["empty"] = True

        # If broken, get rid of the output file, otherwise we'd leak it.
        if self.broken:
            logger.info(
                "Cleaning up paths for broken video recorder: path=%s metadata_path=%s",
                self.path,
                self.metadata_path,
            )

            # Might have crashed before even starting the output file, don't try to remove in that case.
            if os.path.exists(self.path):
                os.remove(self.path)

            if self.metadata is None:
                self.metadata = {}
            self.metadata["broken"] = True

        self.write_metadata()

        # Stop tracking this for autoclose
        self._closed = True
Example #24
    def make(self, path: str, **kwargs) -> Env:
        if len(kwargs) > 0:
            logger.info("Making new env: %s (%s)", path, kwargs)
        else:
            logger.info("Making new env: %s", path)

        # We need to manually parse the ID so we can check
        # the version without error-ing out in self.spec
        namespace, name, version = parse_env_id(path)

        # Get all versions of this spec.
        versions = self.env_specs.versions(namespace, name)

        # We check what the latest version of the environment is and display
        # a warning if the user is attempting to initialize an older version
        # or an unversioned one.
        latest_versioned_spec = max(
            filter(lambda spec: spec.version, versions),
            key=lambda spec: cast(int, spec.version),
            default=None,
        )
        if (latest_versioned_spec and version is not None
                and version < cast(int, latest_versioned_spec.version)):
            logger.warn(
                f"The environment {path} is out of date. You should consider "
                f"upgrading to version `v{latest_versioned_spec.version}` "
                f"with the environment ID `{latest_versioned_spec.id}`.")
        elif latest_versioned_spec and version is None:
            logger.warn(
                f"Using the latest versioned environment `{latest_versioned_spec.id}` "
                f"instead of the unversioned environment `{path}`")
            path = latest_versioned_spec.id

        # Lookup our path
        spec = self.spec(path)
        # Construct the environment
        return spec.make(**kwargs)
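
parse_env_id splits an id such as 'ALE/Breakout-v5' into namespace, name and version; a hedged sketch of that parsing (gym's real parser uses a stricter regular expression and its own error types):

import re

ENV_ID_RE = re.compile(r'^(?:(?P<ns>[\w:.-]+)/)?(?P<name>[\w:.-]+?)(?:-v(?P<version>\d+))?$')

def parse_env_id_sketch(env_id):
    match = ENV_ID_RE.match(env_id)
    if match is None:
        raise ValueError('Malformed environment id: {}'.format(env_id))
    ns, name, version = match.group('ns'), match.group('name'), match.group('version')
    return ns, name, int(version) if version is not None else None

print(parse_env_id_sketch('ALE/Breakout-v5'))   # ('ALE', 'Breakout', 5)
print(parse_env_id_sketch('CartPole-v1'))       # (None, 'CartPole', 1)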
Example #25
    def check_wall_collision_and_update_state(self):
        collision_with_ground = self.ball_center_y <= self.window_y_bounds[0]
        collision_with_left_wall = self.ball_center_x <= self.window_x_bounds[0]
        collision_with_right_wall = self.ball_center_x >= self.window_x_bounds[
            1]

        if collision_with_ground:
            logger.info("collision with ground")
            logger.info("initial velocity: %s",
                        (self.velocity_x, self.velocity_y))
            self.velocity_y = -self.velocity_y * self.damping_factor
            self.ball_center_y = int(self.window_y_bounds[0])
            logger.info("final   velocity: %s",
                        (self.velocity_x, self.velocity_y))

        elif collision_with_left_wall:
            logger.info("collision with left wall")
            self.velocity_x = -self.velocity_x * self.damping_factor
            self.ball_center_x = int(self.window_x_bounds[0])

        elif collision_with_right_wall:
            logger.info("collision with right wall")
            self.velocity_x = -self.velocity_x * self.damping_factor
            self.ball_center_x = int(self.window_x_bounds[1])
Example #26
def compare_experiments(experiments: dict, full_memory=True):
    """Deep Q network for differential robot control.

    Learn to control the robot in the PathFollower environment where the actions are the forward and rotational
    velocity.
    """
    logger.info('Train new experiments')
    for name, experiment in experiments.items():
        experiment.train(render=False, full_memory=full_memory)

        experiment.target_network.model.save(f'model_{name}.h5')

    rewards = [experiment.rewards_train for experiment in experiments.values()]
    names = list(experiments.keys())
    plot_rewards(rewards, names, tag='Training')

    smooth_rewards = []
    for reward in rewards:
        smooth_rewards.append(list(pd.Series(reward).rolling(100).mean()))
    plot_rewards(smooth_rewards, names, tag='Training rolling mean')

    test_rewards = list()
    mean_test_rewards = list()
    for name, experiment in experiments.items():
        reward = experiment.test(render=False)
        test_rewards.append(reward)
        mean_test_rewards.append(np.mean(reward))

    plot_rewards(test_rewards, names, tag='Test')

    print(list(zip(names, mean_test_rewards)))

    smooth_rewards_test = []
    for reward in test_rewards:
        smooth_rewards_test.append(list(pd.Series(reward).rolling(5).mean()))
    plot_rewards(smooth_rewards_test, names, tag='Test rolling mean')
Example #27
def update_rollout_dict(spec, rollout_dict):
    """
    Takes as input the environment spec for which the rollout is to be generated,
    and the existing dictionary of rollouts. Returns True iff the dictionary was
    modified.
    """
    # Skip platform-dependent
    if should_skip_env_spec_for_tests(spec):
        logger.info("Skipping tests for {}".format(spec.id))
        return False

    # Skip environments that are nondeterministic
    if spec.nondeterministic:
        logger.info("Skipping tests for nondeterministic env {}".format(
            spec.id))
        return False

    logger.info("Generating rollout for {}".format(spec.id))

    try:
        (
            observations_hash,
            actions_hash,
            rewards_hash,
            dones_hash,
        ) = generate_rollout_hash(spec)
    except:
        # If running the env generates an exception, don't write to the rollout file
        logger.warn(
            "Exception {} thrown while generating rollout for {}. Rollout not added."
            .format(sys.exc_info()[0], spec.id))
        return False

    rollout = {}
    rollout["observations"] = observations_hash
    rollout["actions"] = actions_hash
    rollout["rewards"] = rewards_hash
    rollout["dones"] = dones_hash

    existing = rollout_dict.get(spec.id)
    if existing:
        differs = False
        for key, new_hash in rollout.items():
            differs = differs or existing[key] != new_hash
        if not differs:
            logger.debug("Hashes match with existing for {}".format(spec.id))
            return False
        else:
            logger.warn("Got new hash for {}. Overwriting.".format(spec.id))

    rollout_dict[spec.id] = rollout
    return True
Example #28
 def __init__(self, capacity: int, alpha: float) -> None:
     self._type = ReplayBufferTypes.Prioritized
     self.capacity = 1
     while self.capacity < capacity:
         self.capacity = self.capacity << 1
     self._buffer = []
     self._idx = 0
     self._sum_tree = SumSegmentTree(self.capacity)
     self._min_tree = MinSegmentTree(self.capacity)
     self._max_priority = 1.0
     self._alpha = alpha
     logger.info("prioritized replay buffer init")
     logger.info("\t alpha is %f", self._alpha)
     logger.info("\t capacity is %d", self.capacity)
Example #29
def run_test(env: gym.Env, agents: Tuple[Agent, Agent],
             epoch: int) -> List[str]:
    """
    Run N matches as test

    :param env: The gym.Env
    :param agents: The two agents (Defender, Attacker)
    :param epoch: The current training epoch the agents are at
    """
    logger.info(f"Starting {SETTINGS.TEST_MATCHES} test match(es)")
    logger.info(
        f"video recording is {'enabled' if SETTINGS.RECORD_TEST_MATCHES else 'disabled'}"
    )
    test_env = env if not SETTINGS.RECORD_TEST_MATCHES else Monitor(
        env=env,
        directory=os.path.join(videos_dir, str(epoch)),
        video_callable=lambda episode_id:
        (episode_id + 1) % SETTINGS.TEST_MATCHES_RECORD_INTERVAL == 0)
    winners = []
    moves_len = []
    for ep in range(SETTINGS.TEST_MATCHES):
        moves = []
        obs = test_env.reset()
        if SETTINGS.RENDER_TEST_MATCHES:
            test_env.render()
        curr_agent = 0
        while True:
            action, _ = agents[curr_agent].choose_action(
                obs, test_env.env.action_space
                if SETTINGS.RECORD_TEST_MATCHES else env.action_space)
            moves.append(test_env.env.actions[action] if SETTINGS.
                         RECORD_TEST_MATCHES else env.actions[action])
            obs, _, done, info = test_env.step(action)
            if SETTINGS.RENDER_TEST_MATCHES:
                test_env.render()
            captures = info.get('captured')
            if len(captures) > 0:
                moves[-1] += 'x' + 'x'.join(captures)
            if done:
                write_match_infos(info, moves, f'match_{epoch}_{ep}')
                winners.append(info.get('winner', None))
                moves_len.append(len(moves))
                break
            curr_agent = 0 if curr_agent == 1 else 1
    test_env.close()
    update_summary(winners, moves_len, epoch)
    logger.info('Test match(es) completed and results saved')
    return winners
Example #30
    def train(self, env, episodes):
        max_score = -514229
        total_step = 0
        for eps in range(self.cur_episode, episodes):
            state = env.reset()
            score = 0
            done = False

            while not done:
                if total_step < MIN_STEP_TO_TRAIN:
                    action = env.action_space.sample()
                else:
                    action = self.act(state)

                state_, reward, done, _ = env.step(action)
                total_step += 1
                score += reward
                reward = check_reward(self.env_name, state, action, reward,
                                      state_, done)
                self.buffer.add(state, action, reward, done, state_)

                if self.buffer.size > MIN_STEP_TO_TRAIN:
                    self.learn()

                state = state_

            max_score = score if score > max_score else max_score
            self.score_history.append(score)
            logger.info(
                f" == episode: {eps+1:05d} | total step: {total_step:7d} | score: {score:8.2f} | max score: {max_score:8.2f}"
            )

            if (eps + 1) % 100 == 0:
                ckpt_name = os.path.join(self.ckpt_save_path,
                                         f"ckpt_{eps}.pth")
                self.save_model(ckpt_name, eps)
                logger.info(f" == model {ckpt_name} saved")

        ckpt_name = os.path.join(self.ckpt_save_path, "ckpt_final.pth")
        self.save_model(ckpt_name, eps)
        logger.info(f" == model {ckpt_name} saved")
        figure_name = os.path.join(self.ckpt_save_path,
                                   f"{self.agent_name}.png")
        plot_figure(figure_name, self.score_history)
Example #31
def build_dataset(matches: MatchesCollection, epoch: int):
    """
    Build the dataset for both Attacker and Defender and at the various last-moves windows

    :param matches: The matches as a collection
    :param epoch: The current epoch for training
    """
    logger.info('Starting building dataset...')
    for player in ['ATK', 'DEF']:
        logger.info(f'Building dataset for {player}...')
        ms = matches.matches.get(player)
        for lm in LAST_MOVES:
            if lm <= matches.shortest_match(player):
                samples = np.empty(shape=(matches.n_matches * lm,
                                          SHAPE_STATE[0], SHAPE_STATE[1],
                                          SHAPE_STATE[2]),
                                   dtype=np.float)
                labels = np.empty(shape=(matches.n_matches * lm))
                with_limit = True
            else:
                n_samples = 0
                for m in ms:
                    n_samples += len(m[0])
                samples = np.empty(shape=(n_samples, SHAPE_STATE[0],
                                          SHAPE_STATE[1], SHAPE_STATE[2]),
                                   dtype=np.float)
                labels = np.empty(shape=n_samples)
                with_limit = False
            i = 0
            for m in ms:
                for s, l in zip(m[0][-(lm if with_limit else 0):],
                                m[1][-(lm if with_limit else 0):]):
                    samples[i] = s
                    labels[i] = l
                    i += 1
            dataset = TablutDataset(
                samples=samples,
                labels=labels,
                name=
                f"{player}_{epoch}_{matches.n_matches}_{(lm if with_limit else 'full')}",
                transform=transforms.Compose([transforms.ToTensor()]))
            save_dataset(dataset, datasets_dir)
    logger.info('All datasets built')
Example #32
    def train(self, env, episodes):
        max_score = -514229
        total_step = 0
        for eps in range(self.cur_episode, episodes):
            state = env.reset()
            score = 0
            done = False
            episode_step = 0
            while not done:
                action = self.predict(state)
                state_, reward, done, _ = env.step(action)
                episode_step += 1
                total_step += 1
                score += reward
                reward = check_reward(self.env_name, state, action, reward,
                                      state_, done)
                self.store_rewards(reward)
                state = state_

            self.score_history.append(score)
            max_score = score if score > max_score else max_score
            if score > -1.0 * episode_step:
                self.learn()
                logger.info(
                    f" == episode: {eps+1}, score: {score}, max score: {max_score}"
                )
            else:
                self.clear_memory()

            if (eps + 1) % 100 == 0:
                ckpt_name = os.path.join(self.ckpt_save_path,
                                         f"ckpt_{eps}.pth")
                self.save_model(ckpt_name, eps)
                logger.info(f" == model {ckpt_name} saved")

        ckpt_name = os.path.join(self.ckpt_save_path, "ckpt_final.pth")
        self.save_model(ckpt_name, eps)
        logger.info(f" == model {ckpt_name} saved")
        figure_name = os.path.join(self.ckpt_save_path,
                                   f"{self.agent_name}.png")
        plot_figure(figure_name, self.score_history)
Example #33
    def get_transactions():
        if not Generator.dataset_path:
            Generator.set_dataset_path()

        message = 'Getting latest transactions from {}.'.format(URL) + \
                    '\nThis might take a few minutes depending upon your internet speed.'
        logger.info(message)

        path = os.path.join(Generator.temp_dir, 'coinbaseUSD.csv.gz')
        f = urllib2.urlopen(URL)
        with open(path, 'w') as buffer:
            buffer.write(f.read())
        message = 'Latest transactions saved to {}'.format(path)
        logger.info(message)

        # Read the transactions into pandas dataframe
        with gzip.open(path, 'r') as f:
            d = pd.read_table(f,
                              sep=',',
                              header=None,
                              index_col=0,
                              names=['price', 'volume'])
        os.remove(path)

        d.index = d.index.map(
            lambda ts: datetime.datetime.fromtimestamp(int(ts)))
        d.index.names = ['DateTime_UTC']
        p = pd.DataFrame(d['price'].resample('1Min').ohlc())
        p.columns = ['price_open', 'price_high', 'price_low', 'price_close']
        v = pd.DataFrame(d['volume'].resample('1Min').sum())
        v.columns = ['volume']
        p['volume'] = v['volume']
        unix_timestamps = p.index.map(
            lambda ts: int(time.mktime(ts.timetuple())))
        p.insert(0, 'Timestamp', unix_timestamps)

        p.to_csv(Generator.dataset_path, sep=',')
        message = 'Dataset sampled and saved to {}'.format(
            Generator.dataset_path)
        logger.info(message)
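
The resample('1Min').ohlc() call turns tick-level trades into one-minute candles; the same transformation on small synthetic data (hypothetical prices, for illustration only):

import numpy as np
import pandas as pd

idx = pd.date_range('2021-01-01', periods=180, freq='S')
ticks = pd.DataFrame({'price': np.linspace(100, 101, 180),
                      'volume': np.ones(180)}, index=idx)

candles = pd.DataFrame(ticks['price'].resample('1Min').ohlc())
candles.columns = ['price_open', 'price_high', 'price_low', 'price_close']
candles['volume'] = ticks['volume'].resample('1Min').sum()
print(candles)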
Example #34
    def train(self, env, episodes):
        max_score = -514229
        total_step = 0
        for eps in range(self.cur_episode, episodes):
            state = env.reset()
            score = 0
            done = False
            while not done:
                action = self.predict(state)
                state_, reward, done, _ = env.step(action)
                total_step += 1
                score += reward
                reward = check_reward(self.env_name, state, action, reward,
                                      state_, done)
                self.buffer.add(state, action, reward, done, state_)

                if self.buffer.size > INIT_REPLAY_SIZE:
                    self.learn()
                elif self.buffer.size % 500 == 0:
                    print(f' == populate the replay buffer ... ... ')
                state = state_

            max_score = score if score > max_score else max_score
            self.score_history.append(score)
            logger.info(
                f" == episode: {eps+1}, total step: {total_step}, score: {score}, max score: {max_score}"
            )

            if (eps + 1) % 100 == 0:
                ckpt_name = os.path.join(self.ckpt_save_path,
                                         f"ckpt_{eps}.pth")
                self.save_model(ckpt_name, eps)
                logger.info(f" == model {ckpt_name} saved")

        ckpt_name = os.path.join(self.ckpt_save_path, "ckpt_final.pth")
        self.save_model(ckpt_name, eps)
        logger.info(f" == model {ckpt_name} saved")
        figure_name = os.path.join(self.ckpt_save_path,
                                   f"{self.agent_name}.png")
        plot_figure(figure_name, self.score_history)
Example #35
observation = env.reset()
for frame_idx in range(1, config.MAX_FRAMES + 1):
    epsilon = config.epsilon_by_frame(frame_idx)

    action = model.get_action(observation, epsilon)

    prev_observation = observation
    observation, reward, done, _ = env.step(action)
    #observation = None if done else observation
    episode_reward += reward
    
    model.update(prev_observation, action, reward, observation, frame_idx)
    
    if done:
        logger.info('Finished episode at frame {} with a reward of {}.'.format(frame_idx, episode_reward))
        model.finish_nstep()
        model.reset_hx()
        observation = env.reset()
        model.save_reward(episode_reward)
        episode_reward = 0

    if frame_idx % 10 == 0:
        print('')
        print('')
        print('FRAME_IDX: {}'.format(frame_idx))
        print('')
        print('')
        model.save_w()
        plot_all_data(log_dir, env_id, 'DeepDip', config.MAX_FRAMES, bin_size=(10, 100, 100, 1), smooth=1, time=timedelta(seconds=int(timer()-start)), save_filename='./results.png', ipynb=False)
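
config.epsilon_by_frame in the loop above is usually an exponential decay from a starting exploration rate toward a floor; a sketch of such a schedule (the constants are assumptions, not taken from this code):

import math

EPS_START, EPS_FINAL, EPS_DECAY = 1.0, 0.01, 30000   # assumed values

def epsilon_by_frame(frame_idx):
    # Anneal exploration smoothly from EPS_START down to EPS_FINAL.
    return EPS_FINAL + (EPS_START - EPS_FINAL) * math.exp(-frame_idx / EPS_DECAY)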
Example #36
    def _start(self,
               directory,
               force=False,
               resume=False,
               write_upon_reset=False,
               uid=None,
               mode=None):
        """Start monitoring.
        Args:
            directory (str): A per-training run directory where to record stats.
            video_flg (Optional[function, False]): function that takes in the index of the episode and outputs a boolean, indicating whether we should record a video on this episode. The default (for video_flg is None) is to take perfect cubes, capped at 1000. False disables video recording.
            force (bool): Clear out existing training data from this directory (by deleting every file prefixed with "openaigym.").
            resume (bool): Retain the training data already in this directory, which will be merged with our new data
            write_upon_reset (bool): Write the manifest file on each reset. (This is currently a JSON file, so writing it is somewhat expensive.)
            uid (Optional[str]): A unique id used as part of the suffix for the file. By default, uses os.getpid().
            mode (['evaluation', 'training']): Whether this is an evaluation or training episode.
        """
        if self.env.spec is None:
            logger.warn(
                "Trying to monitor an environment which has no 'spec' set. This usually means you did not create it via 'gym.make', and is recommended only for advanced users."
            )
            env_id = '(unknown)'
        else:
            env_id = self.env.spec.id

        if not os.path.exists(directory):
            logger.info('Creating monitor directory %s', directory)
            if six.PY3:
                os.makedirs(directory, exist_ok=True)
            else:
                os.makedirs(directory)

        self.video_flg = False

        # Check on whether we need to clear anything
        if force:
            clear_monitor_files(directory)
        elif not resume:
            training_manifests = detect_training_manifests(directory)
            if len(training_manifests) > 0:
                raise error.Error(
                    '''Trying to write to monitor directory {} with existing monitor files: {}.
 You should use a unique directory for each training run, or use 'force=True' to automatically clear previous monitor files.'''
                    .format(directory, ', '.join(training_manifests[:5])))

        self._monitor_id = monitor_closer.register(self)

        self.enabled = True
        self.directory = os.path.abspath(directory)
        # We use the 'openai-gym' prefix to determine if a file is
        # ours
        self.file_prefix = FILE_PREFIX
        self.file_infix = '{}.{}'.format(self._monitor_id,
                                         uid if uid else os.getpid())

        self.stats_recorder = stats_recorder.StatsRecorder(
            directory,
            '{}.episode_batch.{}'.format(self.file_prefix, self.file_infix),
            autoreset=self.env_semantics_autoreset,
            env_id=env_id)

        if not os.path.exists(directory): os.mkdir(directory)
        self.write_upon_reset = write_upon_reset

        if mode is not None:
            self._set_mode(mode)
Example #37
 def set_monitor_mode(self, mode):
     logger.info(
         "Setting the monitor mode is deprecated and will be removed soon")
     self._set_mode(mode)
Example #38
 def set_monitor_mode(self, mode):
     logger.info("Setting the monitor mode is deprecated and will be removed soon")
     self._set_mode(mode)
Example #39
File: cem.py Project: joschu/gym
    info['env_id'] = env.spec.id

    # ------------------------------------------


    def noisy_evaluation(theta):
        agent = BinaryActionLinearPolicy(theta)
        rew, T = do_rollout(agent, env, num_steps)
        return rew

    # Train the agent, and snapshot each stage
    for (i, iterdata) in enumerate(
            cem(noisy_evaluation, np.zeros(env.observation_space.shape[0] + 1),
                **params)):
        print('Iteration %2i. Episode mean reward: %7.3f' %
              (i, iterdata['y_mean']))
        agent = BinaryActionLinearPolicy(iterdata['theta_mean'])
        if args.display: do_rollout(agent, env, 200, render=True)
        writefile('agent-%.4i.pkl' % i, str(pickle.dumps(agent, -1)))

    # Write out the env at the end so we store the parameters of this
    # environment.
    writefile('info.json', json.dumps(info))

    env.close()

    logger.info(
        "Successfully ran cross-entropy method. Now trying to upload results to the scoreboard. If it breaks, you can always just try re-uploading the same results."
    )
    gym.upload(outdir)
Example #40
    # directory, including one with existing data -- all monitor files
    # will be namespaced). You can also dump to a tempdir if you'd
    # like: tempfile.mkdtemp().
    outdir = '/tmp/random-agent-results'
    env = wrappers.Monitor(env, directory=outdir, force=True)
    env.seed(0)
    agent = RandomAgent(env.action_space)

    episode_count = 100
    reward = 0
    done = False

    for i in range(episode_count):
        ob = env.reset()
        while True:
            action = agent.act(ob, reward, done)
            ob, reward, done, _ = env.step(action)
            if done:
                break
            # Note there's no env.render() here. But the environment still can open window and
            # render if asked by env.monitor: it calls env.render('rgb_array') to record video.
            # Video is not recorded every episode, see capped_cubic_video_schedule for details.

    # Close the env and write monitor result info to disk
    env.close()

    # Upload to the scoreboard. We could also do this from another
    # process if we wanted.
    logger.info("Successfully ran RandomAgent. Now trying to upload results to the scoreboard. If it breaks, you can always just try re-uploading the same results.")
    gym.upload(outdir)
Example #41
File: cem.py Project: joschu/gym
    # ----------------------------------------
    def writefile(fname, s):
        with open(path.join(outdir, fname), 'w') as fh: fh.write(s)
    info = {}
    info['params'] = params
    info['argv'] = sys.argv
    info['env_id'] = env.spec.id
    # ------------------------------------------

    def noisy_evaluation(theta):
        agent = BinaryActionLinearPolicy(theta)
        rew, T = do_rollout(agent, env, num_steps)
        return rew

    # Train the agent, and snapshot each stage
    for (i, iterdata) in enumerate(
        cem(noisy_evaluation, np.zeros(env.observation_space.shape[0]+1), **params)):
        print('Iteration %2i. Episode mean reward: %7.3f'%(i, iterdata['y_mean']))
        agent = BinaryActionLinearPolicy(iterdata['theta_mean'])
        if args.display: do_rollout(agent, env, 200, render=True)
        writefile('agent-%.4i.pkl'%i, str(pickle.dumps(agent, -1)))

    # Write out the env at the end so we store the parameters of this
    # environment.
    writefile('info.json', json.dumps(info))

    env.close()

    logger.info("Successfully ran cross-entropy method. Now trying to upload results to the scoreboard. If it breaks, you can always just try re-uploading the same results.")
    gym.upload(outdir)
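
Both cem.py excerpts iterate over a cem() generator that samples parameter vectors from a Gaussian, keeps an elite fraction, and refits the Gaussian to the elites; a minimal sketch of such a generator (argument names and defaults are assumptions, not the exact gym example code):

import numpy as np

def cem(f, th_mean, batch_size=25, n_iter=10, elite_frac=0.2, initial_std=1.0):
    # Cross-entropy method over a diagonal Gaussian in parameter space.
    th_std = np.ones_like(th_mean) * initial_std
    n_elite = int(round(batch_size * elite_frac))
    for _ in range(n_iter):
        ths = th_mean + th_std * np.random.randn(batch_size, th_mean.size)
        ys = np.array([f(th) for th in ths])
        elite = ths[ys.argsort()[::-1][:n_elite]]
        th_mean, th_std = elite.mean(axis=0), elite.std(axis=0)
        yield {'ys': ys, 'theta_mean': th_mean, 'y_mean': ys.mean()}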