def __init__(self, config):
    """Initializes the reinforcement learning agent.

    Args:
        config (Configuration): configuration object containing at least
            `states`, `actions`, `preprocessing` and `exploration`.
    """
    # Subclasses are expected to define `name` and `model` class attributes.
    assert self.__class__.name is not None and self.__class__.model is not None
    # Fill any values missing from `config` with the agent-wide defaults.
    config.default(Agent.default_config)

    # states config and preprocessing
    self.preprocessing = dict()
    if 'type' in config.states:
        # only one state: wrap it (and its preprocessing) under the name 'state'
        config.states = dict(state=config.states)
        self.unique_state = True
        if config.preprocessing is not None:
            config.preprocessing = dict(state=config.preprocessing)
    else:
        self.unique_state = False
    # NOTE(review): unpacking (name, state) relies on Configuration's iterator
    # yielding key/value pairs (a plain dict would yield keys only) — confirm.
    for name, state in config.states:
        # default state type is float
        state.default(dict(type='float'))
        if isinstance(state.shape, int):
            # normalize a scalar shape to a unary tuple
            state.shape = (state.shape, )
        if config.preprocessing is not None and name in config.preprocessing:
            preprocessing = Preprocessing.from_config(config=config.preprocessing[name])
            self.preprocessing[name] = preprocessing
            # preprocessing may change the effective state shape
            state.shape = preprocessing.processed_shape(shape=state.shape)

    # actions config and exploration
    self.exploration = dict()
    if 'continuous' in config.actions:
        # only one action: wrap it (and its exploration) under the name 'action'
        config.actions = dict(action=config.actions)
        if config.exploration is not None:
            config.exploration = dict(action=config.exploration)
        self.unique_action = True
    else:
        self.unique_action = False
    for name, action in config.actions:
        if action.continuous:
            # continuous actions may carry optional value bounds
            action.default(dict(shape=(), min_value=None, max_value=None))
        else:
            action.default(dict(shape=()))
        if isinstance(action.shape, int):
            # normalize a scalar shape to a unary tuple
            action.shape = (action.shape, )
        if config.exploration is not None and name in config.exploration:
            self.exploration[name] = Exploration.from_config(config=config.exploration[name])

    # Keep the normalized specs for later use by the agent.
    self.states_config = config.states
    self.actions_config = config.actions

    # Instantiate the model class declared on the agent subclass.
    self.model = self.__class__.model(config)

    # episode starts at -1; presumably reset() advances it to 0 — confirm.
    self.episode = -1
    self.timestep = 0
    self.reset()
def __init__(self, config):
    """Initializes the reinforcement learning agent.

    Args:
        config (Configuration): configuration object containing at least
            `states`, `actions`, `preprocessing` and `exploration`.
    """
    # Subclasses are expected to define `name` and `model` class attributes.
    assert self.__class__.name is not None and self.__class__.model is not None
    # Fill any values missing from `config` with the agent-wide defaults.
    config.default(Agent.default_config)

    # states config and preprocessing
    self.preprocessing = dict()
    if 'type' in config.states:
        # only one state: wrap it (and its preprocessing) under the name 'state'
        config.states = dict(state=config.states)
        self.unique_state = True
        if config.preprocessing is not None:
            config.preprocessing = dict(state=config.preprocessing)
    else:
        self.unique_state = False
    # NOTE(review): unpacking (name, state) relies on Configuration's iterator
    # yielding key/value pairs (a plain dict would yield keys only) — confirm.
    for name, state in config.states:
        if config.preprocessing is not None and name in config.preprocessing:
            preprocessing = Preprocessing.from_config(config=config.preprocessing[name])
            self.preprocessing[name] = preprocessing
            # preprocessing may change the effective state shape
            state.shape = preprocessing.processed_shape(shape=state.shape)

    # actions config and exploration
    # continuous_actions collects the names of all continuous actions.
    self.continuous_actions = list()
    self.exploration = dict()
    if 'continuous' in config.actions:
        # only one action: wrap it (and its exploration) under the name 'action'
        if config.actions.continuous:
            self.continuous_actions.append('action')
        config.actions = dict(action=config.actions)
        if config.exploration is not None:
            config.exploration = dict(action=config.exploration)
        self.unique_action = True
    else:
        self.unique_action = False
    for name, action in config.actions:
        if action.continuous:
            self.continuous_actions.append(name)
        if config.exploration is not None and name in config.exploration:
            self.exploration[name] = Exploration.from_config(config=config.exploration[name])

    # Keep the normalized specs for later use by the agent.
    self.states_config = config.states
    self.actions_config = config.actions

    # Instantiate the model class declared on the agent subclass.
    self.model = self.__class__.model(config)

    self.episode = 0
    self.timestep = 0

    # Reset internal state - needs to be called after every episode
    self.next_internal = self.current_internal = self.model.reset()
    for preprocessing in self.preprocessing.values():
        preprocessing.reset()
def __init__(self, config):
    """Initializes the reinforcement learning agent.

    Args:
        config (Configuration): configuration object containing at least
            `states`, `actions`, `preprocessing` and `exploration`.
    """
    # Subclasses are expected to define `name` and `model` class attributes.
    assert self.__class__.name is not None and self.__class__.model is not None
    # Fill any values missing from `config` with the agent-wide defaults.
    config.default(Agent.default_config)

    # states config and preprocessing
    self.preprocessing = dict()
    if 'type' in config.states:
        # only one state: wrap it (and its preprocessing) under the name 'state'
        config.states = dict(state=config.states)
        self.unique_state = True
        if config.preprocessing is not None:
            config.preprocessing = dict(state=config.preprocessing)
    else:
        self.unique_state = False
    # NOTE(review): unpacking (name, state) relies on Configuration's iterator
    # yielding key/value pairs (a plain dict would yield keys only) — confirm.
    for name, state in config.states:
        if config.preprocessing is not None and name in config.preprocessing:
            preprocessing = Preprocessing.from_config(config=config.preprocessing[name])
            self.preprocessing[name] = preprocessing
            # preprocessing may change the effective state shape
            state.shape = preprocessing.processed_shape(shape=state.shape)

    # actions config and exploration
    # continuous_actions collects the names of all continuous actions.
    self.continuous_actions = list()
    self.exploration = dict()
    if 'continuous' in config.actions:
        # only one action: wrap it (and its exploration) under the name 'action'
        if config.actions.continuous:
            self.continuous_actions.append('action')
        config.actions = dict(action=config.actions)
        if config.exploration is not None:
            config.exploration = dict(action=config.exploration)
        self.unique_action = True
    else:
        self.unique_action = False
    for name, action in config.actions:
        if action.continuous:
            self.continuous_actions.append(name)
        if config.exploration is not None and name in config.exploration:
            self.exploration[name] = Exploration.from_config(config=config.exploration[name])

    # Keep the normalized specs for later use by the agent.
    self.states_config = config.states
    self.actions_config = config.actions

    # Instantiate the model class declared on the agent subclass.
    self.model = self.__class__.model(config)

    self.episode = 0
    self.timestep = 0

    # Reset internal state - needs to be called after every episode
    self.next_internal = self.current_internal = self.model.reset()
    for preprocessing in self.preprocessing.values():
        preprocessing.reset()
def __init__(self, config, model=None):
    """Initializes the reinforcement learning agent.

    Args:
        config (Configuration): configuration object containing at least
            `states`, `actions`, `preprocessing` and `exploration`.
        model (Model): optional model instance. If not supplied, a new model
            is created.
    """
    # Subclasses are expected to define `name` and `model` class attributes.
    assert self.__class__.name is not None and self.__class__.model is not None
    # Fill any values missing from `config` with the agent-wide defaults.
    config.default(Agent.default_config)

    self.logger = logging.getLogger(__name__)
    self.logger.setLevel(util.log_levels[config.log_level])

    # states config and preprocessing
    self.preprocessing = dict()
    if 'type' in config.states:
        # only one state: wrap it (and its preprocessing) under the name 'state'
        config.states = dict(state=config.states)
        self.unique_state = True
        if config.preprocessing is not None:
            config.preprocessing = dict(state=config.preprocessing)
    else:
        self.unique_state = False
    # NOTE(review): unpacking (name, state) relies on Configuration's iterator
    # yielding key/value pairs (a plain dict would yield keys only) — confirm.
    for name, state in config.states:
        # default state type is float
        state.default(dict(type='float'))
        if isinstance(state.shape, int):
            # normalize a scalar shape to a unary tuple
            state.shape = (state.shape, )
        if config.preprocessing is not None and name in config.preprocessing:
            preprocessing = Preprocessing.from_config(config=config.preprocessing[name])
            self.preprocessing[name] = preprocessing
            # preprocessing may change the effective state shape
            state.shape = preprocessing.processed_shape(shape=state.shape)

    # actions config and exploration
    self.exploration = dict()
    if 'continuous' in config.actions:
        # only one action: wrap it (and its exploration) under the name 'action'
        config.actions = dict(action=config.actions)
        if config.exploration is not None:
            config.exploration = dict(action=config.exploration)
        self.unique_action = True
    else:
        self.unique_action = False
    for name, action in config.actions:
        if action.continuous:
            # continuous actions may carry optional value bounds
            action.default(dict(shape=(), min_value=None, max_value=None))
        else:
            action.default(dict(shape=()))
        if isinstance(action.shape, int):
            # normalize a scalar shape to a unary tuple
            action.shape = (action.shape, )
        if config.exploration is not None and name in config.exploration:
            self.exploration[name] = Exploration.from_config(config=config.exploration[name])

    # Keep the normalized specs for later use by the agent.
    self.states_config = config.states
    self.actions_config = config.actions

    if model is None:
        # Instantiate the model class declared on the agent subclass.
        self.model = self.__class__.model(config)
    else:
        # A supplied model must be an instance of this agent's model class.
        if not isinstance(model, self.__class__.model):
            raise TensorForceError(
                "Supplied model class `{}` does not match expected agent model class `{}`".format(
                    type(model).__name__, self.__class__.model.__name__
                )
            )
        self.model = model

    # Warn about config keys that were never read — likely typos or dead settings.
    not_accessed = config.not_accessed()
    if not_accessed:
        self.logger.warning("Configuration values not accessed: {}".format(', '.join(not_accessed)))

    # episode starts at -1; presumably reset() advances it to 0 — confirm.
    self.episode = -1
    self.timestep = 0
    self.reset()
def __init__(self, states_spec, actions_spec, preprocessing, exploration, reward_preprocessing, batched_observe):
    """
    Initializes the reinforcement learning agent.

    Args:
        states_spec: Spec dict for a single state (contains `shape`, optional
            `type` defaulting to 'float'), or a dict of such per-state spec
            dicts keyed by state name for the multi-state case.
        actions_spec: Spec dict for a single action (contains `type`; 'int'
            actions require `num_actions`, 'float' actions may carry matching
            `min_value`/`max_value`), or a dict of such per-action spec dicts
            keyed by action name.
        preprocessing: Optional preprocessing spec (single-state) or dict of
            specs keyed by state name, passed to `Preprocessing.from_spec`.
        exploration: Optional exploration spec (single-action) or dict of
            specs keyed by action name, passed to `Exploration.from_spec`.
        reward_preprocessing: Optional preprocessing spec applied to rewards.
        batched_observe: Optional int; when not None, terminal/reward
            observations are buffered in lists for batched processing.

    Raises:
        TensorForceError: if an action spec is missing required values.
    """
    # States config and preprocessing
    self.preprocessing = dict()

    if 'shape' in states_spec:
        # Single-state: wrap the spec under the canonical name 'state'.
        self.unique_state = True
        state = dict(states_spec)
        self.states_spec = dict(state=state)

        if isinstance(state['shape'], int):
            # Shape: int to unary tuple
            state['shape'] = (state['shape'],)

        if 'type' not in state:
            # Type: default to float
            state['type'] = 'float'

        if preprocessing is not None:
            state_preprocessing = Preprocessing.from_spec(spec=preprocessing)
            self.preprocessing['state'] = state_preprocessing
            # Preprocessing may change the effective state shape.
            state['shape'] = state_preprocessing.processed_shape(shape=state['shape'])

    else:
        # Multi-state. Copy each per-state dict so the defaulting and shape
        # normalization below do not mutate the caller's spec dicts (the
        # original shallow `dict(states_spec)` shared the nested dicts).
        self.unique_state = False
        self.states_spec = {name: dict(state) for name, state in states_spec.items()}

        for name, state in self.states_spec.items():
            if isinstance(state['shape'], int):
                # Shape: int to unary tuple
                state['shape'] = (state['shape'],)

            if 'type' not in state:
                # Type: default to float
                state['type'] = 'float'

            if preprocessing is not None and name in preprocessing:
                # Bug fix: use a distinct local name. The original reassigned
                # `preprocessing` itself, clobbering the spec dict so that every
                # state after the first preprocessed one was matched against a
                # Preprocessing instance instead of the spec mapping.
                state_preprocessing = Preprocessing.from_spec(preprocessing[name])
                self.preprocessing[name] = state_preprocessing
                state['shape'] = state_preprocessing.processed_shape(shape=state['shape'])

    # Actions config and exploration
    self.exploration = dict()

    if 'type' in actions_spec:
        # Single-action: wrap the spec under the canonical name 'action'.
        self.unique_action = True
        action = dict(actions_spec)
        self.actions_spec = dict(action=action)

        if action['type'] == 'int':
            # Check required values
            if 'num_actions' not in action:
                raise TensorForceError("Action requires value 'num_actions' set!")
        elif action['type'] == 'float':
            # min_value/max_value must be supplied together or not at all.
            if ('min_value' in action) != ('max_value' in action):
                raise TensorForceError("Action requires both values 'min_value' and 'max_value' set!")

        if 'shape' not in action:
            # Shape: default to empty tuple
            action['shape'] = ()

        if isinstance(action['shape'], int):
            # Shape: int to unary tuple
            action['shape'] = (action['shape'],)

        if exploration is not None:
            self.exploration['action'] = Exploration.from_spec(exploration)

    else:
        # Multi-action: copy each per-action dict (same mutation concern as states).
        self.unique_action = False
        self.actions_spec = {name: dict(action) for name, action in actions_spec.items()}

        for name, action in self.actions_spec.items():
            if action['type'] == 'int':
                # Check required values
                if 'num_actions' not in action:
                    raise TensorForceError("Action requires value 'num_actions' set!")
            elif action['type'] == 'float':
                if ('min_value' in action) != ('max_value' in action):
                    raise TensorForceError("Action requires both values 'min_value' and 'max_value' set!")

            if 'shape' not in action:
                # Shape: default to empty tuple
                action['shape'] = ()

            if isinstance(action['shape'], int):
                # Shape: int to unary tuple
                action['shape'] = (action['shape'],)

            if exploration is not None and name in exploration:
                self.exploration[name] = Exploration.from_spec(exploration[name])

    # reward preprocessing config
    if reward_preprocessing is None:
        self.reward_preprocessing = None
    else:
        self.reward_preprocessing = Preprocessing.from_spec(reward_preprocessing)

    self.model = self.initialize_model(
        states_spec=self.states_spec,
        actions_spec=self.actions_spec,
    )

    # Batched observe for better performance with Python.
    self.batched_observe = batched_observe
    if self.batched_observe is not None:
        self.observe_terminal = list()
        self.observe_reward = list()

    self.reset()
def __init__(self, states_spec, actions_spec, config):
    """
    Initializes the reinforcement learning agent.

    Args:
        states_spec: Spec dict for a single state (contains `shape`, optional
            `type` defaulting to 'float'), or a dict of such per-state spec
            dicts keyed by state name.
        actions_spec: Spec dict for a single action (contains `type`; 'int'
            actions require `num_actions`, 'float' actions may carry matching
            `min_value`/`max_value`), or a dict of such per-action spec dicts
            keyed by action name.
        config (Configuration): configuration object providing at least
            `log_level`, `preprocessing`, `exploration` and
            `reward_preprocessing`.

    Raises:
        TensorForceError: if an action spec is missing required values.
    """
    # TODO(review): logger named after the concrete class — consider __name__?
    self.logger = logging.getLogger(self.__class__.__name__)  # other name?
    self.logger.setLevel(util.log_levels[config.log_level])

    # States config and preprocessing
    self.preprocessing = dict()

    if 'shape' in states_spec:
        # Single-state: wrap the spec under the canonical name 'state'.
        self.unique_state = True
        state = dict(states_spec)
        self.states_spec = dict(state=state)

        if isinstance(state['shape'], int):
            # Shape: int to unary tuple
            state['shape'] = (state['shape'], )

        if 'type' not in state:
            # Type: default to float
            state['type'] = 'float'

        if config.preprocessing is not None:
            preprocessing = Preprocessing.from_spec(spec=config.preprocessing)
            self.preprocessing['state'] = preprocessing
            # Preprocessing may change the effective state shape.
            state['shape'] = preprocessing.processed_shape(shape=state['shape'])

    else:
        # Multi-state
        # NOTE(review): dict(states_spec) is a shallow copy — the loop below
        # mutates the caller's nested state dicts in place; confirm intended.
        self.unique_state = False
        self.states_spec = dict(states_spec)

        for name, state in self.states_spec.items():
            if isinstance(state['shape'], int):
                # Shape: int to unary tuple
                state['shape'] = (state['shape'], )

            if 'type' not in state:
                # Type: default to float
                state['type'] = 'float'

            if config.preprocessing is not None and name in config.preprocessing:
                preprocessing = Preprocessing.from_spec(config.preprocessing[name])
                self.preprocessing[name] = preprocessing
                state['shape'] = preprocessing.processed_shape(shape=state['shape'])

    # Actions config and exploration
    self.exploration = dict()

    if 'type' in actions_spec:
        # Single-action: wrap the spec under the canonical name 'action'.
        self.unique_action = True
        action = dict(actions_spec)
        self.actions_spec = dict(action=action)

        if action['type'] == 'int':
            # Check required values
            if 'num_actions' not in action:
                raise TensorForceError("Action requires value 'num_actions' set!")
        elif action['type'] == 'float':
            # min_value/max_value must be supplied together or not at all.
            if ('min_value' in action) != ('max_value' in action):
                raise TensorForceError("Action requires both values 'min_value' and 'max_value' set!")

        if 'shape' not in action:
            # Shape: default to empty tuple
            action['shape'] = ()

        if isinstance(action['shape'], int):
            # Shape: int to unary tuple
            action['shape'] = (action['shape'], )

        if config.exploration is not None:
            self.exploration['action'] = Exploration.from_spec(config.exploration)

    else:
        # Multi-action (same shallow-copy caveat as states above).
        self.unique_action = False
        self.actions_spec = dict(actions_spec)

        for name, action in self.actions_spec.items():
            if action['type'] == 'int':
                # Check required values
                if 'num_actions' not in action:
                    raise TensorForceError("Action requires value 'num_actions' set!")
            elif action['type'] == 'float':
                if ('min_value' in action) != ('max_value' in action):
                    raise TensorForceError("Action requires both values 'min_value' and 'max_value' set!")

            if 'shape' not in action:
                # Shape: default to empty tuple
                action['shape'] = ()

            if isinstance(action['shape'], int):
                # Shape: int to unary tuple
                action['shape'] = (action['shape'], )

            if config.exploration is not None and name in config.exploration:
                self.exploration[name] = Exploration.from_spec(config.exploration[name])

    # reward preprocessing config
    if config.reward_preprocessing is None:
        self.reward_preprocessing = None
    else:
        self.reward_preprocessing = Preprocessing.from_spec(config.reward_preprocessing)

    # Delegate model construction to the subclass hook.
    self.model = self.initialize_model(states_spec=self.states_spec, actions_spec=self.actions_spec, config=config)

    # Warn about config keys that were never read — likely typos or dead settings.
    not_accessed = config.not_accessed()
    if not_accessed:
        self.logger.warning("Configuration values not accessed: {}".format(', '.join(not_accessed)))

    # episode starts at -1; presumably reset() advances it to 0 — confirm.
    self.episode = -1
    self.timestep = 0
    self.reset()
def __init__(
    self,
    states_spec,
    actions_spec,
    preprocessing,
    exploration,
    reward_preprocessing,
    batched_observe
):
    """
    Initializes the reinforcement learning agent.

    Args:
        states_spec: Dict containing at least one state definition. In the case
            of a single state, keys `shape` and `type` are necessary. For
            multiple states, pass a dict of dicts where each state is a dict
            itself with a unique name as its key.
        actions_spec: Dict containing at least one action definition. Actions
            have types and either `num_actions` for discrete actions or a
            `shape` for continuous actions. Consult documentation and tests
            for more.
        preprocessing: Optional list of preprocessors (e.g. `image_resize`,
            `grayscale`) to apply to state. Each preprocessor is a dict
            containing a type and optional necessary arguments.
        exploration: Optional dict specifying exploration type (epsilon greedy
            strategies or Gaussian noise) and arguments.
        reward_preprocessing: Optional dict specifying reward preprocessor
            using same syntax as state preprocessing.
        batched_observe: Optional int specifying how many observe calls are
            batched into one session run. Without batching, throughput will be
            lower because every `observe` triggers a session invocation to
            update rewards in the graph.

    Raises:
        TensorForceError: if an action spec is missing required values.
    """
    # --- States: normalize specs and attach per-state preprocessing ---
    self.preprocessing = dict()
    self.unique_state = 'shape' in states_spec
    if self.unique_state:
        # Fold the single state (and its preprocessing) under the name 'state'.
        states_spec = dict(state=states_spec)
        preprocessing = dict(state=preprocessing)
    # Deep copy so the normalization below never touches the caller's dicts.
    self.states_spec = deepcopy(states_spec)

    for state_name, state_spec in self.states_spec.items():
        shape = state_spec['shape']
        if isinstance(shape, int):
            # A bare int means a rank-1 shape.
            state_spec['shape'] = (shape,)
        # Missing type falls back to float.
        state_spec.setdefault('type', 'float')

        spec = None if preprocessing is None else preprocessing.get(state_name)
        if spec is not None:
            processor = Preprocessing.from_spec(spec=spec)
            self.preprocessing[state_name] = processor
            # The processor may alter the effective state shape.
            state_spec['shape'] = processor.processed_shape(shape=state_spec['shape'])

    # --- Actions: validate specs and attach per-action exploration ---
    self.exploration = dict()
    self.unique_action = 'type' in actions_spec
    if self.unique_action:
        # Fold the single action (and its exploration) under the name 'action'.
        actions_spec = dict(action=actions_spec)
        exploration = dict(action=exploration)
    self.actions_spec = deepcopy(actions_spec)

    for action_name, action_spec in self.actions_spec.items():
        kind = action_spec['type']
        # Required values per action type.
        if kind == 'int' and 'num_actions' not in action_spec:
            raise TensorForceError("Action requires value 'num_actions' set!")
        if kind == 'float' and (('min_value' in action_spec) != ('max_value' in action_spec)):
            raise TensorForceError("Action requires both values 'min_value' and 'max_value' set!")

        # Shape defaults to scalar; a bare int becomes a rank-1 shape.
        action_spec.setdefault('shape', ())
        if isinstance(action_spec['shape'], int):
            action_spec['shape'] = (action_spec['shape'],)

        expl_spec = None if exploration is None else exploration.get(action_name)
        if expl_spec is not None:
            self.exploration[action_name] = Exploration.from_spec(spec=expl_spec)

    # --- Reward preprocessing ---
    if reward_preprocessing is None:
        self.reward_preprocessing = None
    else:
        self.reward_preprocessing = Preprocessing.from_spec(spec=reward_preprocessing)

    self.model = self.initialize_model(
        states_spec=self.states_spec,
        actions_spec=self.actions_spec,
    )

    # Batched observe for better performance with Python.
    self.batched_observe = batched_observe
    if self.batched_observe is not None:
        self.observe_terminal = list()
        self.observe_reward = list()

    self.reset()