def __init__(self, domain: Domain, task: Task, epsilon=0.1, alpha=0.6,
             gamma=0.95, lamb=0.95, expected=False):
    """
    :param domain: The world the agent is placed in.
    :param task: The task in the world, which defines the reward function.
    :param epsilon: Exploration rate for the epsilon-greedy policy.
    :param alpha: Learning rate (step size).
    :param gamma: Discount factor for future rewards.
    :param lamb: Trace-decay parameter lambda for the eligibility traces.
    :param expected: If True, use the Expected Sarsa update target.
    """
    super().__init__(domain, task)
    self.world = domain
    self.task = task
    self.epsilon = epsilon
    self.alpha = alpha
    self.gamma = gamma
    self.lamb = lamb
    self.expected = expected
    self.previousaction = None
    self.previousstate = None
    # Q-value of the previous state-action pair, carried between steps by
    # the true online Sarsa(lambda) update.
    self.value_old = 0.0
    example_state = domain.get_current_state()
    actions = domain.get_actions(example_state)
    self.feature_extractor = DiscretizedGridWorldState(
        example_state, GridWorldAction(Direction.up))
    # One eligibility-trace entry per feature of the linear approximator.
    self.eligibility = np.zeros(self.feature_extractor.number_of_features)
    self.value_function = LinearVFA(
        self.feature_extractor.number_of_features, actions)
    self.current_cumulative_reward = 0.0
def __init__(self, domain: Domain, task: Task, epsilon=0.1, alpha=0.6,
             gamma=0.95, lamb=0.95, expected=False):
    """
    :param domain: The world the agent is placed in.
    :param task: The task in the world, which defines the reward function.
    :param epsilon: Exploration rate for the epsilon-greedy policy.
    :param alpha: Learning rate (step size).
    :param gamma: Discount factor for future rewards.
    :param lamb: Trace-decay parameter lambda.
    :param expected: If True, use the Expected Sarsa update target.
    """
    super().__init__(domain, task)
    self.world = domain
    self.task = task
    self.epsilon = epsilon
    self.alpha = alpha
    self.gamma = gamma
    self.lamb = lamb
    self.expected = expected
    self.previousaction = None
    self.previousstate = None
    # Tabular Q-values, one entry per state-action pair.
    self.value_function = StateActionValueTable(
        domain.get_actions(domain.get_current_state()))
    self.current_cumulative_reward = 0.0
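# A minimal, self-contained sketch of the two bootstrap targets the
# `expected` flag above selects between. The plain lists and floats here are
# hypothetical illustration, not this agent's API: Sarsa bootstraps on the
# Q-value of the sampled next action, while Expected Sarsa bootstraps on the
# expectation of Q(s', .) under the epsilon-greedy behavior policy.
def sarsa_target(reward, gamma, q_next_sampled):
    # Classic Sarsa: bootstrap on Q(s', a') for the action actually taken.
    return reward + gamma * q_next_sampled

def expected_sarsa_target(reward, gamma, q_next, epsilon):
    # Expected Sarsa: each action has probability epsilon / n, and the
    # greedy action receives the remaining (1 - epsilon) probability mass.
    n = len(q_next)
    expectation = sum((epsilon / n) * q for q in q_next) + (1 - epsilon) * max(q_next)
    return reward + gamma * expectation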
def __init__(self, domain: Domain, task: Task,
             feature_extractor: FeatureExtractor, epsilon=0.1, alpha=0.6,
             gamma=1.0, name="Sarsa"):
    """
    :param domain: The world the agent is placed in.
    :param task: The task in the world, which defines the reward function.
    :param feature_extractor: Maps raw states to feature vectors.
    :param epsilon: Exploration rate for the epsilon-greedy policy.
    :param alpha: Learning rate (step size).
    :param gamma: Discount factor for future rewards.
    :param name: Display name of the agent.
    """
    super().__init__(domain, task, name)
    self.world = domain
    self.task = task
    self.epsilon = epsilon
    self.alpha = alpha
    self.gamma = gamma
    self.previousaction = None
    self.previousstate = None
    actions = domain.get_actions()
    self.feature_extractor = feature_extractor
    # CMAC (tile-coding) approximator over the extracted features.
    self.value_function = CMACValueFunction(
        self.feature_extractor.num_features(), actions, alpha)
    self.current_cumulative_reward = 0.0
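# All of these agents store an `epsilon` for epsilon-greedy action
# selection. A standalone sketch of that scheme over a plain list of
# Q-values; the names below are illustrative only and do not mirror the
# project's Domain or value-function API.
import random

def epsilon_greedy(q_values, epsilon):
    """Pick a random action index with probability epsilon, else the greedy one."""
    if random.random() < epsilon:
        return random.randrange(len(q_values))
    return max(range(len(q_values)), key=lambda a: q_values[a])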
def __init__(self, domain: Domain, task: Task,
             feature_extractor: FeatureExtractor, epsilon=0.5, alpha=0.1,
             gamma=1.0, value_function=None):
    """
    :param domain: The world the agent is placed in.
    :param task: The task in the world, which defines the reward function.
    :param feature_extractor: Maps raw states to feature vectors.
    :param epsilon: Initial exploration rate for the epsilon-greedy policy.
    :param alpha: Learning rate passed to the Q-network.
    :param gamma: Discount factor for future rewards.
    :param value_function: Optional pre-built value function; a QNetwork
        is created when none is given.
    """
    # Kept separately from `epsilon` so the exploration rate can be
    # adjusted during training without losing its starting value.
    self.initial_epsilon = epsilon
    self.epsilon = epsilon
    self.alpha = alpha
    self.gamma = gamma
    self.feature_extractor = feature_extractor
    num_actions = len(domain.get_actions())
    num_state_features = feature_extractor.num_features()
    if value_function is None:
        self.value_function = QNetwork(num_actions, num_state_features,
                                       self.alpha)
    else:
        self.value_function = value_function
    self.world = domain
    self.task = task
    self.current_cumulative_reward = 0.0
    self.previousaction = None
    self.previousstate = None
    self.name = "Q-network"
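# Storing `initial_epsilon` alongside `epsilon` suggests the exploration
# rate is annealed over training. A common schedule (assumed here, not
# confirmed by this snippet) is a linear decay toward a floor; the function
# below is an illustrative stand-in, not part of this agent.
def annealed_epsilon(initial_epsilon, final_epsilon, anneal_steps, step):
    """Linearly decay epsilon from initial_epsilon to final_epsilon."""
    fraction = min(step / anneal_steps, 1.0)
    return initial_epsilon + fraction * (final_epsilon - initial_epsilon)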
def __init__(self, domain: Domain, task: Task,
             feature_extractor: FeatureExtractor, epsilon=0.1, alpha=0.5,
             gamma=1.0, value_function=None):
    """
    :param domain: The world the agent is placed in.
    :param task: The task in the world, which defines the reward function.
    :param feature_extractor: Maps raw states to feature vectors.
    :param epsilon: Initial exploration rate for the epsilon-greedy policy.
    :param alpha: Base learning rate; scaled by the domain's history
        length below.
    :param gamma: Discount factor for future rewards.
    :param value_function: Optional pre-built value function; a double
        Q-network is created when none is given.
    """
    self.initial_epsilon = epsilon
    self.epsilon = epsilon
    # Step size is scaled down by the number of history frames the domain
    # stacks into each state.
    self.alpha = alpha / domain.history_length
    self.gamma = gamma
    self.feature_extractor = feature_extractor
    num_actions = len(domain.get_actions())
    num_state_features = feature_extractor.num_features()
    if value_function is None:
        self.value_function = DoubleQNetworkValueFunction(
            num_actions, num_state_features, self.alpha)
    else:
        self.value_function = value_function
    self.world = domain
    self.task = task
    self.current_cumulative_reward = 0.0
    self.previousaction = None
    self.previousstate = None
    self.name = "Double Dueling DQN"
    # Replay memory and training schedule for the DQN-style updates.
    self.experience_buffer = ExperienceBuffer()
    self.total_steps = 0
    self.update_frequency = 4     # train every 4 environment steps
    self.batch_size = 32          # transitions sampled per update
    self.pretraining_steps = 100  # steps collected before learning starts
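# A hedged sketch of the training cadence the fields above imply: fill the
# replay buffer for `pretraining_steps` environment steps, then sample
# `batch_size` transitions every `update_frequency` steps. The method names
# used on the buffer and network (`add`, `sample`, `train_on_batch`) are
# hypothetical stand-ins, not the actual ExperienceBuffer or
# DoubleQNetworkValueFunction API.
def maybe_train(agent, transition):
    agent.experience_buffer.add(transition)  # hypothetical method
    agent.total_steps += 1
    if agent.total_steps < agent.pretraining_steps:
        return  # still pre-filling the replay buffer
    if agent.total_steps % agent.update_frequency == 0:
        batch = agent.experience_buffer.sample(agent.batch_size)  # hypothetical
        agent.value_function.train_on_batch(batch)                # hypothetical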
def __init__(self, domain: Domain, task: Task, epsilon=0.1, alpha=0.6,
             gamma=1.0, lamb=0.95, expected=False, feature_extractor=None,
             name="Sarsa"):
    """
    :param domain: The world the agent is placed in.
    :param task: The task in the world, which defines the reward function.
    :param epsilon: Exploration rate for the epsilon-greedy policy.
    :param alpha: Learning rate (step size).
    :param gamma: Discount factor for future rewards.
    :param lamb: Trace-decay parameter lambda for the eligibility traces.
    :param expected: If True, use the Expected Sarsa update target.
    :param feature_extractor: Not used by this tabular agent.
    :param name: Display name of the agent.
    """
    super().__init__(domain, task, name)
    self.world = domain
    self.task = task
    self.epsilon = epsilon
    self.alpha = alpha
    self.gamma = gamma
    self.lamb = lamb
    self.expected = expected
    self.previousaction = None
    self.previousstate = None
    self.value_function = StateActionValueTable(domain.get_actions())
    self.current_cumulative_reward = 0.0
    # Sparse eligibility traces, keyed by state-action pair.
    self.eligibility = {}
    self.v_old = 0
def __init__(self, domain: Domain, task: Task,
             feature_extractor: FeatureExtractor, epsilon=0.1, alpha=0.6,
             gamma=1.0, lamb=0.95, name="True Online Sarsa(λ)"):
    """
    :param domain: The world the agent is placed in.
    :param task: The task in the world, which defines the reward function.
    :param feature_extractor: Maps raw states to feature vectors.
    :param epsilon: Exploration rate for the epsilon-greedy policy.
    :param alpha: Learning rate (step size).
    :param gamma: Discount factor for future rewards.
    :param lamb: Trace-decay parameter lambda for the eligibility traces.
    :param name: Display name of the agent.
    """
    super().__init__(domain, task, name)
    self.world = domain
    self.task = task
    self.epsilon = epsilon
    self.alpha = alpha
    self.gamma = gamma
    self.lamb = lamb
    self.previousaction = None
    self.previousstate = None
    # Q-value of the previous state-action pair, carried between steps by
    # the true online Sarsa(lambda) update.
    self.value_old = 0.0
    actions = domain.get_actions()
    self.feature_extractor = feature_extractor
    # One eligibility-trace entry per feature of the linear approximator.
    self.eligibility = np.zeros(self.feature_extractor.num_features())
    self.value_function = LinearVFA(self.feature_extractor.num_features(),
                                    actions)
    self.current_cumulative_reward = 0.0
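# The `eligibility` vector and `value_old` scalar above exist to support
# the true online Sarsa(lambda) update (van Seijen et al., 2016). Below is
# a minimal numpy sketch of one step of that update on linear weights,
# using hypothetical local names rather than this class's methods.
import numpy as np

def true_online_sarsa_step(w, e, phi, phi_next, reward, q_old,
                           alpha, gamma, lamb):
    """One true online Sarsa(lambda) update; returns (w, e, next q_old)."""
    q = float(w @ phi)            # Q(s, a) under the current weights
    q_next = float(w @ phi_next)  # Q(s', a') under the current weights
    delta = reward + gamma * q_next - q
    # Dutch trace: decay, then add the features with a correction term.
    e = gamma * lamb * e + phi - alpha * gamma * lamb * float(e @ phi) * phi
    # Weight update with the (q - q_old) correction that distinguishes the
    # true online variant from conventional Sarsa(lambda).
    w = w + alpha * (delta + q - q_old) * e - alpha * (q - q_old) * phi
    return w, e, q_next  # q_next becomes q_old on the following step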