    def __init__(self,
                 domain: Domain,
                 task: Task,
                 epsilon=0.1,
                 alpha=0.6,
                 gamma=0.95,
                 lamb=0.95,
                 expected=False):
        """
        :param domain: The world the agent is placed in.
        :param task: The task in the world, which defines the reward function.
        """
        super().__init__(domain, task)
        self.world = domain
        self.task = task
        self.epsilon = epsilon
        self.alpha = alpha
        self.gamma = gamma
        self.lamb = lamb
        self.expected = expected
        self.previousaction = None
        self.previousstate = None
        self.value_old = 0.0

        example_state = domain.get_current_state()
        actions = domain.get_actions(example_state)
        self.feature_extractor = DiscretizedGridWorldState(
            example_state, GridWorldAction(Direction.up))
        self.eligibility = np.zeros(self.feature_extractor.number_of_features)
        self.value_function = LinearVFA(
            self.feature_extractor.number_of_features, actions)
        self.current_cumulative_reward = 0.0
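
The epsilon field above typically drives epsilon-greedy action selection. A minimal sketch of that selection, written as a standalone helper; the extractor's extract method and the value function's value lookup are assumed names, not confirmed by the snippet:

import random

def choose_action(agent, state):
    # Hypothetical epsilon-greedy selection using the fields set above:
    # with probability epsilon explore uniformly, otherwise act greedily.
    actions = agent.world.get_actions(state)
    if random.random() < agent.epsilon:
        return random.choice(actions)
    features = agent.feature_extractor.extract(state)  # assumed extractor API
    values = [agent.value_function.value(features, a) for a in actions]
    best = max(range(len(actions)), key=lambda i: values[i])
    return actions[best]
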
Example #2
    def __init__(self,
                 domain: Domain,
                 task: Task,
                 epsilon=0.1,
                 alpha=0.6,
                 gamma=0.95,
                 lamb=0.95,
                 expected=False):
        """
        :param domain: The world the agent is placed in.
        :param task: The task in the world, which defines the reward function.
        """
        super().__init__(domain, task)
        self.world = domain
        self.task = task
        self.epsilon = epsilon
        self.alpha = alpha
        self.gamma = gamma
        self.lamb = lamb
        self.expected = expected
        self.previousaction = None
        self.previousstate = None

        self.value_function = StateActionValueTable(
            domain.get_actions(domain.get_current_state()))
        self.current_cumulative_reward = 0.0
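
The expected flag above usually switches the TD target between Sarsa and Expected Sarsa. A minimal sketch of the two targets under an epsilon-greedy policy; the value function's get lookup and the list-valued actions are assumptions:

import numpy as np

def td_target(agent, reward, next_state, next_action):
    # Hypothetical helper showing the two targets selected by `expected`.
    actions = agent.world.get_actions(next_state)
    q = np.array([agent.value_function.get(next_state, a) for a in actions])  # assumed lookup
    if agent.expected:
        # Expected Sarsa: expectation of Q under the epsilon-greedy policy.
        probs = np.full(len(actions), agent.epsilon / len(actions))
        probs[int(np.argmax(q))] += 1.0 - agent.epsilon
        next_value = float(np.dot(probs, q))
    else:
        # Sarsa: value of the action actually taken next.
        next_value = float(q[actions.index(next_action)])
    return reward + agent.gamma * next_value
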
Example #3
    def __init__(self,
                 domain: Domain,
                 task: Task,
                 feature_extractor: FeatureExtractor,
                 epsilon=0.1,
                 alpha=0.6,
                 gamma=1.0,
                 name="Sarsa"):
        """
        :param domain: The world the agent is placed in.
        :param task: The task in the world, which defines the reward function.
        """
        super().__init__(domain, task, name)
        self.world = domain
        self.task = task
        self.epsilon = epsilon
        self.alpha = alpha
        self.gamma = gamma
        self.previousaction = None
        self.previousstate = None

        actions = domain.get_actions()
        self.feature_extractor = feature_extractor
        self.value_function = CMACValueFunction(
            self.feature_extractor.num_features(), actions, alpha)

        self.current_cumulative_reward = 0.0
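
The CMACValueFunction name suggests tile coding: the extractor maps a state to a small set of active tile indices, and a state-action value is the sum of the weights of those tiles. A minimal illustrative sketch, not the repository's actual class; the weights layout and per-tile step-size split are assumptions:

import numpy as np

class TinyCMAC:
    # Illustrative tile-coded value function: one weight vector per action,
    # indexed by the active tile indices produced by the feature extractor.
    def __init__(self, num_features, actions, alpha):
        self.weights = {a: np.zeros(num_features) for a in actions}
        self.alpha = alpha

    def value(self, active_tiles, action):
        return self.weights[action][active_tiles].sum()

    def update(self, active_tiles, action, target):
        # Distribute the step size evenly across the active tiles.
        error = target - self.value(active_tiles, action)
        self.weights[action][active_tiles] += (self.alpha / len(active_tiles)) * error
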
Example #4
    def __init__(self,
                 domain: Domain,
                 task: Task,
                 feature_extractor: FeatureExtractor,
                 epsilon=0.5,
                 alpha=0.1,
                 gamma=1.0,
                 value_function=None):
        """
        :param domain: The world the agent is placed in.
        :param task: The task in the world, which defines the reward function.
        :param value_function: Optional pre-built value function; a QNetwork is
            created from the domain's action and feature counts when omitted.
        """
        self.initial_epsilon = epsilon
        self.epsilon = epsilon
        self.alpha = alpha
        self.gamma = gamma
        self.feature_extractor = feature_extractor

        num_actions = len(domain.get_actions())
        num_state_features = feature_extractor.num_features()

        if value_function is None:
            self.value_function = QNetwork(num_actions, num_state_features,
                                           self.alpha)
        else:
            self.value_function = value_function
        self.world = domain
        self.task = task

        self.current_cumulative_reward = 0.0
        self.previousaction = None
        self.previousstate = None
        self.name = "Q-network"
Example #5
    def __init__(self, domain: Domain, task: Task, feature_extractor: FeatureExtractor, epsilon=0.1, alpha=0.5,
                 gamma=1.0, value_function=None):
        """
        :param domain: The world the agent is placed in.
        :param task: The task in the world, which defines the reward function.
        """
        self.initial_epsilon = epsilon
        self.epsilon = epsilon
        # Scale the step size down by the domain's history length.
        self.alpha = alpha / domain.history_length
        self.gamma = gamma
        self.feature_extractor = feature_extractor

        num_actions = len(domain.get_actions())
        num_state_features = feature_extractor.num_features()

        if value_function is None:
            self.value_function = DoubleQNetworkValueFunction(num_actions, num_state_features, self.alpha)
        else:
            self.value_function = value_function

        self.world = domain
        self.task = task

        self.current_cumulative_reward = 0.0
        self.previousaction = None
        self.previousstate = None
        self.name = "Double Dueling DQN"

        self.experience_buffer = ExperienceBuffer()
        self.total_steps = 0
        self.update_frequency = 4
        self.batch_size = 32
        self.pretraining_steps = 100
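
The buffer-related fields above imply the usual DQN training cadence: record every transition, and once past the pretraining_steps warm-up, train on a random minibatch every update_frequency steps. A minimal sketch of that wiring; the ExperienceBuffer add/sample methods and the value function's update_batch are assumed names:

def observe_transition(agent, state, action, reward, next_state, done):
    # Hypothetical per-step hook tying together the fields initialised above.
    agent.experience_buffer.add((state, action, reward, next_state, done))  # assumed API
    agent.total_steps += 1
    past_warmup = agent.total_steps > agent.pretraining_steps
    if past_warmup and agent.total_steps % agent.update_frequency == 0:
        batch = agent.experience_buffer.sample(agent.batch_size)  # assumed API
        agent.value_function.update_batch(batch)  # assumed API
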
Example #6
    def __init__(self, domain: Domain, task: Task, epsilon=0.1, alpha=0.6, gamma=1.0, lamb=0.95, expected=False,
                 feature_extractor=None,
                 name="Sarsa"):
        """
        :param domain: The world the agent is placed in.
        :param task: The task in the world, which defines the reward function.
        """
        super().__init__(domain, task, name)
        self.world = domain
        self.task = task
        self.epsilon = epsilon
        self.alpha = alpha
        self.gamma = gamma
        self.lamb = lamb
        self.expected = expected
        self.previousaction = None
        self.previousstate = None

        self.value_function = StateActionValueTable(domain.get_actions())
        self.current_cumulative_reward = 0.0
        self.eligibility = {}
        self.v_old = 0.0
Example #7
    def __init__(self, domain: Domain, task: Task, feature_extractor: FeatureExtractor, epsilon=0.1, alpha=0.6,
                 gamma=1.0, lamb=0.95, name="True Online Sarsa(λ)"):
        """
        :param domain: The world the agent is placed in.
        :param task: The task in the world, which defines the reward function.
        """
        super().__init__(domain, task, name)
        self.world = domain
        self.task = task
        self.epsilon = epsilon
        self.alpha = alpha
        self.gamma = gamma
        self.lamb = lamb
        self.previousaction = None
        self.previousstate = None
        self.value_old = 0.0

        actions = domain.get_actions()
        self.feature_extractor = feature_extractor
        self.eligibility = np.zeros(self.feature_extractor.num_features())
        self.value_function = LinearVFA(self.feature_extractor.num_features(), actions)
        self.current_cumulative_reward = 0.0
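
The eligibility vector and value_old field are exactly the state carried by the true online Sarsa(λ) update (van Seijen and Sutton). A minimal sketch of one such step for a linear value function; exposing the taken action's weight vector as w and the feature vector as phi is an assumption about how the class is wired:

import numpy as np

def true_online_update(agent, w, phi, reward, q, q_next):
    # One true online Sarsa(lambda) step.
    # w: weight vector for the taken action, phi: current feature vector,
    # q = w.phi before the update, q_next = value of the next state-action pair.
    delta = reward + agent.gamma * q_next - q
    # Dutch-trace update of the eligibility vector.
    agent.eligibility = (agent.gamma * agent.lamb * agent.eligibility
                         + phi
                         - agent.alpha * agent.gamma * agent.lamb
                         * np.dot(agent.eligibility, phi) * phi)
    w += agent.alpha * (delta + q - agent.value_old) * agent.eligibility
    w -= agent.alpha * (q - agent.value_old) * phi
    agent.value_old = q_next
    return w
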