Code Example #1
    def test_incorporating_feedback_multiupdate_with_lambda_decay_diff_value(self):
        legal_actions = get_legal_actions()
        discount = 1
        feature_extractor = feature_extractors.IdentityFeatureExtractor()
        exploration_prob = 0
        step_size = 1
        decay = .2
        threshold = .001
        maxGradient = 5
        num_consecutive_random_actions = 0
        ql = learning_agents.SARSALambdaLearningAlgorithm(legal_actions, discount, 
            feature_extractor, exploration_prob, step_size, threshold, decay, maxGradient,
            num_consecutive_random_actions)

        state_1 = {'test_feature_1' : 1}
        state_2 = {'test_feature_2' : 1}
        state_3 = {'test_feature_3' : 1}
        state_4 = {'test_feature_4' : 1}
        action = 0
        reward = 1
        ql.incorporateFeedback(state_1, action, reward, state_2)
        ql.incorporateFeedback(state_2, action, reward, state_3)
        ql.incorporateFeedback(state_3, action, reward, state_4)

        # With discount = 1 and decay = .2, each earlier feature's trace
        # shrinks by a factor of .2 per step: test_feature_1 collects
        # 1 + .2 + .04, test_feature_2 collects 1 + .2, test_feature_3 just 1.
        actual = ql.weights
        expected = collections.Counter({'test_feature_1': 1.24, 
                    'test_feature_2': 1.2, 'test_feature_3': 1})
        self.assertEqual(actual, expected)
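
For reference, the expected Counter can be reproduced by hand. The sketch below assumes incorporateFeedback performs the standard accumulating-trace SARSA(λ) update (decay every trace by discount * decay, add the current feature values, then apply w += step_size * delta * trace); that is an assumption about learning_agents, not taken from it, and the helper sarsa_lambda_weights is hypothetical.

import collections

def sarsa_lambda_weights(transitions, decay, discount=1, step_size=1):
    """Hypothetical stand-in for SARSALambdaLearningAlgorithm's update loop."""
    weights, traces = collections.Counter(), collections.Counter()
    for state, reward, new_state in transitions:
        q = sum(weights[f] * v for f, v in state.items())
        q_next = sum(weights[f] * v for f, v in new_state.items())
        delta = reward + discount * q_next - q    # TD error
        for f in traces:                          # decay existing traces
            traces[f] *= discount * decay
        for f, v in state.items():                # accumulate current features
            traces[f] += v
        for f, e in traces.items():               # traced weight update
            weights[f] += step_size * delta * e
    return weights

print(sarsa_lambda_weights(
    [({'test_feature_1': 1}, 1, {'test_feature_2': 1}),
     ({'test_feature_2': 1}, 1, {'test_feature_3': 1}),
     ({'test_feature_3': 1}, 1, {'test_feature_4': 1})],
    decay=.2))
# Counter({'test_feature_1': 1.24, 'test_feature_2': 1.2, 'test_feature_3': 1})

Each of the three TD errors is 1 (every Q value involved is still 0 at the moment it is read), so each weight simply accumulates its own decayed trace.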
Code Example #2
    def test_incorporating_feedback_overlapping_multiupdate_with_decay_negative(self):
        legal_actions = get_legal_actions()
        discount = 1
        feature_extractor = feature_extractors.IdentityFeatureExtractor()
        exploration_prob = 0
        step_size = 1
        decay = .5
        threshold = .001
        maxGradient = 5
        num_consecutive_random_actions = 0
        ql = learning_agents.SARSALambdaLearningAlgorithm(legal_actions, discount, 
            feature_extractor, exploration_prob, step_size, threshold, decay, maxGradient, 
            num_consecutive_random_actions)

        state_1 = {'test_feature_1' : 1}
        state_2 = {'test_feature_2' : -1}
        state_3 = {'test_feature_1' : 1}
        action = 0
        reward_1 = 1
        reward_2 = -1
        ql.incorporateFeedback(state_1, action, reward_1, state_2)
        ql.incorporateFeedback(state_2, action, reward_2, state_3)

        # The second update's TD error is zero: reward_2 (-1) cancels
        # Q(state_3) = 1, so the decayed traces multiply a zero correction
        # and the weights stay put.
        actual = ql.weights
        expected = collections.Counter({'test_feature_1': 1, 
                    'test_feature_2': 0})
        self.assertEqual(actual, expected)
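
Under the same assumed update rule, the expected Counter follows because the second TD error vanishes: the first update sets w[test_feature_1] = 1, so Q(state_3) = 1 and delta = reward_2 + Q(state_3) - Q(state_2) = -1 + 1 - 0 = 0. The nonzero traces (test_feature_1: .5, test_feature_2: -1) then multiply a zero correction. Reusing the hypothetical sarsa_lambda_weights sketch from Code Example #1:

print(sarsa_lambda_weights(
    [({'test_feature_1': 1}, 1, {'test_feature_2': -1}),
     ({'test_feature_2': -1}, -1, {'test_feature_1': 1})],
    decay=.5))
# compares equal to Counter({'test_feature_1': 1, 'test_feature_2': 0})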