def test_incorporating_feedback_multiupdate_with_lambda_decay_diff_value(self):
    legal_actions = get_legal_actions()
    discount = 1
    feature_extractor = feature_extractors.IdentityFeatureExtractor()
    exploration_prob = 0
    step_size = 1
    decay = .2
    threshold = .001
    maxGradient = 5
    num_consecutive_random_actions = 0
    ql = learning_agents.SARSALambdaLearningAlgorithm(
        legal_actions, discount, feature_extractor, exploration_prob,
        step_size, threshold, decay, maxGradient,
        num_consecutive_random_actions)

    state_1 = {'test_feature_1': 1}
    state_2 = {'test_feature_2': 1}
    state_3 = {'test_feature_3': 1}
    state_4 = {'test_feature_4': 1}
    action = 0
    reward = 1
    ql.incorporateFeedback(state_1, action, reward, state_2)
    ql.incorporateFeedback(state_2, action, reward, state_3)
    ql.incorporateFeedback(state_3, action, reward, state_4)

    actual = ql.weights
    # Each later update re-credits the earlier features through their decayed
    # eligibility traces (decay = .2, discount = 1), so the first feature's weight
    # is 1 + .2 + .04 = 1.24 and the second's is 1 + .2 = 1.2.
    expected = collections.Counter({'test_feature_1': 1.24,
                                    'test_feature_2': 1.2,
                                    'test_feature_3': 1})
    self.assertEqual(actual, expected)
def test_incorporating_feedback_overlapping_multiupdate_with_decay_negative(self):
    legal_actions = get_legal_actions()
    discount = 1
    feature_extractor = feature_extractors.IdentityFeatureExtractor()
    exploration_prob = 0
    step_size = 1
    decay = .5
    threshold = .001
    maxGradient = 5
    num_consecutive_random_actions = 0
    ql = learning_agents.SARSALambdaLearningAlgorithm(
        legal_actions, discount, feature_extractor, exploration_prob,
        step_size, threshold, decay, maxGradient,
        num_consecutive_random_actions)

    state_1 = {'test_feature_1': 1}
    # ql.weights['test_feature_2'] = 1
    state_2 = {'test_feature_2': -1}
    state_3 = {'test_feature_1': 1}
    action = 0
    reward_1 = 1
    reward_2 = -1
    ql.incorporateFeedback(state_1, action, reward_1, state_2)
    ql.incorporateFeedback(state_2, action, reward_2, state_3)

    actual = ql.weights
    # The second update's target (reward_2 + Q(state_3) = -1 + 1) matches its
    # prediction of 0, so the TD error is 0 and no weight moves: the first feature
    # keeps the 1 it earned in the first update and the negative feature stays at 0.
    expected = collections.Counter({'test_feature_1': 1,
                                    'test_feature_2': 0})
    self.assertEqual(actual, expected)
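
# A minimal hand-check sketch of the update rule these expectations assume. This is an
# illustrative assumption, not the learning_agents.SARSALambdaLearningAlgorithm
# implementation: traces are decayed by discount * decay before the current state's
# features are credited, then weights move by step_size * TD-error * trace. The helper
# name and signature below are hypothetical; gradient clipping (maxGradient) and the
# trace threshold are omitted because the TD errors in these tests stay small.
def _sarsa_lambda_hand_check(transitions, discount, step_size, decay):
    """Replay (features, reward, next_features) tuples with accumulating traces."""
    weights = collections.Counter()
    traces = collections.Counter()
    for features, reward, next_features in transitions:
        prediction = sum(weights[f] * v for f, v in features.items())
        target = reward + discount * sum(weights[f] * v for f, v in next_features.items())
        for f in list(traces):
            traces[f] *= discount * decay   # decay existing eligibility traces
        for f, v in features.items():
            traces[f] += v                  # credit the features just acted on
        for f, e in traces.items():
            weights[f] += step_size * (target - prediction) * e
    return weights

# Replaying the first test's three transitions with decay=.2 through this sketch yields
# {'test_feature_1': 1.24, 'test_feature_2': 1.2, 'test_feature_3': 1} (up to
# floating-point rounding), and the second test's transitions leave the weights at
# {'test_feature_1': 1, 'test_feature_2': 0} because the second TD error is 0.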