Example #1
import numpy as np


class QubeTabularAgent(object):
    def __init__(self, env):
        self.env = env
        # One lookup table per available action, mapping a feature tuple
        # to its estimated value. LBPFeatureTransformer is defined
        # elsewhere in the project.
        self.models = {}
        self.feature_transformer = LBPFeatureTransformer()
        for a in env.actions_available:
            self.models[a] = dict()

    def predict_from_action(self, s, a):
        # Hash the state through its LBP feature vector and look it up
        # in the table for action a; unseen states score 0.0.
        X = tuple(self.feature_transformer.transform(s, normalize=False))
        return self.models[a].get(X, 0.0)

    def predict(self, s):
        return np.array(
            [self.predict_from_action(s, a) for a in self.models])

    def update(self, s, a, G):
        X = tuple(self.feature_transformer.transform(s, normalize=False))
        if X in self.models[a]:
            # Accumulate the return G for an already-seen state-action pair.
            self.models[a][X] += G
        else:
            # Random initialization for unseen state-action pairs.
            self.models[a][X] = np.random.uniform(0.0, 1.0)

    def sample_action(self, s, eps):
        # Epsilon-greedy: explore with probability eps, exploit otherwise.
        if np.random.random() < eps:
            return self.env.sample_action()
        actions = list(self.models.keys())  # list() so it can be indexed
        G = [self.predict_from_action(s, a) for a in actions]
        if max(G) == 0.0:
            # All values are still at their default: choose at random.
            return np.random.choice(actions)
        return actions[int(np.argmax(G))]
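
For context, here is a minimal sketch of how such an agent might be driven, assuming the CubeEnvironment API from Example #3 (cube.get_state, take_action, is_solved) and the lbph_reward function from Example #2; play_episode, gamma and max_steps are illustrative names, not part of the original code, and the Monte Carlo-style update schedule is an assumption.

def play_episode(agent, env, eps, gamma=0.9, max_steps=50):
    # Run one epsilon-greedy episode, then update the agent with the
    # discounted returns; the snippets above do not pin down the exact
    # update schedule, so this backward Monte Carlo pass is a sketch.
    trajectory = []  # (state, action, reward) triples
    for _ in range(max_steps):
        s = env.cube.get_state()
        a = agent.sample_action(s, eps)
        env.take_action(a)
        trajectory.append((s, a, lbph_reward(env.cube)))
        if env.is_solved():
            break
    # Propagate the discounted return backwards through the trajectory.
    G = 0.0
    for s, a, r in reversed(trajectory):
        G = r + gamma * G
        agent.update(s, a, G)
    return G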
Example #2
import numpy as np


def lbph_reward(cube, reward_positive=100, reward_negative=-1):
    if cube.is_solved():
        reward = reward_positive
    else:
        actions_taken = cube.actions_taken
        # Punish regrets, i.e. the agent undoing its last move and
        # oscillating between two states.
        if len(actions_taken) >= 2 and are_inverse_actions(actions_taken[-2], actions_taken[-1]):
            reward = 10 * reward_negative
        # Punish loops, i.e. repeating the same move three times in a row,
        # which is equivalent to a single inverse move.
        elif len(actions_taken) >= 3 and len(set(actions_taken[-3:])) == 1:
            reward = 10 * reward_negative
        else:
            # Score the state by its LBP histogram, weighting the bins
            # linearly from -1.0 (first bin) to +1.0 (last bin).
            state = cube.get_state()
            lbp_code = LBPFeatureTransformer.transform(state, normalize=False)
            hist_lbp = LBPFeatureTransformer.hist_lbp_code(lbp_code)
            coefficients = np.linspace(-1.0, 1.0, len(hist_lbp))
            # coefficients[coefficients > 0.0] = 0.0  # disabled variant
            reward = sum(c * h for (c, h) in zip(coefficients, hist_lbp))
            reward += reward_negative  # constant cost for taking a step
    return reward
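
Example #2 relies on an are_inverse_actions helper that is not shown. Below is a minimal sketch of what it could look like, assuming actions are encoded in standard face-turn notation where the counterclockwise variant carries a trailing apostrophe (e.g. "U" and "U'"); this encoding is an assumption, not taken from the source, and the project's real helper may differ.

def are_inverse_actions(a1, a2):
    # Assumed encoding: "U" undoes "U'" and vice versa.
    return a1 == a2 + "'" or a2 == a1 + "'"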
Example #3
if __name__ == "__main__":
    """
    Functional testing: take each supported action once and inspect the
    resulting state, its LBP code and its LBP histogram.
    """
    import numpy as np

    seed = np.random.randint(0, 100)
    print("Using seed=%i" % seed)
    ce = CubeEnvironment(n=3, seed=seed, whiteplastic=False)
    print("---o- ---------------------------- -o---")
    print("---o- Taking all supported actions -o---")
    print("---o- ---------------------------- -o---")
    state = ce.cube.get_state()
    print("State: %s" % str(state))
    lbp_code = LBPFeatureTransformer.transform(state)
    print("LBP code: %s" % str(lbp_code))
    lbp_hist = LBPFeatureTransformer.hist_lbp_code(lbp_code)
    print("LBP hist: %s" % str(lbp_hist))
    print("It's solved!" if ce.is_solved() else "Not solved!")
    for a in actions_available:
        print("Taking the following action: %s" % a)
        ce.take_action(a)
        state = ce.cube.get_state()
        print("State: %s" % str(state))
        lbp_code = LBPFeatureTransformer.transform(state)
        print("LBP code: %s" % str(lbp_code))
        lbp_hist = LBPFeatureTransformer.hist_lbp_code(lbp_code)
        print("LBP hist: %s" % str(lbp_hist))
        print("It's solved!" if ce.is_solved() else "Not solved!")
        # ce.render(flat=False)  # .savefig("test%02d.png" % m, dpi=865 / c.N)