Beispiel #1
0
def simulateVanillaMCTS(model, state, depth, q, counts, seenStates, stateStr):
    """Run one vanilla-MCTS simulation from *state*, mutating q/counts in place.

    On the first visit to a state, expands it by evaluating every child with
    the value network and estimates the node via a policy rollout.  Otherwise
    selects an action with UCB1, recurses, and performs an incremental-mean
    backup of the sampled return.

    Returns the discounted sampled return for this simulation (0 once the
    depth budget is exhausted).
    """
    if depth == 0:
        return 0
    if stateStr not in seenStates:
        # Expansion: initialize per-action value and visit-count tables.
        q[stateStr] = {}
        counts[stateStr] = {}
        for move in moves:
            nextState = py222.doAlgStr(state, move)
            nextStateArray = np.array([py222.getState(nextState).flatten()])
            value, _ = model.predict(nextStateArray)
            # Extract the scalar prediction (model.predict returns a 1x1
            # array; doADI extracts value[0][0] the same way) so q holds
            # plain numbers rather than arrays.
            q[stateStr][move] = value[0][0] + reward(nextState)
            counts[stateStr][move] = 1
        seenStates.add(stateStr)
        return rolloutVanillaMCTS(model, state, depth)
    # Selection: UCB1 = Q(s,a) + c * sqrt(ln(N(s)) / N(s,a)).
    totalStateCounts = sum(counts[stateStr][move] for move in moves)
    allQuantities = np.zeros(len(moves))
    for i, move in enumerate(moves):
        allQuantities[i] = q[stateStr][move] + constants.kMCTSExploration * math.sqrt(
            math.log(totalStateCounts) / counts[stateStr][move])
    bestMove = moves[allQuantities.argmax()]
    nextState = py222.doAlgStr(state, bestMove)
    r = reward(nextState)
    newQ = r + constants.kDiscountFactor * simulateVanillaMCTS(
        model, nextState, depth - 1, q, counts, seenStates, str(nextState))
    # Backup: incremental mean update of the action value.
    counts[stateStr][bestMove] += 1
    q[stateStr][bestMove] += (
        newQ - q[stateStr][bestMove]) / counts[stateStr][bestMove]
    return newQ
Beispiel #2
0
def rolloutVanillaMCTS(model, cube, depth):
    """Estimate the value of *cube* with a depth-limited softmax-policy rollout.

    Samples one move per step from the policy head and returns the
    discounted sum of rewards collected along the way.
    """
    if depth == 0:
        return 0
    flattened = np.array([py222.getState(cube).flatten()])
    _, policies = model.predict(flattened)
    chosenIndex = selectActionSoftmax(policies)
    nextState = py222.doAlgStr(cube, moves[chosenIndex])
    immediate = reward(nextState)
    remaining = rolloutVanillaMCTS(model, nextState, depth - 1)
    return immediate + constants.kDiscountFactor * remaining
Beispiel #3
0
def solveSingleCubeGreedy(model, cube, maxMoves):
    """Greedily follow the policy head until solved or the budget runs out.

    Returns (solved, movesTaken); on failure movesTaken is maxMoves + 1.
    """
    movesUsed = 0
    while movesUsed <= maxMoves:
        if py222.isSolved(cube, convert=True):
            return True, movesUsed
        flattened = np.array([py222.getState(cube).flatten()])
        _, policies = model.predict(flattened)
        # Always take the single highest-probability move (no sampling).
        greedyIndex = policies[0].argmax()
        cube = py222.doAlgStr(cube, moves[greedyIndex])
        movesUsed += 1
    return False, maxMoves + 1
Beispiel #4
0
def generateSamples(k, l):
    """Generate *l* scramble sequences of *k* random moves each.

    Returns (samples, states): the N = k*l scrambled cube arrays and a
    sparse (coo_matrix) stack of their flattened state encodings.
    """
    total = k * l
    samples = np.empty((total, constants.kNumStickers), dtype=bytes)
    states = np.empty((total, constants.kNumCubes * constants.kNumStickers))
    for seqIdx in range(l):
        # Each sequence starts from the solved state and deepens one move
        # at a time, recording every intermediate scramble.
        cube = py222.initState()
        for moveIdx in range(k):
            cube = py222.doAlgStr(cube, getRandomMove())
            row = k * seqIdx + moveIdx
            samples[row] = cube
            states[row] = py222.getState(cube).flatten()
    return samples, coo_matrix(states)
Beispiel #5
0
def solveSingleCubeVanillaMCTS(model, cube, maxMoves, maxDepth):
    """Solve *cube* by running one vanilla-MCTS search per move taken.

    The q/counts tables persist across moves so later searches reuse
    earlier statistics.  Returns (solved, movesTaken); on failure
    movesTaken is maxMoves + 1.
    """
    q, counts = {}, {}
    movesUsed = 0
    while movesUsed <= maxMoves:
        if py222.isSolved(cube, convert=True):
            return True, movesUsed
        chosen = selectActionVanillaMCTS(model, cube, maxDepth, q, counts)
        if chosen == -1:
            # Sentinel from the selector: abort instead of looping forever.
            print("something went wrong when selecting best move")
            break
        cube = py222.doAlgStr(cube, moves[chosen])
        movesUsed += 1
    return False, maxMoves + 1
Beispiel #6
0
def doADI(k, l, M):
    """Run Autodidactic Iteration for *M* training iterations.

    Each iteration generates fresh scrambles (l sequences of k moves),
    labels every sample with the current network's one-step-lookahead
    best value and best move, then fits the model on those targets.
    Prints per-phase wall-clock timings and the iteration number.
    Returns the trained model.
    """
    model = buildModel(constants.kNumStickers * constants.kNumCubes)
    compileModel(model, constants.kLearningRate)
    for iterNum in range(M):
        tStart = time.time()
        samples, _ = generateSamples(k, l)
        tEnd = time.time()
        print(tEnd - tStart)
        numSamples = len(samples)
        states = np.empty(
            (numSamples, constants.kNumStickers * constants.kNumCubes))
        optimalVals = np.empty((numSamples, 1))
        optimalPolicies = np.empty(numSamples, dtype=np.int32)
        tStart = time.time()
        for i, sample in enumerate(samples):
            # One-step lookahead: value of each child plus immediate reward.
            childValues = np.empty(len(moves))
            for j, move in enumerate(moves):
                child = py222.doAlgStr(sample, move)
                childState = np.array([py222.getState(child).flatten()])
                predicted, _ = model.predict(childState)
                childValues[j] = predicted[0][0] + reward(child)
            optimalVals[i] = np.array([childValues.max()])
            optimalPolicies[i] = childValues.argmax()
            states[i] = py222.getState(sample).flatten()
        tEnd = time.time()
        print(tEnd - tStart)
        tStart = time.time()
        model.fit(states, {
            "PolicyOutput": optimalPolicies,
            "ValueOutput": optimalVals
        },
                  epochs=constants.kNumMaxEpochs,
                  verbose=False,
                  steps_per_epoch=1)
        tEnd = time.time()
        print(tEnd - tStart)
        # Keras can accumulate garbage across fit calls; collect explicitly.
        gc.collect()
        print(iterNum)
    return model
Beispiel #7
0
# Usage example: build a solved cube, apply a scramble, then solve it.
import py222
import solver
import numpy as np

# get solved state
s = py222.initState()

# apply some scramble
s = py222.doAlgStr(s, "F2")

# solve cube
solver.solveCube(s)
Beispiel #8
0
# solve a cube state
def solveCube(s):
    # print cube state
    py222.printCube(s)

    # FC-normalize stickers
    print("normalizing stickers...")
    s = py222.normFC(s)

    # generate pruning tables
    print("generating pruning tables...")
    genOTable(py222.initState(), 0)
    genPTable(py222.initState(), 0)

    # run IDA*
    print("searching...")
    solved = False
    depth = 1
    while depth <= 11 and not solved:
        print("depth {}".format(depth))
        solved = IDAStar(s, depth, [])
        depth += 1


if __name__ == "__main__":
    # input some scrambled state
    s = py222.doAlgStr(py222.initState(), "R U2 R2 F2 R' F2 R F R")
    # solve cube
    solveCube(s)
Beispiel #9
0
def solveSingleCubeFullMCTS(model, cube, maxMoves):
    """Attempt to solve *cube* with full MCTS (PUCT-style selection using
    policy-network priors and virtual losses).

    Returns (solved, movesTaken, simulatedPath); on failure movesTaken is
    maxMoves + 1.  NOTE(review): numMovesTaken counts tree-policy steps
    along one simulated path, not distinct solve attempts — confirm this
    matches the intended move budget.
    """
    numMovesTaken = 0
    simulatedPath = []    # cube states visited along the current path
    simulatedActions = [] # move chosen at each path state (one fewer entry)
    treeStates = set()    # states already expanded into the search tree
    seenStates = set()    # states whose per-action tables are initialized
    currentCube = cube
    currentCubeStr = str(cube)
    counts = {}              # N(s,a): visit counts
    maxVals = {}             # W(s,a): best value backed up through (s,a)
    priorProbabilities = {}  # P(s,a): policy-head priors
    virtualLosses = {}       # temporary penalty applied while a path is in flight
    state = np.array([py222.getState(currentCube).flatten()])
    _, probs = model.predict(state)
    probsArray = probs[0]
    initStateVals(currentCubeStr, counts, maxVals, priorProbabilities,
                  virtualLosses, probsArray)
    seenStates.add(currentCubeStr)
    simulatedPath.append(currentCube)
    while numMovesTaken <= maxMoves:
        if py222.isSolved(currentCube, convert=True):
            return True, numMovesTaken, simulatedPath
        if currentCubeStr not in treeStates:
            # Leaf: expand all children, then back the value estimate up.
            for move in moves:
                childState = py222.doAlgStr(currentCube, move)
                childStateStr = str(childState)
                if childStateStr not in seenStates:
                    state = np.array([py222.getState(childState).flatten()])
                    _, probs = model.predict(state)
                    probsArray = probs[0]
                    initStateVals(childStateStr, counts, maxVals,
                                  priorProbabilities, virtualLosses,
                                  probsArray)
                    seenStates.add(childStateStr)
            state = np.array([py222.getState(currentCube).flatten()])
            value, _ = model.predict(state)
            value = value[0][0]
            # Backup: for every (state, action) on the path, keep the max
            # value, bump the visit count, and release the virtual loss
            # that was added during selection.
            for i, state in enumerate(simulatedPath):
                if i < len(simulatedActions):
                    stateStr = str(state)
                    maxVals[stateStr][simulatedActions[i]] = max(
                        maxVals[stateStr][simulatedActions[i]], value)
                    counts[stateStr][simulatedActions[i]] += 1
                    virtualLosses[stateStr][
                        simulatedActions[i]] -= constants.kVirtualLoss
            treeStates.add(currentCubeStr)
        else:
            # Interior node: choose the action maximizing Q + U, where U is
            # the prior-weighted exploration bonus.
            actionVals = np.zeros(len(moves))
            totalStateCounts = 0
            for move in moves:
                totalStateCounts += counts[currentCubeStr][move]
            for i in range(len(moves)):
                currMove = moves[i]
                q = maxVals[currentCubeStr][currMove] - virtualLosses[
                    currentCubeStr][currMove]
                u = constants.kMCTSExploration * priorProbabilities[
                    currentCubeStr][currMove] * math.sqrt(totalStateCounts) / (
                        1 + counts[currentCubeStr][currMove])
                actionVals[i] = u + q
            bestMoveIndex = actionVals.argmax()
            bestMove = moves[bestMoveIndex]
            # Add a virtual loss so subsequent selections avoid this action
            # until its value is backed up.
            virtualLosses[currentCubeStr][bestMove] += constants.kVirtualLoss
            simulatedActions.append(bestMove)
            currentCube = py222.doAlgStr(currentCube, bestMove)
            currentCubeStr = str(currentCube)
            simulatedPath.append(currentCube)
            numMovesTaken += 1
    return False, maxMoves + 1, simulatedPath
Beispiel #10
0
 def executeAction(self, action):
     """Apply the move string *action* to this object's stored cube state."""
     updatedCube = py222.doAlgStr(self.cube, action)
     self.cube = updatedCube