예제 #1
0
def policy_test(nn, states, inputs):
    """Tabulate, per region, how often the network prefers each action.

    For each of the first ``512 * 3 * 3`` entries, the preferred action is
    the argmax over that entry's 9 predicted scores.  That choice is then
    credited to every region that ``region_from_state`` reports for the
    entry's state over all ordered pairs of board cells.

    Args:
        nn: Object exposing ``predict(inputs)``; its output is reshaped to
            rows of 9 scores.
        states: Indexable collection of states, aligned index-by-index with
            the prediction rows.  Must hold at least ``512 * 3 * 3`` entries.
        inputs: Passed unchanged to ``nn.predict``.

    Returns:
        dict mapping each encountered region to a list of 9 fractions (the
        share of credited choices that went to each action; sums to 1).
    """
    predictions = nn.predict(inputs).reshape((-1, 9))

    # The 9 board cells never change, so build them once rather than
    # re-creating them for every state and every pair.
    cells = [
        helicopter3x3.Position(*np.unravel_index(a, (3, 3)))
        for a in range(9)
    ]

    policy = {}
    for i in range(512 * 3 * 3):
        choice = np.argmax(predictions[i])
        state = states[i]

        for m1 in cells:
            for m2 in cells:
                region = region_from_state(state, m1, m2)
                # Identity test: None signals "no region for this pair".
                if region is None:
                    continue
                if region not in policy:
                    policy[region] = [0] * 9
                policy[region][choice] += 1

    # Turn raw counts into frequencies.  Every stored region was credited at
    # least once, so the total is never zero.
    for region, counts in policy.items():
        total = sum(counts)
        policy[region] = [c / total for c in counts]

    return policy
예제 #2
0
def do_epsilon_greedy_step_regions(paths, epsilon, predict_model):
    """Extend every still-flying path by one epsilon-greedy step.

    Each path is a list of ``(move, state)`` pairs.  For a path whose latest
    state is flying, a deep copy of that state is advanced by one move and
    the new ``(move, state)`` pair is appended to the path in place.  With
    probability ``epsilon`` the move comes from ``random_move_over_regions``;
    otherwise moves are tried from highest to lowest predicted score until
    ``receive_Move`` returns ``None`` (presumably "accepted" - confirm
    against helicopter3x3).

    Args:
        paths: List of paths, mutated in place.
        epsilon: Exploration probability.
        predict_model: Forwarded to ``get_predictions``.

    Returns:
        True when no path was flying (nothing was appended), else False.
    """
    # With epsilon == 1.0 every step explores, so the network is not queried.
    if epsilon == 1.0:
        scores = np.zeros((len(paths), 9))
    else:
        scores = get_predictions(paths, predict_model).reshape((-1, 9))

    # One exploration coin flip per path.
    explore = np.random.uniform(size=len(paths)) < epsilon
    # Action indices per path, best score first.
    best_first = scores.argsort()[:, ::-1]

    finished = True
    for idx, path in enumerate(paths):
        _last_move, current = path[-1]
        if current.status != helicopter3x3.Status.flying:
            continue
        finished = False

        # Work on a copy so the state already stored in the path is untouched.
        successor = copy.deepcopy(current)
        if explore[idx]:
            move = random_move_over_regions(successor)
            successor.receive_Move(move)
        else:
            for flat in best_first[idx]:
                cell = np.unravel_index(flat, (3, 3))
                move = helicopter3x3.Position(*cell)
                if successor.receive_Move(move) is None:
                    break

        path.append((move, successor))
    return finished
예제 #3
0
def generate_random_initial(size):
    """Return ``size`` states, one per map from ``generate_random_maps``.

    Every state is initialised through ``receive_SetState`` with position
    (0, 0), its map, and ``1`` as the third argument (its meaning is defined
    by helicopter3x3 and is not visible here).
    """

    def _state_for(island_map):
        # Fresh State and fresh Position per map, nothing is shared.
        fresh = helicopter3x3.State()
        origin = helicopter3x3.Position(0, 0)
        fresh.receive_SetState(origin, island_map, 1)
        return fresh

    return [_state_for(island_map) for island_map in generate_random_maps(size)]
예제 #4
0
def generate_all_from(x, y, f):
    """Return one state per map yielded by ``generate_all_maps``.

    Every state is initialised through ``receive_SetState`` with position
    ``(x, y)``, the map, and ``f`` as the third argument (its meaning is
    defined by helicopter3x3 and is not visible here).
    """
    result = []
    for island_map in generate_all_maps():
        fresh = helicopter3x3.State()
        fresh.receive_SetState(helicopter3x3.Position(x, y), island_map, f)
        result += [fresh]
    return result
예제 #5
0
def evaluate(nnets):
    """Play the greedy policy on all 512 island maps and tally the outcomes.

    One state is built per 3x3 island map, each initialised at position
    (0, 0) with ``1`` as the third ``receive_SetState`` argument.  All states
    are then advanced in lock-step: each round re-encodes every state,
    queries ``nnets.predict_model`` once, and for every flying state tries
    moves from highest to lowest score until ``receive_Move`` returns
    ``None``.  Rounds repeat until no state is flying.

    Args:
        nnets: Object exposing ``predict_model.predict(inputs)``.

    Returns:
        Tuple ``(reached, crashed)`` counting final states by status.
    """
    # Bit k of the map index says whether flattened cell k is an island.
    codes = np.arange(2**9)
    cell_bits = (codes[:, None] >> np.arange(9)) & 1
    grids = cell_bits.astype(bool).reshape((512, 3, 3))

    # coords[i, j] == (i, j); a boolean grid mask selects the island cells.
    coords = np.indices((3, 3)).transpose(1, 2, 0)

    boards = []
    for grid in grids:
        islands = defaultdict(lambda: False)
        for x, y in coords[grid]:
            islands[helicopter3x3.Position(x, y)] = True
        board = helicopter3x3.State()
        board.receive_SetState(helicopter3x3.Position(0, 0), islands, 1)
        boards.append(board)

    # Flat action index -> board position.
    moves = [
        helicopter3x3.Position(*np.unravel_index(k, (3, 3)))
        for k in range(9)
    ]

    # The encoding buffer is reused across rounds.
    net_inputs = np.zeros((512, 3, 3, 2))

    while True:
        for slot, board in enumerate(boards):
            encode_state(net_inputs[slot], board)

        scores = nnets.predict_model.predict(net_inputs).reshape((-1, 9))
        best_first = scores.argsort()[:, ::-1]

        any_flying = False
        for slot, board in enumerate(boards):
            if board.status != helicopter3x3.Status.flying:
                continue
            any_flying = True
            for k in best_first[slot]:
                if board.receive_Move(moves[k]) is None:
                    break

        if not any_flying:
            break

    tally = pd.Series([board.status for board in boards]).value_counts().to_dict()
    n_reached = tally.get(helicopter3x3.Status.reached, 0)
    n_crashed = tally.get(helicopter3x3.Status.crashed, 0)
    return n_reached, n_crashed
예제 #6
0
def generate_random_maps(size):
    """Draw ``size`` random 3x3 island maps.

    Each map comes from one integer drawn from [0, 2**9): bit ``k`` of that
    integer decides whether flattened cell ``k`` (row-major on the 3x3 board)
    is an island.

    Args:
        size: Number of maps to generate (an int).

    Returns:
        List of ``size`` defaultdicts keyed by ``helicopter3x3.Position``.
        Island cells map to ``True``; a missing cell yields the default,
        an empty list (falsy).
    """
    codes = np.random.uniform(0, 2**9, size=size).astype(np.uint32)

    # Extract the 9 low bits with explicit shifts.  Reinterpreting the
    # integers as bytes and slicing the first 9 unpacked bits is wrong: it
    # yields the low byte plus the top bit of the next byte (always 0 here),
    # so cell 8 could never be an island, and it depends on host byte order.
    bits = (codes[:, None] >> np.arange(9, dtype=np.uint32)) & 1

    data = [defaultdict(list) for _ in range(size)]
    # Each row of argwhere is (map index, flattened cell index) of a set bit.
    for n, ix in np.argwhere(bits):
        p = helicopter3x3.Position(*np.unravel_index(ix, (3, 3)))
        data[n][p] = True
    return data
예제 #7
0
def random_move():
    """Return a uniformly random cell of the 3x3 board as a Position.

    Returns:
        ``helicopter3x3.Position`` built from the (row, column) pair of a
        flat index drawn uniformly from 0..8.
    """
    # randint(9) draws from 0..8 inclusive.  The earlier uniform(9) call set
    # low=9 with the default high=1.0, so it sampled (1, 9]: index 0 was
    # never produced and the out-of-range index 9 was possible.
    ix = np.random.randint(9)
    move = np.unravel_index(ix, (3, 3))
    return helicopter3x3.Position(*move)