def codalab_run(run_id):
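    """Run one of the predefined benchmark scenarios selected by run_id.

    Expectimax proposes each move; whenever the proposed move would land on an
    already-visited cell, an A* path to the first remaining unvisited node is
    replayed step by step instead. Returns a dict of turn and distance stats.
    """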
    # a=ppp((5,5),(((1,1), (3,3)),),(0,0))
    a = None
    if run_id == 0:
        a = ppp((10, 4), (((0, 3), (0, 6)), ((3, 3), (3, 6))))
    if run_id == 1:
        a = ppp((10, 10), (
            ((1, 1), (1, 2)),
            ((1, 2), (3, 2)),
            ((6, 1), (8, 1)),
            ((8, 1), (8, 3)),
            ((6, 3), (8, 3)),
            ((3, 4), (4, 4)),
            ((4, 4), (4, 5)),
            ((6, 6), (6, 7)),
            ((1, 7), (2, 8)),
        ), (0, 0))
    if a is None:
        raise ValueError("Unsupported run_id: {}".format(run_id))
    obstacles = list(a.env.block_area)
    occupancy = astar.DetOccupancyGrid2D(a.env.map.height, a.env.map.width,
                                         obstacles)
    aa = []
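    # Main control loop: expectimax picks an action; if that action would revisit
    # a cell, fall back to an A* route to the first remaining unvisited node.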
    while not a.end():
        action = exptimax(a, 9)
        X, Y = a.env.next_location(action)
        m = a.env.entire_map()
        if m[X][Y] == a.env.map.VISITED:
            x_init = a.env.agent_location()
            x_goal = a.env.remaining_nodes()[0]
            Astar = astar.AStar((0, 0), (a.env.map.height, a.env.map.width),
                                x_init, x_goal, occupancy)
            if not Astar.solve():
                print("A* found no path")
            else:
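                # Translate consecutive A* waypoints into primitive moves.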
                for j in range(len(Astar.path) - 1):
                    a1, b1 = Astar.path[j]
                    a2, b2 = Astar.path[j + 1]
                    if a2 == a1 - 1 and b1 == b2:
                        a.env.step(a.env.UP)
                    elif a2 == a1 + 1 and b1 == b2:
                        a.env.step(a.env.DOWN)
                    elif a2 == a1 and b2 == b1 - 1:
                        a.env.step(a.env.LEFT)
                    elif a2 == a1 and b2 == b1 + 1:
                        a.env.step(a.env.RIGHT)
            # aa.append(a.env.agent_location())
        else:
            aa.append(a.env.agent_location())
            a.env.step(action)
        print(action)
    aa.append(a.env.agent_location())
    print(aa, a.env.agent_distance, a.env.agent_turns)

    stats = {
        "turn": a.env.agent_turns,
        "dist": a.env.agent_distance,
        "notes": ""
    }

    return stats
def main():
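    """Coverage demo on a 10x10 map with wall-segment obstacles.

    When the expectimax move would revisit a cell, the agent is relocated to the
    first remaining unvisited node; the A* path to it is used only to account for
    the extra distance and turns and to extend the plotted path.
    """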
    # a=ppp((5,5),(((1,1), (3,3)),),(0,0))
    a = ppp((10, 10), (
        ((1, 1), (1, 2)),
        ((1, 2), (3, 2)),
        ((6, 1), (8, 1)),
        ((8, 1), (8, 3)),
        ((6, 3), (8, 3)),
        ((3, 4), (4, 4)),
        ((4, 4), (4, 5)),
        ((6, 6), (6, 7)),
        ((1, 7), (2, 8)),
    ), (0, 0))
    obstacles = list(a.env.block_area)
    occupancy = astar.DetOccupancyGrid2D(a.env.map.width, a.env.map.height,
                                         obstacles)
    aa = []
    while not a.end():
        action = exptimax(a, 9)
        X, Y = a.env.next_location(action)
        m = a.env.entire_map()
        if m[X][Y] == a.env.map.VISITED:
            newx, newy = a.env.remaining_nodes()[0]
            x_init = a.env.agent_location()
            a.env.agentX = newx
            a.env.agentY = newy
            a.env.map.visit(newx, newy)
            x_goal = a.env.agent_location()
            Astar = astar.AStar((0, 0), (a.env.map.width, a.env.map.height),
                                x_init, x_goal, occupancy)
            if not Astar.solve():
                print("A* found no path")
            else:
                a.env.agent_distance += len(Astar.path)
                for j in range(len(Astar.path) - 1):
                    a1, b1 = Astar.path[j]
                    a2, b2 = Astar.path[j + 1]
                    if a1 != a2 and b1 != b2:
                        a.env.agent_turns += 1
            a.env.path.extend(Astar.path)
            # aa.append(a.env.agent_location())
        else:
            aa.append(a.env.agent_location())
            a.env.step(action)
        print(action)
    aa.append(a.env.agent_location())
    print(aa, a.env.agent_distance, a.env.agent_turns)
    a.env.plot_path()
def main():
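    """Coverage demo using the single-argument ppp constructor.

    Unlike the variant above, the A* fallback path is replayed move by move with
    env.step(), so distance and turns are accumulated by the environment itself.
    """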
    # a=ppp((5,5),(((1,1), (3,3)),),(0,0))
    a = ppp(5)
    obstacles = list(a.env.block_area)
    occupancy = astar.DetOccupancyGrid2D(a.env.map.height, a.env.map.width,
                                         obstacles)
    aa = []
    while not a.end():
        action = exptimax(a, 9)
        X, Y = a.env.next_location(action)
        m = a.env.entire_map()
        if m[X][Y] == a.env.map.VISITED:
            x_init = a.env.agent_location()
            x_goal = a.env.remaining_nodes()[0]
            Astar = astar.AStar((0, 0), (a.env.map.height, a.env.map.width),
                                x_init, x_goal, occupancy)
            if not Astar.solve():
                print("A* found no path")
            else:
                for j in range(len(Astar.path) - 1):
                    a1, b1 = Astar.path[j]
                    a2, b2 = Astar.path[j + 1]
                    if a2 == a1 - 1 and b1 == b2:
                        a.env.step(a.env.UP)
                    elif a2 == a1 + 1 and b1 == b2:
                        a.env.step(a.env.DOWN)
                    elif a2 == a1 and b2 == b1 - 1:
                        a.env.step(a.env.LEFT)
                    elif a2 == a1 and b2 == b1 + 1:
                        a.env.step(a.env.RIGHT)
            # aa.append(a.env.agent_location())
        else:
            aa.append(a.env.agent_location())
            a.env.step(action)
        print(action)
    aa.append(a.env.agent_location())
    print(aa, a.env.agent_distance, a.env.agent_turns)
def local_map_approx_search(aaa):
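    """Plan with depth-limited search over the agent's local map window.

    Each step a fresh ppp instance is seeded with the local map and searched to a
    fixed depth; if the chosen move would revisit a cell, an A* path to the first
    remaining unvisited node is replayed instead. Returns the visited waypoints.
    """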
    aaaa = []

    def getAction(pp, dd):
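        # Depth-limited max search: evaluate every legal action by recursing dd
        # plies ahead and return the action with the highest estimated reward.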
        def recurse(s, d):
            if s.end() or d == 0:
                return s.reward()
            else:
                f = -float('inf')
                for a in s.getLegalActions():
                    tempt = recurse(s.generateSuccessor(a), d - 1)
                    if tempt > f:
                        f = tempt
                return f

        f = -float('inf')
        astore = None
        for a in pp.getLegalActions():
            tempt = recurse(pp.generateSuccessor(a), dd - 1)
            if tempt > f:
                f = tempt
                astore = a
        return astore

    obstacles = list(aaa.env.block_area)
    occupancy = astar.DetOccupancyGrid2D(aaa.env.map.height, aaa.env.map.width,
                                         obstacles)
    while not aaa.end():
        pp = ppp()
        pp.env.map.data = aaa.env.local_map(aaa.lmapsize, aaa.lmapsize)
        a = getAction(pp, aaa.lmapsize * aaa.lmapsize - 1)
        X, Y = aaa.env.next_location(a)
        m = aaa.env.entire_map()
        if m[X][Y] == aaa.env.map.VISITED:
            x_init = aaa.env.agent_location()
            x_goal = aaa.env.remaining_nodes()[0]
            Astar = astar.AStar(
                (0, 0), (aaa.env.map.height, aaa.env.map.width), x_init,
                x_goal, occupancy)
            if not Astar.solve():
                print("A* found no path")
            else:
                for j in range(len(Astar.path) - 1):
                    a1, b1 = Astar.path[j]
                    a2, b2 = Astar.path[j + 1]
                    if a2 == a1 - 1 and b1 == b2:
                        aaa.env.step(aaa.env.UP)
                    elif a2 == a1 + 1 and b1 == b2:
                        aaa.env.step(aaa.env.DOWN)
                    elif a2 == a1 and b2 == b1 - 1:
                        aaa.env.step(aaa.env.LEFT)
                    elif a2 == a1 and b2 == b1 + 1:
                        aaa.env.step(aaa.env.RIGHT)
            # aaaa.append(aaa.env.agent_location())
        else:
            aaaa.append(aaa.env.agent_location())
            aaa.env.step(a)
        #print(aaaa)
    # aaaa.append(aaa.env.agent_location())
    return aaaa
def TDlearning(ppp, eps=0.3, iteration=200, max_steps=10000):
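    """Train a neural value function for the coverage task by TD-style updates.

    Each iteration runs an epsilon-greedy rollout on a deep copy of the problem,
    fitting the network toward a one-step bootstrapped target, then the learned
    values drive a final greedy rollout on the original problem.
    """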
    model = Sequential()
    #model.add(LocallyConnected2D(5, (3, 3),
    #       input_shape=(1,5, 5), padding='valid',))
    # model.add(Flatten(input_shape=(1,5, 5)))
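    # State features: the visit-counter grid flattened to 100 values plus agent
    # x, agent y, turn count, and distance travelled, giving input_dim=104.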
    model.add(Dense(50, activation='relu', input_dim=104))
    model.add(Dense(30, activation='relu'))
    model.add(Dense(30, activation='relu'))
    model.add(Dense(30, activation='relu'))
    model.add(Dense(1, activation='linear'))
    model.compile(loss='mse', optimizer='adam', metrics=['mae'])
    #model.compile(loss='mse',optimizer=keras.optimizers.SGD(lr=0.0001, momentum=0.9, nesterov=True))
    for i in range(iteration):
        ttttt = list(ppp.env.counter.get_data(100).flatten())
        ttttt.append(ppp.env.agentX)
        ttttt.append(ppp.env.agentY)
        ttttt.append(ppp.env.agent_turns)
        ttttt.append(ppp.env.agent_distance)
        ttttt = np.expand_dims(ttttt, axis=0)
        val = model.predict(ttttt)
        print("i=", i, val)
        temptppp = copy.deepcopy(ppp)
        obstacles = list(temptppp.env.block_area)
        occupancy = astar.DetOccupancyGrid2D(temptppp.env.map.height,
                                             temptppp.env.map.width, obstacles)
        j = 0
        while not temptppp.end() and j < max_steps:
            j += 1
            #print(temptppp.env.counter.data)
            turns = temptppp.env.agent_turns
            distance = temptppp.env.agent_distance
            unvisited = temptppp.env.num_unvisited_nodes()
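            # Epsilon-greedy: with probability eps take a random legal action,
            # otherwise pick the action with the best estimated one-step value.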
            if random.random() < eps:
                a = random.choice(temptppp.getLegalActions())
                newppp = temptppp.generateSuccessor(a)
                ttttt = list(newppp.env.counter.get_data(100).flatten())
                ttttt.append(newppp.env.agentX)
                ttttt.append(newppp.env.agentY)
                ttttt.append(newppp.env.agent_turns)
                ttttt.append(newppp.env.agent_distance)
                ttttt = np.expand_dims(ttttt, axis=0)
                val = model.predict(ttttt)
                differturns = newppp.env.agent_turns - turns
                differdistance = 1
                differunvisited = newppp.env.num_unvisited_nodes() - unvisited
                fff = reward = val - differturns * 2 - 2 - differunvisited * 10
            else:
                fff = [[-float('Inf')]]
                for ttt in temptppp.getLegalActions():
                    newppp = temptppp.generateSuccessor(ttt)
                    ttttt = list(newppp.env.counter.get_data(100).flatten())
                    ttttt.append(newppp.env.agentX)
                    ttttt.append(newppp.env.agentY)
                    ttttt.append(newppp.env.agent_turns)
                    ttttt.append(newppp.env.agent_distance)
                    ttttt = np.expand_dims(ttttt, axis=0)
                    val = model.predict(ttttt)
                    differturns = newppp.env.agent_turns - turns
                    differdistance = 1
                    differunvisited = newppp.env.num_unvisited_nodes() - unvisited
                    reward = val - differturns * 2 - 2 - differunvisited * 10
                    if reward[0][0] > fff[0][0]:
                        a = ttt
                        fff = reward
            X, Y = temptppp.env.next_location(a)
            m = temptppp.env.entire_map()
            if m[X][Y] == temptppp.env.map.VISITED:
                x_init = temptppp.env.agent_location()
                x_goal = temptppp.env.remaining_nodes()[0]
                Astar = astar.AStar(
                    (0, 0), (temptppp.env.map.height, temptppp.env.map.width),
                    x_init, x_goal, occupancy)
                Astar.solve()
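                # Use only the first segment of the A* path as this step's action.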
                a1, b1 = Astar.path[0]
                a2, b2 = Astar.path[1]
                if a2 == a1 - 1 and b1 == b2:
                    a = temptppp.env.UP
                elif a2 == a1 + 1 and b1 == b2:
                    a = temptppp.env.DOWN
                elif a2 == a1 and b2 == b1 - 1:
                    a = temptppp.env.LEFT
                elif a2 == a1 and b2 == b1 + 1:
                    a = temptppp.env.RIGHT
                newppp = temptppp.generateSuccessor(a)
                differturns = newppp.env.agent_turns - turns
                differdistance = 1
                differunvisited = newppp.env.num_unvisited_nodes() - unvisited
                fff = reward = val - differturns * 2 - 2 - differunvisited * 10
            target = fff
            ttttt = list(temptppp.env.counter.get_data(100).flatten())
            ttttt.append(temptppp.env.agentX)
            ttttt.append(temptppp.env.agentY)
            ttttt.append(temptppp.env.agent_turns)
            ttttt.append(temptppp.env.agent_distance)
            ttttt = np.expand_dims(ttttt, axis=0)
            model.fit(ttttt, target, epochs=1, verbose=0)
            temptppp.env.step(a)
        ttttt = list(temptppp.env.counter.get_data(100).flatten())
        ttttt.append(temptppp.env.agentX)
        ttttt.append(temptppp.env.agentY)
        ttttt.append(temptppp.env.agent_turns)
        ttttt.append(temptppp.env.agent_distance)
        ttttt = np.expand_dims(ttttt, axis=0)
        # if temptppp.end()==1:
        #while model.predict(ttttt)>1:
        model.fit(ttttt, [[0.0]], epochs=5, verbose=0)
        ttttt = list(temptppp.env.counter.get_data(100).flatten())
        ttttt.append(temptppp.env.agentX)
        ttttt.append(temptppp.env.agentY)
        ttttt.append(temptppp.env.agent_turns)
        ttttt.append(temptppp.env.agent_distance)
        ttttt = np.expand_dims(ttttt, axis=0)
        print('end', model.predict(ttttt))

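    # Final greedy rollout on the original problem: follow the learned value
    # estimates (no exploration), with the same A* fallback for revisits.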
    j = 0
    while not ppp.end() and j < 500:
        j += 1
        fff = -float('Inf')
        turns = ppp.env.agent_turns
        distance = ppp.env.agent_distance
        unvisited = ppp.env.num_unvisited_nodes()
        for ttt in ppp.getLegalActions():
            newppp = ppp.generateSuccessor(ttt)
            ttttt = list(newppp.env.counter.get_data(100).flatten())
            ttttt.append(newppp.env.agentX)
            ttttt.append(newppp.env.agentY)
            ttttt.append(newppp.env.agent_turns)
            ttttt.append(newppp.env.agent_distance)
            ttttt = np.expand_dims(ttttt, axis=0)
            val = model.predict(ttttt)
            differturns = newppp.env.agent_turns - turns
            differdistance = 1
            differunvisited = newppp.env.num_unvisited_nodes() - unvisited
            reward = val - differturns * 2 - 2 - differunvisited * 10
            if reward[0][0] > fff:
                a = ttt
                fff = reward[0][0]
        X, Y = ppp.env.next_location(a)
        m = ppp.env.entire_map()
        if m[X][Y] == ppp.env.map.VISITED:
            x_init = ppp.env.agent_location()
            x_goal = ppp.env.remaining_nodes()[0]
            Astar = astar.AStar(
                (0, 0), (ppp.env.map.height, ppp.env.map.width), x_init,
                x_goal, occupancy)
            Astar.solve()
            a1, b1 = Astar.path[0]
            a2, b2 = Astar.path[1]
            if a2 == a1 - 1 and b1 == b2:
                a = ppp.env.UP
            elif a2 == a1 + 1 and b1 == b2:
                a = ppp.env.DOWN
            elif a2 == a1 and b2 == b1 - 1:
                a = ppp.env.LEFT
            elif a2 == a1 and b2 == b1 + 1:
                a = ppp.env.RIGHT
        ppp.env.step(a)
        print(a)
    print(a, turns, distance, unvisited)
    print(ppp.env.counter.data)
    print(ppp.env.agent_turns, ppp.env.agent_distance)