class KilobotsObjectMazeSimulator:
    WIDTH, HEIGHT = 1200, 600
    SCALE_REAL_TO_SIM = 10  # for numerical reasons
    SCALE_REAL_TO_VIS = HEIGHT  # 1m = HEIGHT pixels

    ZMQ_PORT = 2358

    def __init__(self):
        # pygame
        self.screen = pygame.display.set_mode((self.WIDTH, self.HEIGHT),
                HWSURFACE | DOUBLEBUF, 32)
        pygame.display.set_caption('kbsim')
        self.clock = pygame.time.Clock()

        # pybox2d
        self.world = world(gravity=(0, 0), doSleep=True)

        self.maze = Labyrinth(self.world, self.SCALE_REAL_TO_SIM, self.SCALE_REAL_TO_VIS)

        # zqm
        context = Context()
        self.socket = context.socket(PAIR)
        self.socket.connect('tcp://localhost:{}'.format(self.ZMQ_PORT))

    def run(self):
        while True:
            msg = pickle.loads(self.socket.recv())

            if msg['message'] == 'sentPolicyModules':
                for fileName, source in msg['modules']:
                    with open(fileName, 'w') as f:
                        f.write(source)
                objPolicyModule = importlib.import_module(msg['objPolicyModule'])
                mazePolicyModule = importlib.import_module(msg['mazePolicyModule'])
            elif msg['message'] == 'testMaze':
                # load the object policy
                objPolicyDict = msg['objPolicyDict']
                self.objPolicy = objPolicyModule.fromSerializableDict(objPolicyDict)

                # load the maze policy
                mazePolicyDict = msg['mazePolicyDict']
                self.mazePolicy = mazePolicyModule.fromSerializableDict(mazePolicyDict)

                # read parameters
                self.objectShape = msg['objectShape']
                self.numKilobots = msg['numKilobots']
                self.stepsPerSec = msg['stepsPerSec']

                self._testMazePolicy()
            else:
                print('got unexpected message')

    def _testMazePolicy(self):
        # create kilobots
        self.kilobots = []
        for i in range(self.numKilobots):
            kilobot = Kilobot(self.world, self.SCALE_REAL_TO_SIM,
                self.SCALE_REAL_TO_VIS, [0, 0])
            kilobot.fixture.friction = 20

            self.kilobots += [kilobot]

        self.pushObject = Object(self.world, self.SCALE_REAL_TO_SIM,
                self.SCALE_REAL_TO_VIS, [0, 0], self.objectShape)

        # fixed object start position
        objStartX = 1.25
        objStartY = 0.75

        r = self.kilobots[0].RADIUS
        kilobotOffsets = array([[-r, -r], [r, -r], [-r, r], [r, r]])

        self.pushObject.body.position = vec2(objStartX, objStartY) *\
                self.SCALE_REAL_TO_SIM
        self.pushObject.body.angle = 0

        # light starts over the object
        lightPos = matrix([objStartX, objStartY])

        HALF_W = self.pushObject.HALF_W

        # kilobots start left of the object
        for (i, kilobot) in zip(range(self.numKilobots), self.kilobots):
            x = objStartX - 2.0 * HALF_W + (1 + i / 4) * kilobotOffsets[i % 4, 0]
            y = objStartY + (1 + i / 4) * kilobotOffsets[i % 4, 1]
            kilobot.body.position = vec2(x, y) * self.SCALE_REAL_TO_SIM

        s = asmatrix(empty((1, 2 + 2 * self.numKilobots)))
        targetPos = matrix([objStartX, objStartY])

        while True:
            """ user interaction """
            # handle keys
            for event in pygame.event.get():
                if event.type == KEYDOWN:
                    if event.key == K_PLUS:
                        self.stepsPerSec *= 2
                    elif event.key == K_MINUS:
                        self.stepsPerSec = np.max([1, self.stepsPerSec / 2])

            """ drawing """
            self.screen.fill((0, 0, 0, 0))

            self.maze.draw(self.screen)
            self.pushObject.draw(self.screen)

            for kilobot in self.kilobots:
                kilobot.draw(self.screen)

            # draw light
            lx = int(self.SCALE_REAL_TO_VIS * lightPos[0, 0])
            ly = int(self.screen.get_height() - self.SCALE_REAL_TO_VIS *
                    lightPos[0, 1])
            lr = int(self.SCALE_REAL_TO_VIS * 0.02)
            gfxdraw.aacircle(self.screen, lx, ly, lr, (255, 255, 0))

            objPos = self.pushObject.getRealPosition()

            # draw line from object to target position
            ox = int(self.SCALE_REAL_TO_VIS * objPos[0, 0])
            oy = int(self.screen.get_height() - self.SCALE_REAL_TO_VIS *
                    objPos[0, 1])
            tx = int(self.SCALE_REAL_TO_VIS * targetPos[0, 0])
            ty = int(self.screen.get_height() - self.SCALE_REAL_TO_VIS *
                    targetPos[0, 1])

            pygame.draw.aaline(self.screen, (0, 0, 255), (ox, oy), (tx, ty))

            pygame.display.flip()
            self.clock.tick(self.stepsPerSec)

            """ simulation """
            # current state
            s[0, 0] = lightPos[0, 0] - objPos[0, 0]
            s[0, 1] = lightPos[0, 1] - objPos[0, 1]

            for (i, kilobot) in zip(range(self.numKilobots), self.kilobots):
                kbPos = kilobot.getRealPosition()
                s[0, 2 + 2 * i + 0] = kbPos[0, 0] - objPos[0, 0]
                s[0, 2 + 2 * i + 1] = kbPos[0, 1] - objPos[0, 1]

            # solve maze
            targetPos = self.mazePolicy.getTargetPosition(objPos)

            # rotate state
            direction = targetPos - objPos
            angle = -math.atan2(direction[0, 1], direction[0, 0])

            sx = s.flat[0::2] * math.cos(angle) - s.flat[1::2] * math.sin(angle)
            sy = s.flat[1::2] * math.cos(angle) + s.flat[0::2] * math.sin(angle)

            s.flat[0::2] = sx
            s.flat[1::2] = sy

            # choose action
            a = self.objPolicy.getMeanAction(s)

            # rotate action
            ax = a[0, 0] * math.cos(-angle) - a[0, 1] * math.sin(-angle)
            ay = a[0, 1] * math.cos(-angle) + a[0, 0] * math.sin(-angle)

            a[0, 0] = ax
            a[0, 1] = ay

            # take action
            n = linalg.norm(a)
            if n > 0.015:
                lightPos += (a * 0.015 / n)
            else:
                lightPos += a

            # move directly toward the light
            for kilobot in self.kilobots:
                kbPos = kilobot.getRealPosition()

                v = lightPos - kbPos

                # cap max velocity
                n = linalg.norm(v)
                if n > 0.01:
                    v *= (0.01 / n)

                kilobot.body.linearVelocity = vec2(v[0, 0], v[0, 1]) * \
                        self.SCALE_REAL_TO_SIM
                kilobot.body.linearDamping = 0.0

            for i in range(10):
                self.world.Step(0.1, 10, 10)
class KilobotsObjectMazeSimulator:
    WIDTH, HEIGHT = 1200, 600
    SCALE_REAL_TO_SIM = 10  # for numerical reasons
    SCALE_REAL_TO_VIS = HEIGHT  # 1m = HEIGHT pixels

    ZMQ_PORT = 2357

    def __init__(self):
        # pygame
        self.screen = pygame.display.set_mode((self.WIDTH, self.HEIGHT),
                HWSURFACE | DOUBLEBUF, 32)
        pygame.display.set_caption('kbsim - 0.0s')
        self.clock = pygame.time.Clock()

        self.world = world(gravity=(0, 0), doSleep=True)

        # zqm
        context = Context()
        self.socket = context.socket(PAIR)
        self.socket.connect('tcp://localhost:{}'.format(self.ZMQ_PORT))

    def run(self):
        while True:
            msg = pickle.loads(self.socket.recv())

            if msg['message'] == 'sentPolicyModules':
                for fileName, source in msg['modules']:
                    with open(fileName, 'w') as f:
                        f.write(source)
                policyModule = importlib.import_module(msg['policyModule'])
            elif msg['message'] == 'getSamples':
                # load the policy
                policyDict = msg['policyDict']
                self.policy = policyModule.fromSerializableDict(policyDict)

                # read parameters
                self.objectShape = msg['objectShape']

                self.numKilobots = msg['numKilobots']
                self.numEpisodes = msg['numEpisodes']
                self.numStepsPerEpisode = msg['numStepsPerEpisode']
                self.stepsPerSec = msg['stepsPerSec']

                self.epsilon = msg['epsilon']
                self.useMean = msg['useMean']

                S, A, R, S_ = self._generateSamples()

                msg = {'message': 'sentSamples',
                       'samples': (S, A, R, S_)}
                self.socket.send(pickle.dumps(msg, protocol=2))
            else:
                print('got unexpected message')

    def _generateSamples(self):
        # create kilobots
        self.kilobots = []
        for i in range(self.numKilobots):
            kilobot = Kilobot(self.world, self.SCALE_REAL_TO_SIM,
                self.SCALE_REAL_TO_VIS, [0, 0])
            kilobot.fixture.friction = 20

            self.kilobots += [kilobot]

        self.pushObject = Object(self.world, self.SCALE_REAL_TO_SIM,
                self.SCALE_REAL_TO_VIS, [0, 0], self.objectShape)

        numSamples = self.numEpisodes * self.numStepsPerEpisode

        # fixed object start position
        objStartX = 1.0
        objStartY = 0.5
        objStart = array([objStartX, objStartY])

        # kilobots start in a circel around the object
        r = 1.5 * self.pushObject.HALF_W
        A = linspace(0, 2 * math.pi, self.numEpisodes + 1)[0:self.numEpisodes]
        startPositions = c_[objStartX + np.cos(A) * r,
                            objStartY + np.sin(A) * r];

        r = self.kilobots[0].RADIUS
        kilobotOffsets = array([[-r, -r], [r, -r], [-r, r], [r, r]])

        # s: light.x light.y kb.x1 kb.y1 ... kb.xn kb.yn
        #    everything is relative to the object position
        # a: light movement (dx, dy)
        S = asmatrix(empty((numSamples, 2 + 2 * self.numKilobots)))
        A = asmatrix(empty((numSamples, 2)))
        R = asmatrix(empty((numSamples, 1)))
        S_ = asmatrix(empty((numSamples, 2 + 2 * self.numKilobots)))

        for ep in range(startPositions.shape[0]):
            self.pushObject.body.position = vec2(objStartX, objStartY) *\
                    self.SCALE_REAL_TO_SIM
            self.pushObject.body.angle = 0

            # light starts in circel around the object
            start = startPositions[ep, :]
            lightPos = matrix(start)

            # kilobots start at the light position in a fixed formation
            for (i, kilobot) in zip(range(self.numKilobots), self.kilobots):
                x = start[0] + (1 + i / 4) * kilobotOffsets[i % 4, 0]
                y = start[1] + (1 + i / 4) * kilobotOffsets[i % 4, 1]
                kilobot.body.position = vec2(x, y) * self.SCALE_REAL_TO_SIM

            for step in range(self.numStepsPerEpisode):
                """ user interaction """
                # handle keys
                for event in pygame.event.get():
                    if event.type == KEYDOWN:
                        if event.key == K_PLUS:
                            self.stepsPerSec *= 2
                        elif event.key == K_MINUS:
                            self.stepsPerSec = np.max([1, self.stepsPerSec / 2])

                """ drawing """
                self.screen.fill((0, 0, 0, 0))

                self.pushObject.draw(self.screen)

                for kilobot in self.kilobots:
                    kilobot.draw(self.screen)

                # draw light
                lx = int(self.SCALE_REAL_TO_VIS * lightPos[0, 0])
                ly = int(self.screen.get_height() - self.SCALE_REAL_TO_VIS *
                        lightPos[0, 1])
                lr = int(self.SCALE_REAL_TO_VIS * 0.02)
                gfxdraw.aacircle(self.screen, lx, ly, lr, (255, 255, 0))

                pygame.display.set_caption(('ep: {} - step: {} - ' +
                    'stepsPerSec: {}').format(ep + 1, step + 1, self.stepsPerSec))

                pygame.display.flip()
                self.clock.tick(self.stepsPerSec)

                """ simulation """
                # current state
                objPos = self.pushObject.getRealPosition()
                objPosOld = objPos

                s = asmatrix(empty((1, S.shape[1])))
                s[0, 0] = lightPos[0, 0] - objPos[0, 0]
                s[0, 1] = lightPos[0, 1] - objPos[0, 1]

                for (i, kilobot) in zip(range(self.numKilobots), self.kilobots):
                    kbPos = kilobot.getRealPosition()
                    s[0, 2 + 2 * i + 0] = kbPos[0, 0] - objPos[0, 0]
                    s[0, 2 + 2 * i + 1] = kbPos[0, 1] - objPos[0, 1]

                # choose action
                if self.useMean:
                    a = self.policy.getMeanAction(s)
                else:
                    if random.random() <= self.epsilon:
                        a = self.policy.getRandomAction()
                    else:
                        a = self.policy.sampleActions(s)

                # take action
                n = linalg.norm(a)
                if n > 0.015:
                    lightPos += (a * 0.015 / n)
                else:
                    lightPos += a

                # move directly toward the light
                for kilobot in self.kilobots:
                    kbPos = kilobot.getRealPosition()

                    v = lightPos - kbPos

                    # cap max velocity
                    n = linalg.norm(v)
                    if n > 0.01:
                        v *= (0.01 / n)

                    kilobot.body.linearVelocity = vec2(v[0, 0], v[0, 1]) * \
                            self.SCALE_REAL_TO_SIM
                    kilobot.body.linearDamping = 0.0

                for i in range(10):
                    self.world.Step(0.1, 10, 10)

                # next state
                objPos = self.pushObject.getRealPosition()

                s_ = asmatrix(empty((1, S.shape[1])))
                s_[0, 0] = lightPos[0, 0] - objPos[0, 0]
                s_[0, 1] = lightPos[0, 1] - objPos[0, 1]

                for (i, kilobot) in zip(range(self.numKilobots), self.kilobots):
                    kbPos = kilobot.getRealPosition()
                    s_[0, 2 + 2 * i + 0] = kbPos[0, 0] - objPos[0, 0]
                    s_[0, 2 + 2 * i + 1] = kbPos[0, 1] - objPos[0, 1]

                # reward: learn to move the object to the right
                objMovement = objPos - objPosOld
                r = objMovement[0, 0] - 0.5 * np.abs(objMovement[0, 1])

                # record sample
                sampleIdx = ep * self.numStepsPerEpisode + step

                S[sampleIdx, :] = s
                A[sampleIdx, :] = a
                R[sampleIdx, :] = r
                S_[sampleIdx, :] = s_

        return S, A, R, S_