コード例 #1
0
class MicropolisEnv(core.Env):
    def __init__(self, MAP_X=14, MAP_Y=14, PADDING=0):
        self.SHOW_GUI = False
        self.start_time = time.time()
        self.print_map = False

    #self.setMapSize((MAP_X, MAP_Y), PADDING)

    def seed(self, seed=None):
        self.np_random, seed1 = seeding.np_random(seed)
        # Derive a random seed. This gets passed as a uint, but gets
        # checked as an int elsewhere, so we need to keep it below
        # 2**31. * 2
        seed2 = seeding.hash_seed(seed1 + 1)
        # Empirically, we need to seed before loading the ROM (ignoring this for now in our case).

    # return [seed1, seed2]

    def setMapSize(self, size, print_map=False, PADDING=0, static_builds=True):
        self.MAP_X = size
        self.MAP_Y = size
        self.obs_width = self.MAP_X + PADDING * 2
        self.micro = MicropolisControl(self.MAP_X, self.MAP_Y, PADDING)
        self.static_builds = True
        if self.static_builds:
            self.micro.map.initStaticBuilds()
        self.win1 = self.micro.win1
        self.micro.SHOW_GUI = self.SHOW_GUI
        self.num_step = 0
        self.minFunds = 5000
        self.initFunds = 10000000
        self.num_tools = self.micro.num_tools
        self.num_zones = self.micro.num_zones
        self.num_scalars = 1
        # traffic, power, density
        self.num_obs_channels = self.num_zones + self.num_scalars + 3
        if self.static_builds:
            self.num_obs_channels += 1
        ac_low = np.zeros((3))
        ac_high = np.array(
            [self.num_tools - 1, self.MAP_X - 1, self.MAP_Y - 1])
        self.action_space = spaces.Box(low=ac_low, high=ac_high, dtype=int)
        self.last_state = None
        self.metadata = {'runtime.vectorized': True}
        low_obs = np.zeros((self.num_obs_channels, self.MAP_X, self.MAP_Y))
        high_obs = np.full((self.num_obs_channels, self.MAP_X, self.MAP_Y),
                           fill_value=1)
        # TODO: can/should we use Tuples of MultiBinaries instead, for greater efficiency?
        self.observation_space = spaces.Box(low=low_obs,
                                            high=high_obs,
                                            dtype=bool)
        self.state = None
        #       self.intsToActions = {}
        #       self.mapIntsToActions
        #       self.mapIntsToActions()
        self.last_pop = 0
        self.last_num_roads = 0
#       self.past_actions = np.full((self.num_tools, self.MAP_X, self.MAP_Y), False)

#def mapIntsToActionsChunk(self):
#    ''' Unrolls the action vector into spatial chunks (does this matter empirically?).'''
#    w0 = 20
#    w1 = 10
#    i = 0
#    for j0 in range(self.MAP_X // w0):
#        for k0 in range(self.MAP_Y // w0):
#            for j1 in range(w0 // w1):
#                for k1 in range(w0 // w1):
#                    for z in range(self.num_tools):
#                        for x in range(j0 * w0 + j1*w1,
#                                j0 * w0 + (j1+1)*w1):
#                            for y in range(k0 * w0 + k1*w1,
#                                    k0 * w0 + (k1+1)*w1):
#                                self.intsToActions[i] = [z, x, y]
#                                i += 1

#def mapIntsToActions(self):
#    ''' Unrolls the action vector in the same order as the pytorch model
#    on its forward pass.'''
#    chunk_width = 1
#    i = 0
#    for z in range(self.num_tools):
#        for x in range(self.MAP_X):
#            for y in range(self.MAP_Y):
#                    self.intsToActions[i] = [z, x, y]
#                    i += 1

    def close(self):
        self.micro.close()

    def randomStaticStart(self):
        '''Cannot overwrite itself'''
        half_tiles = self.MAP_X * self.MAP_Y // 2
        r = np.random.randint(1, 5)
        self.micro.setFunds(10000000)
        # self.micr.map.initStaticBuilds
        for i in range(r):
            if self.micro.map.num_empty <= half_tiles:
                break
            else:
                self.step(self.action_space.sample(), static_build=True)

    def randomStart(self):
        r = np.random.randint(0, 100)
        self.micro.setFunds(10000000)
        for i in range(r):
            self.step(self.action_space.sample())
#       i = np.random.randint(0, (self.obs_width * self.obs_width / 3))
#       a = (np.random.randint(0, self.num_tools, i), np.random.randint(0, self.obs_width, i), np.random.randint(0, self.obs_width, i))
#       for j in range(i):
#           self.micro.takeSetupAction((a[0][j], a[1][j], a[2][j]))

    def reset(self):
        self.micro.clearMap()
        self.num_step = 0
        #self.randomStart()
        self.randomStaticStart()
        self.micro.engine.simTick()
        self.micro.setFunds(self.initFunds)
        curr_funds = self.micro.getFunds()
        curr_pop = self.getPop()
        self.state = self.observation([curr_pop])
        self.last_pop = 0
        self.micro.num_roads = 0
        self.last_num_roads = 0
        #self.past_actions.fill(False)
        return self.state

    def observation(self, scalars):
        state = self.micro.map.getMapState()
        power = self.micro.getPowerMap()
        pop = self.micro.getPopDensityMap()
        traffic = self.micro.getTrafficDensityMap()
        scalar_layers = np.zeros((len(scalars), self.MAP_X, self.MAP_Y))
        for si in range(len(scalars)):
            scalar_layers[si].fill(scalars[si])
        state = np.concatenate((state, power, pop, traffic, scalar_layers), 0)
        if self.static_builds:
            state = np.concatenate((state, self.micro.map.static_builds), 0)
        return state

    def getPop(self):
        curr_pop = self.micro.getResPop() / 8 + self.micro.getComPop() + \
                self.micro.getIndPop()
        return curr_pop

    def step(self, a, static_build=False):
        reward = 0
        #       a = self.intsToActions[a]
        a = list(a)
        print(a)
        self.micro.takeAction(a, static_build)
        self.curr_pop = self.getPop()
        self.state = self.observation([self.curr_pop])
        #       reward += (self.micro.total_traffic - self.micro.last_total_traffic) / 50
        # anneal road reward
        #       road_diff = self.micro.num_roads - self.last_num_roads
        #       road_diff = road_diff * (max(0, 70 - self.micro.num_roads) / 70) * 0.2
        #       reward += road_diff  * (max(0, 7200 - abs(time.time() - self.start_time)) / 7200)
        # anneal the following to zero over 1hr
        reward += (
            self.curr_pop - self.last_pop
        )  #* (max(0, 14400 - abs(time.time() - self.start_time)) / 14400)
        self.last_num_roads = self.micro.num_roads
        #       print(self.micro.num_roads)
        self.last_pop = self.curr_pop
        curr_funds = self.micro.getFunds()
        bankrupt = curr_funds < self.minFunds
        terminal = bankrupt or self.num_step >= 100
        if terminal and self.print_map:
            if static_build:
                print('STATIC BUILD')
            self.printMap()
        self.num_step += 1
        return (self.state, reward, terminal, {})

    def printMap(self):
        print('{}\npopulation: {}\ntraffic: {}\n{}\n'.format(
            np.add(self.micro.map.zoneMap[-1],
                   np.full((self.MAP_X, self.MAP_Y), 2)), self.curr_pop,
            self.micro.total_traffic, self.micro.map.static_builds))

    def render(self, mode='human'):
        # why does this need to happen twice (or else blank window)?
        gtk.main_iteration()
        gtk.main_iteration()

    def test(self):
        env = MicropolisEnv()
        for i in range(5000):
            env.step(env.action_space.sample())
コード例 #2
0
class MicropolisEnv(core.Env):
    def __init__(self, MAP_X=20, MAP_Y=20, PADDING=0):
        self.SHOW_GUI = False
        self.start_time = time.time()
        self.print_map = False
        self.num_episode = 0
        self.max_step = 500
        self.max_static = 0

    #self.setMapSize((MAP_X, MAP_Y), PADDING)

    def seed(self, seed=None):
        self.np_random, seed1 = seeding.np_random(seed)
        # Derive a random seed. This gets passed as a uint, but gets
        # checked as an int elsewhere, so we need to keep it below
        # 2**31.
        seed2 = seeding.hash_seed(seed1 + 1) % 2**31
        np.random.seed(seed)
        return [seed1, seed2]

    def setMapSize(self,
                   size,
                   print_map=False,
                   PADDING=0,
                   static_builds=True,
                   parallel_gui=False,
                   render_gui=False,
                   empty_start=True):
        self.empty_start = empty_start
        if type(size) == int:
            self.MAP_X = size
            self.MAP_Y = size
        else:
            self.MAP_X = size[0]
            self.MAP_Y = size[1]
        self.obs_width = self.MAP_X + PADDING * 2
        self.micro = MicropolisControl(self.MAP_X,
                                       self.MAP_Y,
                                       PADDING,
                                       parallel_gui=parallel_gui)
        self.static_builds = True
        if self.static_builds:
            self.micro.map.initStaticBuilds()
        self.win1 = self.micro.win1
        self.micro.SHOW_GUI = self.SHOW_GUI
        self.num_step = 0
        self.minFunds = 5000
        self.initFunds = 10000000
        self.num_tools = self.micro.num_tools
        self.num_zones = self.micro.num_zones
        self.num_scalars = 1
        # traffic, power, density
        self.num_obs_channels = self.micro.map.num_features + self.num_scalars + 3
        if self.static_builds:
            self.num_obs_channels += 1

    #ac_low = np.zeros((3))
    #ac_high = np.array([self.num_tools - 1, self.MAP_X - 1, self.MAP_Y - 1])
    #self.action_space = spaces.Box(low=ac_low, high=ac_high, dtype=int)
        self.action_space = spaces.Discrete(self.num_tools * self.MAP_X *
                                            self.MAP_Y)
        self.last_state = None
        self.metadata = {'runtime.vectorized': True}

        low_obs = np.zeros((self.num_obs_channels, self.MAP_X, self.MAP_Y))
        high_obs = np.full((self.num_obs_channels, self.MAP_X, self.MAP_Y),
                           fill_value=1)
        # TODO: can/should we use Tuples of MultiBinaries instead, for greater efficiency?
        self.observation_space = spaces.Box(low=low_obs,
                                            high=high_obs,
                                            dtype=int)
        self.state = None
        self.intsToActions = {}
        self.mapIntsToActions
        self.mapIntsToActions()
        self.last_pop = 0
        self.last_num_roads = 0
        #       self.past_actions = np.full((self.num_tools, self.MAP_X, self.MAP_Y), False)
        self.print_map = print_map
        self.render_gui = render_gui

    def mapIntsToActionsChunk(self):
        ''' Unrolls the action vector into spatial chunks (does this matter empirically?).'''
        w0 = 20
        w1 = 10
        i = 0
        for j0 in range(self.MAP_X // w0):
            for k0 in range(self.MAP_Y // w0):
                for j1 in range(w0 // w1):
                    for k1 in range(w0 // w1):
                        for z in range(self.num_tools):
                            for x in range(j0 * w0 + j1 * w1,
                                           j0 * w0 + (j1 + 1) * w1):
                                for y in range(k0 * w0 + k1 * w1,
                                               k0 * w0 + (k1 + 1) * w1):
                                    self.intsToActions[i] = [z, x, y]
                                    i += 1

    def mapIntsToActions(self):
        ''' Unrolls the action vector in the same order as the pytorch model
        on its forward pass.'''
        chunk_width = 1
        i = 0
        for z in range(self.num_tools):
            for x in range(self.MAP_X):
                for y in range(self.MAP_Y):
                    self.intsToActions[i] = [z, x, y]
                    i += 1

    def randomStep(self):
        self.step(self.action_space.sample())

    def close(self):
        self.micro.close()

    def randomStaticStart(self):
        num_static = 100
        lst_epi = 500
        #       num_static = math.ceil(((lst_epi - self.num_episode) / lst_epi) * num_static)
        #       num_static = max(0, max_static)
        self.micro.setFunds(10000000)
        if num_static > 0:
            num_static = self.np_random.randint(0, num_static + 1)
        for i in range(num_static):
            if i % 2 == 0:
                static_build = True
            else:
                static_build = False
            self.step(self.action_space.sample(), static_build=True)

    def randomStart(self):
        r = self.np_random.randint(0, 100)
        self.micro.setFunds(10000000)
        for i in range(r):
            self.step(self.action_space.sample())
#       i = np.random.randint(0, (self.obs_width * self.obs_width / 3))
#       a = (np.random.randint(0, self.num_tools, i), np.random.randint(0, self.obs_width, i), np.random.randint(0, self.obs_width, i))
#       for j in range(i):
#           self.micro.takeSetupAction((a[0][j], a[1][j], a[2][j]))

    def reset(self):
        if self.empty_start:
            self.micro.clearMap()
        else:
            self.micro.newMap()
        self.num_step = 0
        #self.randomStaticStart()
        self.micro.engine.simTick()
        self.micro.setFunds(self.initFunds)
        #curr_funds = self.micro.getFunds()
        curr_pop = self.getPop()
        self.state = self.observation([curr_pop])
        self.last_pop = 0
        self.micro.num_roads = 0
        self.last_num_roads = 0
        #self.past_actions.fill(False)
        self.num_episode += 1
        return self.state

    def observation(self, scalars):
        state = self.micro.map.getMapState()
        power = self.micro.getPowerMap()
        pop = self.micro.getPopDensityMap()
        traffic = self.micro.getTrafficDensityMap()
        scalar_layers = np.zeros((len(scalars), self.MAP_X, self.MAP_Y))
        for si in range(len(scalars)):
            scalar_layers[si].fill(scalars[si])
        state = np.concatenate((state, power, pop, traffic, scalar_layers), 0)
        if self.static_builds:
            state = np.concatenate((state, self.micro.map.static_builds), 0)
        return state

    def getPop(self):
        curr_pop = 0.2 *  self.micro.getResPop() + \
                   12 * self.micro.getComPop() + \
                   4 * self.micro.getIndPop()
        return curr_pop

    def step(self, a, static_build=False):
        reward = 0
        a = self.intsToActions[a]
        self.micro.takeAction(a, static_build)
        self.curr_pop = self.getPop()
        self.state = self.observation([self.curr_pop])
        reward += (self.curr_pop - self.last_pop)
        #       reward += (self.micro.total_traffic - self.micro.last_total_traffic)
        self.last_pop = self.curr_pop
        curr_funds = self.micro.getFunds()
        bankrupt = curr_funds < self.minFunds
        terminal = bankrupt or self.num_step >= self.max_step
        if True and self.print_map:
            if static_build:
                print('STATIC BUILD')
            self.printMap()
        self.num_step += 1
        if self.render_gui:
            self.micro.render()
        return (self.state, reward, terminal, {})

    def printMap(self, static_builds=True):
        if static_builds:
            static_map = self.micro.map.static_builds
        else:
            static_map = None
        np.set_printoptions(threshold=np.inf)
        zone_map = self.micro.map.zoneMap[-1]
        zone_map = np.array_repr(zone_map).replace(',  ', '  ').replace(
            '],\n', ']\n').replace(',\n', ',').replace(', ', ' ').replace(
                '        ', ' ').replace('         ', '  ')
        print('{}\npopulation: {}, traffic: {}, episode: {}, step: {} \n{}'.
              format(zone_map, self.curr_pop, self.micro.total_traffic,
                     self.num_episode, self.num_step, static_map))

    #print(self.micro.map.centers)

    def render(self, mode='human'):
        # why does this need to happen twice (or else blank window)?
        self.micro.render()

    def test(self):
        env = MicropolisEnv()
        for i in range(5000):
            env.step(env.action_space.sample())