Ejemplos de MicropolisControl en Python

Lenguaje de programación: Python

Namespace/Package Name: corecontrol

Clase / Tipo: MicropolisControl

Ejemplos en hotexamples.com: 12

Python MicropolisControl - 12 ejemplos encontrados. Estos son los ejemplos en Python del mundo real mejor valorados de corecontrol.MicropolisControl extraídos de proyectos de código abierto. Puedes valorar ejemplos para ayudarnos a mejorar la calidad de los ejemplos.

Métodos usados con frecuencia

Mostrar / Ocultar

MicropolisControl(6)

getFunds(6)

setFunds(6)

getResPop(6)

getIndPop(6)

takeAction(6)

getComPop(6)

close(6)

clearMap(6)

newMap(4)

render(4)

getDensityMaps(3)

clearBotBuilds(3)

doBotTool(2)

getPowerMap(2)

getTotPop(2)

getTrafficDensityMap(2)

getPopDensityMap(2)

simTick(1)

Ejemplo n.º 1

Mostrar archivo

    def setMapSize(self,
                   size,
                   print_map=False,
                   PADDING=0,
                   static_builds=True,
                   parallel_gui=False,
                   render_gui=False,
                   empty_start=True):
        '''Create the simulator for ``size`` and derive the gym spaces.

        Args:
            size: An int for a square map, otherwise an indexable whose
                first two items are the x and y dimensions.
            print_map: Stored on ``self.print_map``.
            PADDING: Forwarded to ``MicropolisControl``; ``self.obs_width``
                is the x dimension plus twice this value.
            static_builds: Accepted but not consulted (see note in the body).
            parallel_gui: Forwarded to ``MicropolisControl``.
            render_gui: Stored on ``self.render_gui``.
            empty_start: Stored on ``self.empty_start``.
        '''
        self.empty_start = empty_start
        if isinstance(size, int):
            self.MAP_X = size
            self.MAP_Y = size
        else:
            self.MAP_X = size[0]
            self.MAP_Y = size[1]
        self.obs_width = self.MAP_X + PADDING * 2
        self.micro = MicropolisControl(self.MAP_X,
                                       self.MAP_Y,
                                       PADDING,
                                       parallel_gui=parallel_gui)
        # NOTE(review): the static_builds argument is ignored and the flag is
        # forced on; left unchanged because callers may rely on it -- confirm.
        self.static_builds = True
        if self.static_builds:
            self.micro.map.initStaticBuilds()
        self.win1 = self.micro.win1
        self.micro.SHOW_GUI = self.SHOW_GUI
        self.num_step = 0
        self.minFunds = 5000
        self.initFunds = 10000000
        self.num_tools = self.micro.num_tools
        self.num_zones = self.micro.num_zones
        self.num_scalars = 1
        # Map features + scalar layers + 3 extra layers
        # (labelled "traffic, power, density" by the original author).
        self.num_obs_channels = self.micro.map.num_features + self.num_scalars + 3
        if self.static_builds:
            self.num_obs_channels += 1

        # One discrete action per (tool, x, y) combination.
        self.action_space = spaces.Discrete(self.num_tools * self.MAP_X *
                                            self.MAP_Y)
        self.last_state = None
        self.metadata = {'runtime.vectorized': True}

        low_obs = np.zeros((self.num_obs_channels, self.MAP_X, self.MAP_Y))
        high_obs = np.full((self.num_obs_channels, self.MAP_X, self.MAP_Y),
                           fill_value=1)
        # TODO: can/should we use Tuples of MultiBinaries instead, for greater efficiency?
        self.observation_space = spaces.Box(low=low_obs,
                                            high=high_obs,
                                            dtype=int)
        self.state = None
        # Filled by mapIntsToActions(); must exist before that call.
        self.intsToActions = {}
        self.mapIntsToActions()
        self.last_pop = 0
        self.last_num_roads = 0
        self.print_map = print_map
        self.render_gui = render_gui

Ejemplo n.º 2

Mostrar archivo

    def setMapSize(self, size, max_step=None, rank=None, print_map=False,
            PADDING=0, static_builds=True, parallel_gui=False,
            render_gui=False, empty_start=True, simple_reward=False,
            power_puzzle=False, record=False):
        '''Create the simulator for ``size`` and derive the gym spaces.

        ``size`` is an int (square map) or an indexable (x, y) pair.
        ``max_step`` defaults to 1000 when omitted. ``record`` is not
        supported and raises NotImplementedError. ``static_builds`` is
        accepted but the attribute is always set to True.
        '''
        if record:
            raise NotImplementedError
        self.max_step = 1000 if max_step is None else max_step
        self.empty_start = empty_start
        self.simple_reward = simple_reward
        self.power_puzzle = power_puzzle
        is_square = type(size) is int
        self.MAP_X = size if is_square else size[0]
        self.MAP_Y = size if is_square else size[1]
        self.obs_width = self.MAP_X + PADDING * 2
        self.micro = MicropolisControl(
            self.MAP_X,
            self.MAP_Y,
            PADDING,
            parallel_gui=parallel_gui,
            rank=rank,
            power_puzzle=power_puzzle)
        self.static_builds = True
        self.win1 = self.micro.win1
        self.micro.SHOW_GUI = self.SHOW_GUI
        self.num_step = 0
        self.minFunds = 5000
        self.initFunds = 10000000
        self.num_tools = self.micro.num_tools
        self.num_zones = self.micro.num_zones
        # res, com, ind pop, demand
        self.num_scalars = 6
        self.num_density_maps = 3
        user_feature_count = 1  # static builds
        self.num_obs_channels = (self.micro.map.num_features
                                 + self.num_scalars
                                 + self.num_density_maps
                                 + user_feature_count)
        # One discrete action per (tool, x, y) combination.
        action_count = self.num_tools * self.MAP_X * self.MAP_Y
        self.action_space = spaces.Discrete(action_count)
        self.last_state = None
        self.metadata = {'runtime.vectorized': True}

        obs_shape = (self.num_obs_channels, self.MAP_X, self.MAP_Y)
        low_obs = np.full(obs_shape, fill_value=-1)
        high_obs = np.full(obs_shape, fill_value=1)
        # TODO: can/should we use Tuples of MultiBinaries instead, for greater efficiency?
        self.observation_space = spaces.Box(low=low_obs,
                                            high=high_obs,
                                            dtype=float)
        self.state = None
        # Both lookup tables are filled by mapIntsToActions().
        self.intsToActions = {}
        self.actionsToInts = np.zeros((self.num_tools, self.MAP_X, self.MAP_Y))
        self.mapIntsToActions()
        self.last_pop = 0
        self.last_num_roads = 0
        self.print_map = print_map
        self.render_gui = render_gui
        self.mayor_rating = 50
        self.last_mayor_rating = self.mayor_rating

        self.last_priority_road_net_size = 0

Ejemplo n.º 3

Mostrar archivo

 def setMapSize(self, size, **kwargs):
     '''Run pre_gui, build the MicropolisControl instance, then run post_gui.

     ``size`` and ``kwargs`` are passed straight to ``pre_gui``; the
     simulator is then constructed from the attributes found on ``self``.
     '''
     self.pre_gui(size, **kwargs)
     control_options = dict(rank=self.rank,
                            power_puzzle=self.power_puzzle,
                            gui=self.render_gui)
     self.micro = MicropolisControl(self, self.MAP_X, self.MAP_Y,
                                    self.PADDING, **control_options)
     self.post_gui()

Ejemplo n.º 4

Mostrar archivo

Archivo: env.py Proyecto: njustesen/gym-city

 def setMapSize(self, size, **kwargs):
     '''Do most of the actual initialization.

     Runs ``pre_gui``, then either builds a new MicropolisControl or, when
     ``self.micro`` already exists, asks it to ``reset_params(size)``.
     Afterwards the city metrics are captured and ``post_gui`` is run.
     '''
     self.pre_gui(size, **kwargs)
     #TODO: this better
     if not hasattr(self, 'micro'):
         control_options = dict(rank=self.rank,
                                power_puzzle=self.power_puzzle,
                                gui=self.render_gui)
         self.micro = MicropolisControl(self, self.MAP_X, self.MAP_Y,
                                        self.PADDING, **control_options)
     else:
         self.micro.reset_params(size)
     # Both attributes start out referring to the same metrics object.
     self.city_metrics = self.get_city_metrics()
     self.last_city_metrics = self.city_metrics
     self.post_gui()

Ejemplo n.º 5

Mostrar archivo

 def setMapSize(self, size, print_map=False, PADDING=0, static_builds=True):
     '''Create a square-map simulator and derive the gym spaces.

     ``size`` is used for both map dimensions. ``print_map`` is not used
     here and ``static_builds`` is accepted but the attribute is always
     set to True.
     '''
     self.MAP_X = self.MAP_Y = size
     self.obs_width = self.MAP_X + PADDING * 2
     self.micro = MicropolisControl(self.MAP_X, self.MAP_Y, PADDING)
     self.static_builds = True
     if self.static_builds:
         self.micro.map.initStaticBuilds()
     self.win1 = self.micro.win1
     self.micro.SHOW_GUI = self.SHOW_GUI
     self.num_step = 0
     self.minFunds = 5000
     self.initFunds = 10000000
     self.num_tools = self.micro.num_tools
     self.num_zones = self.micro.num_zones
     self.num_scalars = 1
     # Zones + scalar layers + 3 extra layers
     # (labelled "traffic, power, density" by the original author).
     channel_count = self.num_zones + self.num_scalars + 3
     self.num_obs_channels = channel_count
     if self.static_builds:
         self.num_obs_channels += 1
     # Actions are (tool, x, y) triples bounded per component.
     ac_low = np.zeros((3))
     ac_high = np.array(
         [self.num_tools - 1, self.MAP_X - 1, self.MAP_Y - 1])
     self.action_space = spaces.Box(low=ac_low, high=ac_high, dtype=int)
     self.last_state = None
     self.metadata = {'runtime.vectorized': True}
     obs_shape = (self.num_obs_channels, self.MAP_X, self.MAP_Y)
     low_obs = np.zeros(obs_shape)
     high_obs = np.full(obs_shape, fill_value=1)
     # TODO: can/should we use Tuples of MultiBinaries instead, for greater efficiency?
     self.observation_space = spaces.Box(low=low_obs,
                                         high=high_obs,
                                         dtype=bool)
     self.state = None
     self.last_pop = 0
     self.last_num_roads = 0

Ejemplo n.º 6

Mostrar archivo

 def setMapSize(self, MAP_X=6, MAP_Y=6):
     '''Create the simulator for a MAP_X by MAP_Y map and derive the gym spaces.'''
     self.MAP_X, self.MAP_Y = MAP_X, MAP_Y
     self.micro = MicropolisControl(MAP_X, MAP_Y)
     self.win1 = self.micro.win1
     self.micro.SHOW_GUI = self.SHOW_GUI
     self.num_step = 0
     self.minFunds = 1000
     self.initFunds = 1000000000
     self.num_tools = self.micro.num_tools
     self.num_zones = self.micro.num_zones
     # One discrete action per (tool, x, y) combination.
     action_count = self.num_tools * self.MAP_X * self.MAP_Y
     self.action_space = spaces.Discrete(action_count)
     obs_shape = (self.num_zones, self.MAP_X, self.MAP_Y)
     low_obs = np.zeros(obs_shape)
     high_obs = np.full(obs_shape, fill_value=1)
     # TODO: can/should we use Tuples of MultiBinaries instead, for greater efficiency?
     self.observation_space = spaces.Box(low=low_obs,
                                         high=high_obs,
                                         dtype=bool)
     self.state = None
     # Filled by mapIntsToActions(); must exist before that call.
     self.intsToActions = {}
     self.mapIntsToActions()
     self.last_pop = 0

Ejemplo n.º 7

Mostrar archivo

Archivo: env.py Proyecto: njustesen/gym-city

class MicropolisEnv(core.Env):
    def __init__(self, MAP_X=20, MAP_Y=20, PADDING=0):
        self.SHOW_GUI = False
        self.start_time = time.time()
        self.print_map = False
        self.num_episode = 0
        self.max_static = 0
        self.player_step = False
        self.static_player_builds = False
        ### MIXED
        self.city_trgs = OrderedDict({
            'res_pop': 500,
            'com_pop': 50,
            'ind_pop': 50,
            'traffic': 2000,
            # i believe one plant is worth 12, the other 16?
            'num_plants': 14,
            'mayor_rating': 100
        })
        self.trg_param_vals = np.array([v for v in self.city_trgs.values()])
        self.param_bounds = OrderedDict({
            'res_pop': (0, 750),
            'com_pop': (0, 100),
            'ind_pop': (0, 100),
            'traffic': (0, 2000),
            'num_plants': (0, 100),
            'mayor_rating': (0, 100)
        })
        self.weights = OrderedDict({
            'res_pop': 1,
            'com_pop': 1,
            'ind_pop': 1,
            'traffic': 1,
            'num_plants': 0,
            'mayor_rating': 0,
        })

        self.num_params = 6
        # not necessarily true but should take care of most cases
        self.max_loss = 0
        i = 0
        self.param_ranges = []
        for param, (lb, ub) in self.param_bounds.items():
            weight = self.weights[param]
            rng = abs(ub - lb)
            self.param_ranges += [rng]
            if i < self.num_params:
                self.max_loss += rng * weight
                i += 1
### MIXED
#self.city_trgs = {
#        'res_pop': 1,
#        'com_pop': 4,
#        'ind_pop': 4,
#        'traffic': 0.2,
#        'num_plants': 0,
#        'mayor_rating': 0}
### Traffic
#self.city_trgs = {
#        'res_pop': 1,
#        'com_pop': 4,
#        'ind_pop': 4,
#        'traffic': 5,
#        'num_plants': 0,
#        'mayor_rating':0
#        }
        self.city_metrics = {}
        self.max_reward = 100

    #self.setMapSize((MAP_X, MAP_Y), PADDING)

    def seed(self, seed=None):
        '''Seed ``self.np_random`` and numpy's global RNG.

        Returns a two-item list: the seed reported by
        ``seeding.np_random`` and a second seed derived from it.
        '''
        generator, primary_seed = seeding.np_random(seed)
        self.np_random = generator
        # Derive a random seed. This gets passed as a uint, but gets
        # checked as an int elsewhere, so we need to keep it below
        # 2**31.
        derived_seed = seeding.hash_seed(primary_seed + 1) % 2**31
        np.random.seed(seed)
        return [primary_seed, derived_seed]

    def setMapSize(self, size, **kwargs):
        '''Do most of the actual initialization.
        '''
        self.pre_gui(size, **kwargs)
        #TODO: this better
        if hasattr(self, 'micro'):
            self.micro.reset_params(size)
        else:
            self.micro = MicropolisControl(self,
                                           self.MAP_X,
                                           self.MAP_Y,
                                           self.PADDING,
                                           rank=self.rank,
                                           power_puzzle=self.power_puzzle,
                                           gui=self.render_gui)
        self.city_metrics = self.get_city_metrics()
        self.last_city_metrics = self.city_metrics
        self.post_gui()

    def pre_gui(self,
                size,
                max_step=None,
                rank=0,
                print_map=False,
                PADDING=0,
                static_builds=True,
                parallel_gui=False,
                render_gui=False,
                empty_start=True,
                simple_reward=False,
                power_puzzle=False,
                record=False,
                traffic_only=False,
                random_builds=False,
                poet=False,
                **kwargs):
        self.PADDING = PADDING
        self.rank = rank
        self.render_gui = render_gui
        self.random_builds = random_builds
        self.traffic_only = traffic_only
        if record: raise NotImplementedError
        if max_step is None:
            max_step = size * size
        self.max_step = max_step
        self.empty_start = empty_start
        self.simple_reward = simple_reward
        self.power_puzzle = power_puzzle
        if type(size) == int:
            self.MAP_X = size
            self.MAP_Y = size
        else:
            self.MAP_X = size[0]
            self.MAP_Y = size[1]
        self.obs_width = self.MAP_X + PADDING * 2
        self.static_builds = True
        self.poet = poet
        self.print_map = print_map

    def post_gui(self):
        '''Finish initialization once ``self.micro`` exists.

        Copies simulator properties onto ``self``, builds the action and
        observation spaces and the action lookup tables, resets episode
        bookkeeping, and renders once when ``render_gui`` is set and
        ``rank`` is 0.
        '''
        sim = self.micro
        self.win1 = sim.win1
        sim.SHOW_GUI = self.SHOW_GUI
        self.num_step = 0
        self.minFunds = 0
        self.initFunds = sim.init_funds
        self.num_tools = sim.num_tools
        self.num_zones = sim.num_zones
        # res, com, ind pop, demand
        self.num_scalars = 6
        self.num_density_maps = 3
        user_feature_count = 1  # static builds
        print('num map features: {}'.format(sim.map.num_features))
        channel_count = (sim.map.num_features + self.num_scalars
                         + self.num_density_maps + user_feature_count)
        if self.poet:
            # One extra channel per city target.
            channel_count += len(self.city_trgs)
        self.num_obs_channels = channel_count
        # One discrete action per (tool, x, y) combination.
        self.action_space = spaces.Discrete(self.num_tools * self.MAP_X *
                                            self.MAP_Y)
        self.last_state = None
        self.metadata = {'runtime.vectorized': True}
        obs_shape = (self.num_obs_channels, self.MAP_X, self.MAP_Y)
        self.observation_space = spaces.Box(
            low=np.full(obs_shape, fill_value=-1),
            high=np.full(obs_shape, fill_value=1),
            dtype=float)
        self.state = None
        # Both lookup tables are filled by mapIntsToActions().
        self.intsToActions = {}
        self.actionsToInts = np.zeros((self.num_tools, self.MAP_X, self.MAP_Y))
        self.mapIntsToActions()
        self.last_pop = 0
        self.last_num_roads = 0
        self.auto_reset = True
        self.mayor_rating = 50
        self.last_mayor_rating = self.mayor_rating
        self.last_priority_road_net_size = 0
        self.display_city_trgs()
        if self.render_gui and self.rank == 0:
            self.render()

    def get_param_bounds(self):
        return self.param_bounds

    def display_city_trgs(self):
        if self.win1 is not None:
            self.win1.agentPanel.displayTrgs(self.city_trgs)
        return self.city_trgs

    def mapIntsToActionsChunk(self):
        ''' Unrolls the action vector into spatial chunks (does this matter empirically?).'''
        w0 = 20
        w1 = 10
        i = 0
        for j0 in range(self.MAP_X // w0):
            for k0 in range(self.MAP_Y // w0):
                for j1 in range(w0 // w1):
                    for k1 in range(w0 // w1):
                        for z in range(self.num_tools):
                            for x in range(j0 * w0 + j1 * w1,
                                           j0 * w0 + (j1 + 1) * w1):
                                for y in range(k0 * w0 + k1 * w1,
                                               k0 * w0 + (k1 + 1) * w1):
                                    self.intsToActions[i] = [z, x, y]
                                    i += 1

    def mapIntsToActions(self):
        ''' Unrolls the action vector in the same order as the pytorch model
        on its forward pass.'''
        chunk_width = 1
        i = 0
        for z in range(self.num_tools):
            for x in range(self.MAP_X):
                for y in range(self.MAP_Y):
                    self.intsToActions[i] = [z, x, y]
                    self.actionsToInts[z, x, y] = i
                    i += 1
        print('len of intsToActions: {}\n num tools: {}'.format(
            len(self.intsToActions), self.num_tools))

    def randomStep(self):
        self.step(self.action_space.sample())

    def close(self):
        self.micro.close()

    def randomStaticStart(self):
        num_static = self.MAP_X * self.MAP_Y / 10
        lst_epi = 500
        #       num_static = math.ceil(((lst_epi - self.num_episode) / lst_epi) * num_static)
        #       num_static = max(0, max_static)
        self.micro.setFunds(self.micro.init_funds)
        if num_static > 0:
            num_static = self.np_random.randint(0, num_static + 1)
        for i in range(num_static):
            if i % 2 == 0:
                static_build = True
            else:
                static_build = False
            self.step(self.action_space.sample(), static_build=True)

    def randomStart(self):
        r = self.np_random.randint(0, 100)
        self.micro.setFunds(self.micro.init_funds)
        for i in range(r):
            self.step(self.action_space.sample())
#       i = np.random.randint(0, (self.obs_width * self.obs_width / 3))
#       a = (np.random.randint(0, self.num_tools, i), np.random.randint(0, self.obs_width, i), np.random.randint(0, self.obs_width, i))
#       for j in range(i):
#           self.micro.takeSetupAction((a[0][j], a[1][j], a[2][j]))

    def powerPuzzle(self):
        ''' Set up one plant, one res. If we restrict the agent to building power lines, we can test its ability
        to make long-range associations. '''
        for i in range(5):
            self.micro.doBotTool(np.random.randint(0, self.micro.MAP_X),
                                 np.random.randint(0, self.micro.MAP_Y),
                                 'Residential',
                                 static_build=True)
        while self.micro.map.num_plants == 0:
            self.micro.doBotTool(np.random.randint(0, self.micro.MAP_X),
                                 np.random.randint(0, self.micro.MAP_Y),
                                 'NuclearPowerPlant',
                                 static_build=True)

    def reset(self):
        self.display_city_trgs()
        if True:
            #if self.render_gui:
            if False:
                self.micro.clearBotBuilds()
            else:
                self.micro.clearMap()
        if not self.empty_start:
            self.micro.newMap()
        self.num_step = 0
        if self.power_puzzle:
            self.powerPuzzle()
        if self.random_builds:
            self.randomStaticStart()
        self.micro.simTick()
        self.city_metrics = self.get_city_metrics()
        self.last_city_metrics = self.city_metrics
        self.micro.setFunds(self.micro.init_funds)
        #curr_funds = self.micro.getFunds()
        self.curr_pop = 0
        self.curr_reward = self.getReward()
        self.state = self.getState()
        self.last_pop = 0
        self.micro.num_roads = 0
        self.last_num_roads = 0
        #self.past_actions.fill(False)
        self.num_episode += 1
        return self.state

# def getRoadPenalty(self):
#
#     class roadPenalty(torch.nn.module):
#         def __init__(self):
#             super(roadPenalty, self).__init__()

#             self.

    def getState(self):
        res_pop, com_pop, ind_pop = self.micro.getResPop(
        ), self.micro.getComPop(), self.micro.getIndPop()
        resDemand, comDemand, indDemand = self.micro.engine.getDemands()
        scalars = [res_pop, com_pop, ind_pop, resDemand, comDemand, indDemand]
        if self.poet:
            for j in range(3):
                scalars[j] = scalars[j] / self.param_ranges[j]
            trg_metrics = [v for k, v in self.city_trgs.items()]
            for i in range(len(trg_metrics)):
                trg_metrics[i] = trg_metrics[i] / self.param_ranges[i]
            scalars += trg_metrics
        return self.observation(scalars)

    def observation(self, scalars):
        state = self.micro.map.getMapState()
        density_maps = self.micro.getDensityMaps()
        #if self.render_gui:
        #    print(density_maps[2])
        road_networks = self.micro.map.road_networks
        if self.render_gui:
            #print(road_networks, self.micro.map.road_net_sizes)
            pass
        scalar_layers = np.zeros((len(scalars), self.MAP_X, self.MAP_Y))
        for si in range(len(scalars)):
            fill_val = scalars[si]
            if not type(fill_val) == str:
                scalar_layers[si].fill(scalars[si])
        state = np.concatenate((state, density_maps, scalar_layers), 0)
        if self.static_builds:
            state = np.concatenate((state, self.micro.map.static_builds), 0)
        return state

    def getPop(self):
        self.resPop, self.comPop, self.indPop = self.micro.getResPop(), \
                                     self.micro.getComPop(), \
                                     self.micro.getIndPop()

        curr_pop = self.resPop + \
                   self.comPop + \
                   self.indPop

        return curr_pop

    def getReward(self):
        '''Calculate reward.
        '''
        if True:
            reward = 0
            for metric, trg in self.city_trgs.items():
                last_val = self.last_city_metrics[metric]
                trg_change = trg - last_val
                val = self.city_metrics[metric]
                change = val - last_val
                if np.sign(change) != np.sign(trg_change):
                    metric_rew = -abs(change)
                elif abs(change) < abs(trg_change):
                    metric_rew = abs(change)
                else:
                    metric_rew = abs(trg_change) - abs(trg_change - change)
                reward += metric_rew * self.weights[metric]

    #if self.render_gui and reward != 0:
    #    print(self.city_metrics)
    #    print(self.city_trgs)
    #    print(reward)
    #    print()

    #if False:
    #    max_reward = self.max_reward
    #    loss = 0
    #    i = 0
    #    for k, v in self.city_trgs.items():
    #        if i == self.num_params:
    #            break
    #        else:
    #            if True:
    #                reward = 0
    #                for metric_name, trg in self.city_trgs.items():

    #            weight = self.weights[k]
    #            loss += abs(v - self.city_metrics[k]) * weight
    #            i += 1

    #    reward = (self.max_loss - loss) * max_reward / self.max_loss
    #    reward = self.getPopReward()
    #self.curr_reward = reward
        return reward

    def getPopReward(self):
        if False:
            pop_reward = self.micro.getTotPop()

        else:
            resPop, comPop, indPop = (1 / 4) * self.micro.getResPop(
            ), self.micro.getComPop(), self.micro.getIndPop()
            pop_reward = resPop + comPop + indPop
            # population density per 16x16 section of map
            pop_reward = pop_reward / (self.MAP_X * self.MAP_Y / 16**2)
            zone_variety = 0
            if resPop > 0:
                zone_variety += 1
            if comPop > 0:
                zone_variety += 1
            if indPop > 0:
                zone_variety += 1
            zone_bonus = (zone_variety - 1) * 50
            pop_reward += max(0, zone_bonus)
        if False:
            pop_reward = (resPop + 1) * (comPop + 1) * (indPop + 1) - 1
        return 0
        return pop_reward

    def set_param_bounds(self, bounds):
        print('setting visual param bounds (TODO: forreal')
        if self.win1:
            self.win1.agentPanel.setMetricRanges(bounds)

    def set_params(self, trgs):
        for k, v in trgs.items():
            self.city_trgs[k] = v
        self.trg_param_vals = np.array([v for v in self.city_trgs.values()])
        self.display_city_trgs()

    #print('set city trgs of env {} to: {}'.format(self.rank, self.city_trgs))

    def get_city_metrics(self):
        res_pop, com_pop, ind_pop = self.micro.getResPop(), \
                                     self.micro.getComPop(), \
                                     self.micro.getIndPop()
        traffic = self.micro.total_traffic
        mayor_rating = self.getRating()
        num_plants = self.micro.map.num_plants
        city_metrics = {
            'res_pop': res_pop,
            'com_pop': com_pop,
            'ind_pop': ind_pop,
            'traffic': traffic,
            'num_plants': num_plants,
            'mayor_rating': mayor_rating
        }
        return city_metrics

    def display_city_metrics(self):
        if self.win1 is not None:
            self.win1.agentPanel.displayMetrics(self.city_metrics)

    def step(self, a, static_build=False):
        #self.micro.engine.setPasses(np.random.randint(1, 101))
        if self.player_step:
            #if self.player_step == a:
            #    static_build=False
            #static_build = True
            if self.static_player_builds:
                static_build = True
            a = self.player_step
            self.player_step = False

    #else:
    #    a = 0
        a = self.intsToActions[a]
        self.micro.takeAction(a, static_build)
        return self.postact()

    def postact(self):
        # never let the agent go broke, for now
        self.micro.setFunds(self.micro.init_funds)
        #print('rank {} tickin'.format(self.rank))
        # TODO: BROKEN!
        self.micro.simTick()
        self.state = self.getState()
        #print(self.state[-2])
        self.curr_pop = self.getPop()
        self.last_city_metrics = self.city_metrics
        self.city_metrics = self.get_city_metrics()
        if self.render_gui:
            self.display_city_metrics()

    #if self.traffic_only:
    #    self.curr_pop = self.getPopReward() / 1
    #   #self.curr_pop = 0
    #else:
    #    self.curr_pop = self.getPop() #** 2
    #   #self.curr_pop = self.getPopReward() #** 2
    #pop_reward = self.curr_pop
    #self.curr_mayor_rating = self.getRating()
    #if not self.simple_reward:
    #   #if self.micro.total_traffic > 0:
    #   #    print(self.micro.total_traffic)
    #    if self.traffic_only:
    #        traffic_reward = self.micro.total_traffic * 10
    #       #traffic_reward = 0
    #    else:
    #       #traffic_reward = self.micro.total_traffic / 100
    #        traffic_reward = self.reward_weights[3] * self.micro.total_traffic
    #    if self.player_step:
    #        print('pop reward: {}\n'
    #        'traffic reward: {}'.format(pop_reward, traffic_reward))
    #        self.player_step = None
    #    if pop_reward > 0 and traffic_reward > 0:
    #       #print(pop_reward, traffic_reward)
    #        pass
    #    reward = pop_reward  + traffic_reward
    #    if reward > 0 and self.micro.map.num_roads > 0 and not self.traffic_only: # to avoid one-road minima in early training
    #        max_net_1 = 0
    #        max_net_2 = 0
    #        for n in  self.micro.map.road_net_sizes.values():
    #            if n > max_net_1:
    #                max_net_1 = n
    #           #    max_net_2 = max_net_1
    #           #elif n > max_net_2:
    #           #    max_net_2 = n
        reward = 0

        reward = self.getReward()
        #reward = reward / (self.max_step)
        self.curr_funds = curr_funds = self.micro.getFunds()
        bankrupt = curr_funds < self.minFunds
        terminal = (bankrupt or self.num_step >= self.max_step) and\
            self.auto_reset
        if self.print_map:
            #if static_build:
            #    print('STATIC BUILD')
            self.printMap()
        if self.render_gui:
            #pass
            self.micro.render()
        infos = {}
        # Get the next player-build ready, if there is one in the queue
        if self.micro.player_builds:
            b = self.micro.player_builds[0]
            a = self.actionsToInts[b]
            infos['player_move'] = int(a)
            self.micro.player_builds = self.micro.player_builds[1:]
            self.player_step = a
        self.num_step += 1
        ## Override Reward
        #reward = self.city_metrics['res_pop'] + self.city_metrics['com_pop']\
        #         + self.city_metrics['ind_pop'] + self.city_metrics['traffic']
        return (self.state, reward, terminal, infos)

    def getRating(self):
        return self.micro.engine.cityYes

    def printMap(self, static_builds=True):
        #if static_builds:
        #    static_map = self.micro.map.static_builds
        #else:
        #    static_map = None
        np.set_printoptions(threshold=np.inf)
        zone_map = self.micro.map.zoneMap[-1]
        zone_map = zone_map.transpose(1, 0)
        zone_map = np.array_repr(zone_map).replace(',  ', '  ').replace(
            '],\n', ']\n').replace(',\n', ',').replace(', ', ' ').replace(
                '        ', ' ').replace('         ', '  ')
        print(
            '{} \n population: {}, traffic: {}, episode: {}, step: {}, reward: {} \n'
            .format(
                zone_map,
                self.curr_pop,
                self.micro.total_traffic,
                self.num_episode,
                self.num_step,
                self.curr_reward  #, static_map
            ))

    #print(self.micro.map.centers)

    def render(self, mode='human'):
        self.micro.render()

    def test(self):
        '''Smoke test: drive a fresh environment with 5000 random actions.

        A separate environment is created; ``self`` is not used.
        '''
        env = MicropolisEnv()
        # __init__ does not build the simulator or the spaces, so the map
        # must be set up before sampling actions. 20 matches __init__'s
        # default map dimensions.
        env.setMapSize(20)
        try:
            for _ in range(5000):
                env.step(env.action_space.sample())
        finally:
            env.close()

    def set_res_weight(self, val):
        self.city_trgs['res_pop'] = val

    def set_com_weight(self, val):
        self.city_trgs['com_pop'] = val

    def set_ind_weight(self, val):
        self.city_trgs['ind_pop'] = val

    def set_traffic_weight(self, val):
        self.city_trgs['traffic'] = val

    def set_plants_weight(self, val):
        self.city_trgs['num_plants'] = val

    def set_rating_weight(self, val):
        self.city_trgs['mayor_rating'] = val

Ejemplo n.º 8

Mostrar archivo

Archivo: env.py Proyecto: branch3/gym-micropolis-ga

class MicropolisEnv(core.Env):
    def __init__(self, MAP_X=20, MAP_Y=20, PADDING=0):
        self.SHOW_GUI = False
        self.start_time = time.time()
        self.print_map = False
        self.num_episode = 0
        self.max_step = 1000
        self.max_static = 0
        self.player_step = False
        self.last_reward = 0

    #self.setMapSize((MAP_X, MAP_Y), PADDING)

    def seed(self, seed=None):
        """Seed the environment RNG and NumPy's global RNG.

        Returns a two-element list: the seed reported by
        ``seeding.np_random`` and a second seed hashed from it.
        """
        rng, primary = seeding.np_random(seed)
        self.np_random = rng
        # The derived seed is passed as a uint but checked as an int
        # elsewhere, so it is kept below 2**31.
        derived = seeding.hash_seed(primary + 1) % 2**31
        np.random.seed(seed)
        return [primary, derived]

    def setMapSize(self,
                   size,
                   max_step=None,
                   rank=None,
                   print_map=False,
                   PADDING=0,
                   static_builds=True,
                   parallel_gui=False,
                   render_gui=False,
                   empty_start=True):
        """Create the Micropolis controller and define the gym spaces.

        Args:
            size: an int for a square map, otherwise an indexable ``(x, y)``.
            max_step: optional override of the per-episode step limit.
            rank: forwarded to ``MicropolisControl``.
            print_map: stored as ``self.print_map``.
            PADDING: forwarded to ``MicropolisControl``; also added on both
                sides of ``MAP_X`` to give ``obs_width``.
            static_builds: accepted but not read; ``self.static_builds`` is
                always set to True.
            parallel_gui: forwarded to ``MicropolisControl``.
            render_gui: stored as ``self.render_gui``.
            empty_start: stored as ``self.empty_start``.
        """
        if max_step is not None:
            self.max_step = max_step
        self.empty_start = empty_start
        # isinstance rather than an exact type comparison.
        if isinstance(size, int):
            self.MAP_X = size
            self.MAP_Y = size
        else:
            self.MAP_X = size[0]
            self.MAP_Y = size[1]
        self.obs_width = self.MAP_X + PADDING * 2
        self.micro = MicropolisControl(self.MAP_X,
                                       self.MAP_Y,
                                       PADDING,
                                       parallel_gui=parallel_gui,
                                       rank=rank)
        self.static_builds = True
        self.win1 = self.micro.win1
        self.micro.SHOW_GUI = self.SHOW_GUI
        self.num_step = 0
        self.minFunds = 5000
        self.initFunds = 10000000
        self.num_tools = self.micro.num_tools
        self.num_zones = self.micro.num_zones
        # res, com, ind pop, demand
        self.num_scalars = 6
        self.num_density_maps = 3
        num_user_features = 1  # static builds
        # Channels: map features + scalar layers + density maps + user layer.
        self.num_obs_channels = (self.micro.map.num_features +
                                 self.num_scalars + self.num_density_maps +
                                 num_user_features)
        # One discrete action per (tool, x, y) combination.
        self.action_space = spaces.Discrete(self.num_tools * self.MAP_X *
                                            self.MAP_Y)
        self.last_state = None
        self.metadata = {'runtime.vectorized': True}

        obs_shape = (self.num_obs_channels, self.MAP_X, self.MAP_Y)
        low_obs = np.full(obs_shape, fill_value=-1)
        high_obs = np.full(obs_shape, fill_value=1)
        # TODO: can/should we use Tuples of MultiBinaries instead, for greater efficiency?
        self.observation_space = spaces.Box(low=low_obs,
                                            high=high_obs,
                                            dtype=float)
        self.state = None
        # Lookup tables between flat action indices and [tool, x, y].
        self.intsToActions = {}
        self.actionsToInts = np.zeros((self.num_tools, self.MAP_X, self.MAP_Y))
        self.mapIntsToActions()
        self.last_pop = 0
        self.last_num_roads = 0
        self.print_map = print_map
        self.render_gui = render_gui
        self.mayor_rating = 50
        self.last_mayor_rating = self.mayor_rating

        self.last_priority_road_net_size = 0

    def mapIntsToActionsChunk(self):
        """Number the actions chunk by chunk instead of tool-major.

        The map is walked in 20x20 blocks, each split into 10x10 sub-blocks;
        within a sub-block every tool is enumerated over all of its tiles.
        Only ``intsToActions`` is filled.
        """
        outer = 20
        inner = 10
        per_side = outer // inner
        triples = (
            [tool, col, row]
            for block_x in range(self.MAP_X // outer)
            for block_y in range(self.MAP_Y // outer)
            for sub_x in range(per_side)
            for sub_y in range(per_side)
            for tool in range(self.num_tools)
            for col in range(block_x * outer + sub_x * inner,
                             block_x * outer + (sub_x + 1) * inner)
            for row in range(block_y * outer + sub_y * inner,
                             block_y * outer + (sub_y + 1) * inner)
        )
        for index, triple in enumerate(triples):
            self.intsToActions[index] = triple

    def mapIntsToActions(self):
        """Fill the lookup tables between flat indices and ``[tool, x, y]``.

        Indices run tool-major, then x, then y; per the original author's
        note this is meant to match the order in which the pytorch model
        unrolls its output on the forward pass.
        """
        triples = ((tool, col, row)
                   for tool in range(self.num_tools)
                   for col in range(self.MAP_X)
                   for row in range(self.MAP_Y))
        for index, (tool, col, row) in enumerate(triples):
            self.intsToActions[index] = [tool, col, row]
            self.actionsToInts[tool, col, row] = index

    def randomStep(self):
        """Take one step with an action sampled from the action space."""
        sampled = self.action_space.sample()
        self.step(sampled)

    def close(self):
        """Close the underlying Micropolis controller."""
        controller = self.micro
        controller.close()

    def randomStaticStart(self):
        """Top up funds, then place a random number of static builds.

        The count is drawn with ``self.np_random.randint(0, 101)``. Every
        sampled action is applied with ``static_build=True``; the previous
        version also computed an alternating flag but never passed it on, so
        that dead code (and the unused ``lst_epi``) has been removed.
        """
        max_static = 100
        self.micro.setFunds(10000000)
        num_static = self.np_random.randint(0, max_static + 1)
        for _ in range(num_static):
            self.step(self.action_space.sample(), static_build=True)

    def randomStart(self):
        """Top up funds, then take a random number of sampled steps."""
        moves_left = self.np_random.randint(0, 100)
        self.micro.setFunds(10000000)
        while moves_left > 0:
            self.step(self.action_space.sample())
            moves_left -= 1
#       i = np.random.randint(0, (self.obs_width * self.obs_width / 3))
#       a = (np.random.randint(0, self.num_tools, i), np.random.randint(0, self.obs_width, i), np.random.randint(0, self.obs_width, i))
#       for j in range(i):
#           self.micro.takeSetupAction((a[0][j], a[1][j], a[2][j]))

    def reset(self):
        """Clear the map, restore funds and return the initial observation."""
        micro = self.micro
        # The bot-builds-only clear is disabled, so the whole map is cleared.
        micro.clearMap()
        if not self.empty_start:
            micro.newMap()
        self.num_step = 0
        micro.engine.simTick()
        micro.setFunds(self.initFunds)
        self.getPop()  # result is not used
        self.state = self.getState()
        micro.num_roads = 0
        self.last_pop = self.last_num_roads = 0
        self.num_episode += 1
        self.curr_reward = self.last_reward = 0
        return self.state

# def getRoadPenalty(self):
#
#     class roadPenalty(torch.nn.module):
#         def __init__(self):
#             super(roadPenalty, self).__init__()

#             self.

    def getState(self):
        """Build the observation from the three populations and three demands."""
        micro = self.micro
        populations = [micro.getResPop(), micro.getComPop(), micro.getIndPop()]
        res_demand, com_demand, ind_demand = micro.engine.getDemands()
        return self.observation(populations +
                                [res_demand, com_demand, ind_demand])

    def observation(self, scalars):
        """Stack map state, density maps and scalar layers along axis 0.

        Each entry of ``scalars`` becomes one ``(MAP_X, MAP_Y)`` layer filled
        with that value. When ``self.static_builds`` is truthy, the map's
        ``static_builds`` array is appended last.

        The unused ``road_networks`` lookup and the empty ``render_gui``
        branch of the previous version have been dropped.
        """
        state = self.micro.map.getMapState()
        density_maps = self.micro.getDensityMaps()
        scalar_layers = np.zeros((len(scalars), self.MAP_X, self.MAP_Y))
        # Iterating the array yields views, so fill() writes in place.
        for layer, value in zip(scalar_layers, scalars):
            layer.fill(value)
        state = np.concatenate((state, density_maps, scalar_layers), 0)
        if self.static_builds:
            state = np.concatenate((state, self.micro.map.static_builds), 0)
        return state

    def getPop(self):
        """Return a quarter of residential plus commercial plus industrial population."""
        micro = self.micro
        residential = (1 / 4) * micro.getResPop()
        commercial = micro.getComPop()
        industrial = micro.getIndPop()
        return residential + commercial + industrial

    def getPopReward(self):
        """Return the weighted population plus a zone-variety bonus.

        Residential population counts for a quarter. Each zone type beyond
        the first with a positive population adds 50.
        """
        micro = self.micro
        weighted = ((1 / 4) * micro.getResPop(), micro.getComPop(),
                    micro.getIndPop())
        total = weighted[0] + weighted[1] + weighted[2]
        occupied_types = sum(1 for pop in weighted if pop > 0)
        return total + max(0, (occupied_types - 1) * 50)

    def step(self, a, static_build=False):
        """Apply flat action ``a`` and return ``(state, reward, terminal, infos)``.

        The reward is the population score plus a hundredth of the total
        traffic, increased in proportion to the share of roads that belong
        to the largest road network, and reduced by a power-plant penalty
        capped at half the population score. ``infos['player_move']`` holds
        the flat index of the next pending player build, if any.
        """
        micro = self.micro
        if self.player_step:
            # A step that repeats the recorded player move is not static.
            if self.player_step == a:
                static_build = False
            self.player_step = None
        micro.takeAction(self.intsToActions[a], static_build)

        self.curr_pop = self.getPopReward()
        self.curr_mayor_rating = self.getRating()
        self.state = self.getState()

        score = self.curr_pop + (micro.total_traffic / 100)
        city_map = micro.map
        # Guarded to avoid one-road minima in early training.
        if score > 0 and city_map.num_roads > 0:
            largest_net = max([0, *city_map.road_net_sizes.values()])
            # min(100, score): the avg reward when roads are introduced to
            # boost res.
            score += (largest_net / city_map.num_roads) * min(100, score)
        extra_plants = max(1, city_map.num_plants) - 1
        score -= min(extra_plants, self.curr_pop / 2)
        self.curr_reward = self.last_reward = score

        self.last_mayor_rating = self.curr_mayor_rating
        self.last_pop = self.curr_pop
        out_of_money = micro.getFunds() < self.minFunds
        terminal = out_of_money or self.num_step >= self.max_step
        if self.print_map:
            if static_build:
                print('STATIC BUILD')
            self.printMap()
        if self.render_gui:
            micro.render()

        infos = {}
        pending = micro.player_builds
        if pending:
            move = self.actionsToInts[pending[0]]
            infos['player_move'] = int(move)
            micro.player_builds = pending[1:]
            self.player_step = move
        self.num_step += 1

        return (self.state, self.curr_reward, terminal, infos)

    def getRating(self):
        """Return the ``cityYes`` value read from the Micropolis engine."""
        engine = self.micro.engine
        return engine.cityYes

    def printMap(self, static_builds=True):
        """Print the latest zone map, a summary line and the static-build map.

        When ``static_builds`` is falsy, ``None`` is printed in place of the
        static-build map.
        """
        static_map = self.micro.map.static_builds if static_builds else None
        np.set_printoptions(threshold=np.inf)
        text = np.array_repr(self.micro.map.zoneMap[-1])
        # Applied in order: each substitution strips separators or padding
        # from the array repr.
        substitutions = (
            (',  ', '  '),
            ('],\n', ']\n'),
            (',\n', ','),
            (', ', ' '),
            ('        ', ' '),
            ('         ', '  '),
        )
        for old, new in substitutions:
            text = text.replace(old, new)
        template = '{}\npopulation: {}, traffic: {}, episode: {}, step: {}, reward: {}\n {}'
        print(template.format(text, self.curr_pop, self.micro.total_traffic,
                              self.num_episode, self.num_step,
                              self.curr_reward, static_map))

    #print(self.micro.map.centers)

    def render(self, mode='human'):
        """Delegate rendering to the Micropolis controller; ``mode`` is not used."""
        controller = self.micro
        controller.render()

    def test(self):
        """Smoke-test a fresh environment with 5000 sampled actions.

        ``__init__`` does not build the map, so ``setMapSize`` and ``reset``
        are called first; without them ``action_space``, ``micro`` and
        ``intsToActions`` are not set when ``step`` runs. The episode is
        reset whenever ``step`` reports it as terminal.
        """
        env = MicropolisEnv()
        env.setMapSize((20, 20))
        env.reset()
        for _ in range(5000):
            _, _, terminal, _ = env.step(env.action_space.sample())
            if terminal:
                env.reset()

Ejemplo n.º 9

Mostrar archivo

class MicropolisEnv(core.Env):
    def __init__(self, MAP_X=14, MAP_Y=14, PADDING=0):
        self.SHOW_GUI = False
        self.start_time = time.time()
        self.print_map = False

    #self.setMapSize((MAP_X, MAP_Y), PADDING)

    def seed(self, seed=None):
        """Seed the environment RNG and NumPy's global RNG.

        ``randomStart`` and ``randomStaticStart`` draw from ``np.random``, so
        the global generator is seeded as well; previously ``seed`` had no
        effect on them.

        Returns:
            ``[seed1, seed2]``: the seed reported by ``seeding.np_random``
            and a second seed hashed from it.
        """
        self.np_random, seed1 = seeding.np_random(seed)
        # Derive a random seed. This gets passed as a uint, but gets
        # checked as an int elsewhere, so we need to keep it below
        # 2**31.
        seed2 = seeding.hash_seed(seed1 + 1) % 2**31
        # Original note: empirically, seeding has to happen before loading
        # the ROM (ignored for now in this environment).
        np.random.seed(seed)
        return [seed1, seed2]

    def setMapSize(self, size, print_map=False, PADDING=0, static_builds=True):
        """Create a square Micropolis map and define the gym spaces.

        Args:
            size: side length of the square map.
            print_map: stored as ``self.print_map`` (previously this
                argument was silently dropped).
            PADDING: forwarded to ``MicropolisControl``; also added on both
                sides of ``MAP_X`` to give ``obs_width``.
            static_builds: accepted but not read; ``self.static_builds`` is
                always set to True.
        """
        self.print_map = print_map
        self.MAP_X = size
        self.MAP_Y = size
        self.obs_width = self.MAP_X + PADDING * 2
        self.micro = MicropolisControl(self.MAP_X, self.MAP_Y, PADDING)
        self.static_builds = True
        if self.static_builds:
            self.micro.map.initStaticBuilds()
        self.win1 = self.micro.win1
        self.micro.SHOW_GUI = self.SHOW_GUI
        self.num_step = 0
        self.minFunds = 5000
        self.initFunds = 10000000
        self.num_tools = self.micro.num_tools
        self.num_zones = self.micro.num_zones
        self.num_scalars = 1
        # Channels: zones + scalar layers + traffic, power, density.
        self.num_obs_channels = self.num_zones + self.num_scalars + 3
        if self.static_builds:
            self.num_obs_channels += 1
        # Actions are (tool, x, y) triples drawn from a Box.
        ac_low = np.zeros((3))
        ac_high = np.array(
            [self.num_tools - 1, self.MAP_X - 1, self.MAP_Y - 1])
        self.action_space = spaces.Box(low=ac_low, high=ac_high, dtype=int)
        self.last_state = None
        self.metadata = {'runtime.vectorized': True}
        low_obs = np.zeros((self.num_obs_channels, self.MAP_X, self.MAP_Y))
        high_obs = np.full((self.num_obs_channels, self.MAP_X, self.MAP_Y),
                           fill_value=1)
        # TODO: can/should we use Tuples of MultiBinaries instead, for greater efficiency?
        self.observation_space = spaces.Box(low=low_obs,
                                            high=high_obs,
                                            dtype=bool)
        self.state = None
        #       self.intsToActions = {}
        #       self.mapIntsToActions
        #       self.mapIntsToActions()
        self.last_pop = 0
        self.last_num_roads = 0
#       self.past_actions = np.full((self.num_tools, self.MAP_X, self.MAP_Y), False)

#def mapIntsToActionsChunk(self):
#    ''' Unrolls the action vector into spatial chunks (does this matter empirically?).'''
#    w0 = 20
#    w1 = 10
#    i = 0
#    for j0 in range(self.MAP_X // w0):
#        for k0 in range(self.MAP_Y // w0):
#            for j1 in range(w0 // w1):
#                for k1 in range(w0 // w1):
#                    for z in range(self.num_tools):
#                        for x in range(j0 * w0 + j1*w1,
#                                j0 * w0 + (j1+1)*w1):
#                            for y in range(k0 * w0 + k1*w1,
#                                    k0 * w0 + (k1+1)*w1):
#                                self.intsToActions[i] = [z, x, y]
#                                i += 1

#def mapIntsToActions(self):
#    ''' Unrolls the action vector in the same order as the pytorch model
#    on its forward pass.'''
#    chunk_width = 1
#    i = 0
#    for z in range(self.num_tools):
#        for x in range(self.MAP_X):
#            for y in range(self.MAP_Y):
#                    self.intsToActions[i] = [z, x, y]
#                    i += 1

    def close(self):
        """Close the underlying Micropolis controller."""
        controller = self.micro
        controller.close()

    def randomStaticStart(self):
        """Place a few random static builds while over half the tiles are empty.

        The number of attempts is drawn with ``np.random.randint(1, 5)``.
        Original note: cannot overwrite itself.
        """
        tile_threshold = self.MAP_X * self.MAP_Y // 2
        attempts = np.random.randint(1, 5)
        self.micro.setFunds(10000000)
        for _ in range(attempts):
            # Stop as soon as half (or more) of the map is occupied.
            if self.micro.map.num_empty <= tile_threshold:
                return
            self.step(self.action_space.sample(), static_build=True)

    def randomStart(self):
        """Top up funds, then take a random number of sampled steps."""
        moves_left = np.random.randint(0, 100)
        self.micro.setFunds(10000000)
        while moves_left > 0:
            self.step(self.action_space.sample())
            moves_left -= 1
#       i = np.random.randint(0, (self.obs_width * self.obs_width / 3))
#       a = (np.random.randint(0, self.num_tools, i), np.random.randint(0, self.obs_width, i), np.random.randint(0, self.obs_width, i))
#       for j in range(i):
#           self.micro.takeSetupAction((a[0][j], a[1][j], a[2][j]))

    def reset(self):
        """Clear the map, seed it with static builds and return the first observation."""
        micro = self.micro
        micro.clearMap()
        self.num_step = 0
        self.randomStaticStart()
        micro.engine.simTick()
        micro.setFunds(self.initFunds)
        micro.getFunds()  # result is not used
        self.state = self.observation([self.getPop()])
        micro.num_roads = 0
        self.last_pop = self.last_num_roads = 0
        return self.state

    def observation(self, scalars):
        """Stack map state, power, population, traffic and scalar layers along axis 0.

        Each entry of ``scalars`` becomes one ``(MAP_X, MAP_Y)`` layer filled
        with that value. When ``self.static_builds`` is truthy, the map's
        ``static_builds`` array is appended last.
        """
        micro = self.micro
        map_state = micro.map.getMapState()
        power_map = micro.getPowerMap()
        pop_map = micro.getPopDensityMap()
        traffic_map = micro.getTrafficDensityMap()
        layers = np.zeros((len(scalars), self.MAP_X, self.MAP_Y))
        # Iterating the array yields views, so fill() writes in place.
        for layer, value in zip(layers, scalars):
            layer.fill(value)
        stacked = np.concatenate(
            (map_state, power_map, pop_map, traffic_map, layers), 0)
        if not self.static_builds:
            return stacked
        return np.concatenate((stacked, micro.map.static_builds), 0)

    def getPop(self):
        """Return an eighth of residential plus commercial plus industrial population."""
        micro = self.micro
        residential = micro.getResPop() / 8
        commercial = micro.getComPop()
        industrial = micro.getIndPop()
        return residential + commercial + industrial

    def step(self, a, static_build=False):
        """Apply action ``a`` and return ``(state, reward, terminal, {})``.

        ``a`` is converted to a list and printed before being handed to the
        controller. The reward is the change in population since the last
        step. The episode is terminal when funds drop below ``minFunds`` or
        ``num_step`` has reached 100.
        """
        micro = self.micro
        action = list(a)
        print(action)
        micro.takeAction(action, static_build)
        self.curr_pop = self.getPop()
        self.state = self.observation([self.curr_pop])
        # Population gain is the only active reward term.
        reward = 0 + (self.curr_pop - self.last_pop)
        self.last_num_roads = micro.num_roads
        self.last_pop = self.curr_pop
        out_of_money = micro.getFunds() < self.minFunds
        terminal = out_of_money or self.num_step >= 100
        if terminal and self.print_map:
            if static_build:
                print('STATIC BUILD')
            self.printMap()
        self.num_step += 1
        return (self.state, reward, terminal, {})

    def printMap(self):
        """Print the latest zone map offset by 2, population, traffic and static builds."""
        city_map = self.micro.map
        offset = np.full((self.MAP_X, self.MAP_Y), 2)
        shifted = np.add(city_map.zoneMap[-1], offset)
        template = '{}\npopulation: {}\ntraffic: {}\n{}\n'
        print(template.format(shifted, self.curr_pop,
                              self.micro.total_traffic,
                              city_map.static_builds))

    def render(self, mode='human'):
        """Run two GTK main-loop iterations; ``mode`` is not used."""
        # Original note: a single iteration leaves the window blank, for
        # reasons the author had not determined.
        for _ in range(2):
            gtk.main_iteration()

    def test(self):
        """Smoke-test a fresh environment with 5000 sampled actions.

        ``__init__`` does not build the map, so ``setMapSize`` and ``reset``
        are called first; without them ``action_space`` and ``micro`` are not
        set when ``step`` runs. The episode is reset whenever ``step``
        reports it as terminal.
        """
        env = MicropolisEnv()
        env.setMapSize(14)
        env.reset()
        for _ in range(5000):
            _, _, terminal, _ = env.step(env.action_space.sample())
            if terminal:
                env.reset()

Ejemplo n.º 10

Mostrar archivo

class MicropolisEnv(core.Env):
    def __init__(self, MAP_X=20, MAP_Y=20, PADDING=0):
        self.SHOW_GUI = False
        self.start_time = time.time()
        self.print_map = False
        self.num_episode = 0
        self.max_step = 500
        self.max_static = 0

    #self.setMapSize((MAP_X, MAP_Y), PADDING)

    def seed(self, seed=None):
        """Seed the environment RNG and NumPy's global RNG.

        Returns a two-element list: the seed reported by
        ``seeding.np_random`` and a second seed hashed from it.
        """
        rng, primary = seeding.np_random(seed)
        self.np_random = rng
        # The derived seed is passed as a uint but checked as an int
        # elsewhere, so it is kept below 2**31.
        derived = seeding.hash_seed(primary + 1) % 2**31
        np.random.seed(seed)
        return [primary, derived]

    def setMapSize(self,
                   size,
                   print_map=False,
                   PADDING=0,
                   static_builds=True,
                   parallel_gui=False,
                   render_gui=False,
                   empty_start=True):
        """Create the Micropolis controller and define the gym spaces.

        Args:
            size: an int for a square map, otherwise an indexable ``(x, y)``.
            print_map: stored as ``self.print_map``.
            PADDING: forwarded to ``MicropolisControl``; also added on both
                sides of ``MAP_X`` to give ``obs_width``.
            static_builds: accepted but not read; ``self.static_builds`` is
                always set to True.
            parallel_gui: forwarded to ``MicropolisControl``.
            render_gui: stored as ``self.render_gui``.
            empty_start: stored as ``self.empty_start``.
        """
        self.empty_start = empty_start
        # isinstance rather than an exact type comparison.
        if isinstance(size, int):
            self.MAP_X = size
            self.MAP_Y = size
        else:
            self.MAP_X = size[0]
            self.MAP_Y = size[1]
        self.obs_width = self.MAP_X + PADDING * 2
        self.micro = MicropolisControl(self.MAP_X,
                                       self.MAP_Y,
                                       PADDING,
                                       parallel_gui=parallel_gui)
        self.static_builds = True
        if self.static_builds:
            self.micro.map.initStaticBuilds()
        self.win1 = self.micro.win1
        self.micro.SHOW_GUI = self.SHOW_GUI
        self.num_step = 0
        self.minFunds = 5000
        self.initFunds = 10000000
        self.num_tools = self.micro.num_tools
        self.num_zones = self.micro.num_zones
        self.num_scalars = 1
        # Channels: map features + scalar layers + traffic, power, density.
        self.num_obs_channels = self.micro.map.num_features + self.num_scalars + 3
        if self.static_builds:
            self.num_obs_channels += 1

        # One discrete action per (tool, x, y) combination.
        self.action_space = spaces.Discrete(self.num_tools * self.MAP_X *
                                            self.MAP_Y)
        self.last_state = None
        self.metadata = {'runtime.vectorized': True}

        low_obs = np.zeros((self.num_obs_channels, self.MAP_X, self.MAP_Y))
        high_obs = np.full((self.num_obs_channels, self.MAP_X, self.MAP_Y),
                           fill_value=1)
        # TODO: can/should we use Tuples of MultiBinaries instead, for greater efficiency?
        self.observation_space = spaces.Box(low=low_obs,
                                            high=high_obs,
                                            dtype=int)
        self.state = None
        # The stray no-op ``self.mapIntsToActions`` statement that preceded
        # this call has been removed.
        self.intsToActions = {}
        self.mapIntsToActions()
        self.last_pop = 0
        self.last_num_roads = 0
        self.print_map = print_map
        self.render_gui = render_gui

    def mapIntsToActionsChunk(self):
        """Number the actions chunk by chunk instead of tool-major.

        The map is walked in 20x20 blocks, each split into 10x10 sub-blocks;
        within a sub-block every tool is enumerated over all of its tiles.
        """
        outer = 20
        inner = 10
        per_side = outer // inner
        triples = (
            [tool, col, row]
            for block_x in range(self.MAP_X // outer)
            for block_y in range(self.MAP_Y // outer)
            for sub_x in range(per_side)
            for sub_y in range(per_side)
            for tool in range(self.num_tools)
            for col in range(block_x * outer + sub_x * inner,
                             block_x * outer + (sub_x + 1) * inner)
            for row in range(block_y * outer + sub_y * inner,
                             block_y * outer + (sub_y + 1) * inner)
        )
        for index, triple in enumerate(triples):
            self.intsToActions[index] = triple

    def mapIntsToActions(self):
        """Map each flat action index to its ``[tool, x, y]`` triple.

        Indices run tool-major, then x, then y; per the original author's
        note this is meant to match the order in which the pytorch model
        unrolls its output on the forward pass.
        """
        triples = ([tool, col, row]
                   for tool in range(self.num_tools)
                   for col in range(self.MAP_X)
                   for row in range(self.MAP_Y))
        for index, triple in enumerate(triples):
            self.intsToActions[index] = triple

    def randomStep(self):
        """Take one step with an action sampled from the action space."""
        sampled = self.action_space.sample()
        self.step(sampled)

    def close(self):
        """Close the underlying Micropolis controller."""
        controller = self.micro
        controller.close()

    def randomStaticStart(self):
        """Top up funds, then place a random number of static builds.

        The count is drawn with ``self.np_random.randint(0, 101)``. Every
        sampled action is applied with ``static_build=True``; the previous
        version also computed an alternating flag but never passed it on, so
        that dead code (and the unused ``lst_epi``) has been removed.
        """
        max_static = 100
        self.micro.setFunds(10000000)
        num_static = self.np_random.randint(0, max_static + 1)
        for _ in range(num_static):
            self.step(self.action_space.sample(), static_build=True)

    def randomStart(self):
        """Top up funds, then take a random number of sampled steps."""
        moves_left = self.np_random.randint(0, 100)
        self.micro.setFunds(10000000)
        while moves_left > 0:
            self.step(self.action_space.sample())
            moves_left -= 1
#       i = np.random.randint(0, (self.obs_width * self.obs_width / 3))
#       a = (np.random.randint(0, self.num_tools, i), np.random.randint(0, self.obs_width, i), np.random.randint(0, self.obs_width, i))
#       for j in range(i):
#           self.micro.takeSetupAction((a[0][j], a[1][j], a[2][j]))

    def reset(self):
        """Clear or regenerate the map, restore funds and return the first observation."""
        micro = self.micro
        # empty_start picks between a blank map and a newly generated one.
        prepare_map = micro.clearMap if self.empty_start else micro.newMap
        prepare_map()
        self.num_step = 0
        micro.engine.simTick()
        micro.setFunds(self.initFunds)
        self.state = self.observation([self.getPop()])
        micro.num_roads = 0
        self.last_pop = self.last_num_roads = 0
        self.num_episode += 1
        return self.state

    def observation(self, scalars):
        """Stack map state, power, population, traffic and scalar layers along axis 0.

        Each entry of ``scalars`` becomes one ``(MAP_X, MAP_Y)`` layer filled
        with that value. When ``self.static_builds`` is truthy, the map's
        ``static_builds`` array is appended last.
        """
        micro = self.micro
        map_state = micro.map.getMapState()
        power_map = micro.getPowerMap()
        pop_map = micro.getPopDensityMap()
        traffic_map = micro.getTrafficDensityMap()
        layers = np.zeros((len(scalars), self.MAP_X, self.MAP_Y))
        # Iterating the array yields views, so fill() writes in place.
        for layer, value in zip(layers, scalars):
            layer.fill(value)
        stacked = np.concatenate(
            (map_state, power_map, pop_map, traffic_map, layers), 0)
        if not self.static_builds:
            return stacked
        return np.concatenate((stacked, micro.map.static_builds), 0)

    def getPop(self):
        """Return the weighted population: 0.2 * res + 12 * com + 4 * ind."""
        micro = self.micro
        residential = 0.2 * micro.getResPop()
        commercial = 12 * micro.getComPop()
        industrial = 4 * micro.getIndPop()
        return residential + commercial + industrial

    def step(self, a, static_build=False):
        """Apply flat action ``a`` and return ``(state, reward, terminal, {})``.

        The reward is the change in weighted population since the last step.
        The episode is terminal when funds drop below ``minFunds`` or
        ``num_step`` has reached ``max_step``.
        """
        micro = self.micro
        micro.takeAction(self.intsToActions[a], static_build)
        self.curr_pop = self.getPop()
        self.state = self.observation([self.curr_pop])
        reward = 0 + (self.curr_pop - self.last_pop)
        self.last_pop = self.curr_pop
        out_of_money = micro.getFunds() < self.minFunds
        terminal = out_of_money or self.num_step >= self.max_step
        if self.print_map:
            if static_build:
                print('STATIC BUILD')
            self.printMap()
        self.num_step += 1
        if self.render_gui:
            micro.render()
        return (self.state, reward, terminal, {})

    def printMap(self, static_builds=True):
        """Print the latest zone map, a summary line and the static-build map.

        When ``static_builds`` is falsy, ``None`` is printed in place of the
        static-build map.
        """
        static_map = self.micro.map.static_builds if static_builds else None
        np.set_printoptions(threshold=np.inf)
        text = np.array_repr(self.micro.map.zoneMap[-1])
        # Applied in order: each substitution strips separators or padding
        # from the array repr.
        substitutions = (
            (',  ', '  '),
            ('],\n', ']\n'),
            (',\n', ','),
            (', ', ' '),
            ('        ', ' '),
            ('         ', '  '),
        )
        for old, new in substitutions:
            text = text.replace(old, new)
        template = '{}\npopulation: {}, traffic: {}, episode: {}, step: {} \n{}'
        print(template.format(text, self.curr_pop, self.micro.total_traffic,
                              self.num_episode, self.num_step, static_map))

    #print(self.micro.map.centers)

    def render(self, mode='human'):
        """Delegate rendering to the Micropolis controller; ``mode`` is not used."""
        controller = self.micro
        controller.render()

    def test(self):
        """Smoke-test a fresh environment with 5000 sampled actions.

        ``__init__`` does not build the map, so ``setMapSize`` and ``reset``
        are called first; without them ``action_space``, ``micro`` and
        ``intsToActions`` are not set when ``step`` runs. The episode is
        reset whenever ``step`` reports it as terminal.
        """
        env = MicropolisEnv()
        env.setMapSize((20, 20))
        env.reset()
        for _ in range(5000):
            _, _, terminal, _ = env.step(env.action_space.sample())
            if terminal:
                env.reset()

Ejemplo n.º 11

Mostrar archivo

class MicropolisEnv(core.Env):
    def __init__(self, MAP_X=20, MAP_Y=20, PADDING=0):
        self.SHOW_GUI = False
        self.start_time = time.time()
        self.print_map = False
        self.num_episode = 0
        self.max_static = 0
        self.player_step = False
        self.static_player_builds = False
        ### MIXED
        self.city_trgs = OrderedDict({
            'res_pop': 200,
            'com_pop': 100,
            'ind_pop': 100,
            'traffic': 100,
            'num_plants': 50,
            'mayor_rating': 100
        })
        self.param_bounds = {
            'res_pop': (0, 500),
            'com_pop': (0, 100),
            'ind_pop': (0, 100),
            'traffic': (0, 1000),
            'num_plants': (0, 100),
            'mayor_rating': (0, 100)
        }
        ### MIXED
        #self.city_trgs = {
        #        'res_pop': 1,
        #        'com_pop': 4,
        #        'ind_pop': 4,
        #        'traffic': 0.2,
        #        'num_plants': 0,
        #        'mayor_rating': 0}
        ### Traffic
        #self.city_trgs = {
        #        'res_pop': 1,
        #        'com_pop': 4,
        #        'ind_pop': 4,
        #        'traffic': 5,
        #        'num_plants': 0,
        #        'mayor_rating':0
        #        }
        self.city_metrics = {}
        self.max_reward = 100

    #self.setMapSize((MAP_X, MAP_Y), PADDING)

    def seed(self, seed=None):
        '''Seed this environment's RNG and NumPy's global RNG.

        Args:
            seed: Value handed to ``seeding.np_random`` and ``np.random.seed``.

        Returns:
            ``[seed1, seed2]``: the seed reported by ``seeding.np_random`` and
            a second seed derived from it.
        '''
        rng, primary_seed = seeding.np_random(seed)
        self.np_random = rng
        # Derive a random seed. This gets passed as a uint, but gets
        # checked as an int elsewhere, so we need to keep it below
        # 2**31.
        derived_seed = seeding.hash_seed(primary_seed + 1) % 2**31
        np.random.seed(seed)
        return [primary_seed, derived_seed]

    def setMapSize(self,
                   size,
                   max_step=None,
                   rank=None,
                   print_map=False,
                   PADDING=0,
                   static_builds=True,
                   parallel_gui=False,
                   render_gui=False,
                   empty_start=True,
                   simple_reward=False,
                   power_puzzle=False,
                   record=False,
                   traffic_only=False,
                   random_builds=False,
                   poet=False):
        '''Create the simulator and size the action/observation spaces.

        Args:
            size: Either one integer (square map) or an indexable pair
                ``(MAP_X, MAP_Y)``. NumPy integer scalars are accepted too.
            max_step: Stored as ``self.max_step``.
            rank: Forwarded to ``MicropolisControl``.
            print_map: Stored as ``self.print_map``.
            PADDING: Forwarded to ``MicropolisControl``; also widens
                ``self.obs_width`` by ``2 * PADDING``.
            static_builds: Accepted but ignored; ``self.static_builds`` is
                always set to ``True``.
            parallel_gui: Forwarded to ``MicropolisControl``.
            render_gui: Stored as ``self.render_gui``.
            empty_start: Stored as ``self.empty_start``.
            simple_reward: Stored as ``self.simple_reward``.
            power_puzzle: Stored and forwarded to ``MicropolisControl``.
            record: Not supported; any truthy value raises.
            traffic_only: Stored as ``self.traffic_only``.
            random_builds: Stored as ``self.random_builds``.
            poet: When true, one observation channel is added per entry of
                ``self.city_trgs``.

        Raises:
            NotImplementedError: If ``record`` is truthy.
        '''
        self.random_builds = random_builds
        self.traffic_only = traffic_only
        if record:
            raise NotImplementedError
        self.max_step = max_step
        self.empty_start = empty_start
        self.simple_reward = simple_reward
        self.power_puzzle = power_puzzle
        # isinstance (rather than an exact type comparison) lets integer
        # subclasses and NumPy integer scalars count as a square size instead
        # of falling through to the indexing branch and failing there.
        if isinstance(size, (int, np.integer)):
            self.MAP_X = size
            self.MAP_Y = size
        else:
            self.MAP_X = size[0]
            self.MAP_Y = size[1]
        self.obs_width = self.MAP_X + PADDING * 2
        self.micro = MicropolisControl(self,
                                       self.MAP_X,
                                       self.MAP_Y,
                                       PADDING,
                                       parallel_gui=parallel_gui,
                                       rank=rank,
                                       power_puzzle=power_puzzle)
        # NOTE(review): forced on regardless of the ``static_builds``
        # argument; the channel count below always includes one static-build
        # channel (``num_user_features``).
        self.static_builds = True
        self.win1 = self.micro.win1
        self.micro.SHOW_GUI = self.SHOW_GUI
        self.num_step = 0
        self.minFunds = 5000
        self.initFunds = 10000000
        self.num_tools = self.micro.num_tools
        self.num_zones = self.micro.num_zones
        # res, com, ind pop, demand
        self.num_scalars = 6
        # traffic, power, density
        self.num_density_maps = 3
        num_user_features = 1  # static builds
        self.num_obs_channels = (self.micro.map.num_features +
                                 self.num_scalars + self.num_density_maps +
                                 num_user_features)
        self.poet = poet
        if poet:
            self.num_obs_channels += len(self.city_trgs)
        # One discrete action per (tool, x, y) combination.
        self.action_space = spaces.Discrete(self.num_tools * self.MAP_X *
                                            self.MAP_Y)
        self.last_state = None
        self.metadata = {'runtime.vectorized': True}

        obs_shape = (self.num_obs_channels, self.MAP_X, self.MAP_Y)
        low_obs = np.full(obs_shape, fill_value=-1)
        high_obs = np.full(obs_shape, fill_value=1)
        # TODO: can/should we use Tuples of MultiBinaries instead, for greater efficiency?
        self.observation_space = spaces.Box(low=low_obs,
                                            high=high_obs,
                                            dtype=float)
        self.state = None
        # Both lookup tables are filled in by mapIntsToActions().
        self.intsToActions = {}
        self.actionsToInts = np.zeros((self.num_tools, self.MAP_X, self.MAP_Y))
        self.mapIntsToActions()
        self.last_pop = 0
        self.last_num_roads = 0
        self.print_map = print_map
        self.render_gui = render_gui
        self.auto_reset = True
        self.mayor_rating = 50
        self.last_mayor_rating = self.mayor_rating
        self.last_priority_road_net_size = 0
        self.display_city_trgs()

    def get_param_bounds(self):
        '''Return ``self.param_bounds`` (the stored object, not a copy).'''
        bounds = self.param_bounds
        return bounds

    def display_city_trgs(self):
        '''Show the current city targets on the agent panel and return them.'''
        panel = self.win1.agentPanel
        panel.displayTrgs(self.city_trgs)
        return self.city_trgs

    def mapIntsToActionsChunk(self):
        '''Fill ``self.intsToActions`` chunk by chunk instead of row by row.

        The map is cut into 20x20 tiles, each tile into 10x10 sub-tiles, and
        within a sub-tile every tool enumerates all of its cells before the
        next tool starts. Any part of the map beyond a whole number of 20-cell
        tiles is not enumerated. (Original author's open question: does this
        ordering matter empirically?)
        '''
        tile, sub = 20, 10
        subs_per_tile = tile // sub
        index = 0
        for tile_x in range(self.MAP_X // tile):
            for tile_y in range(self.MAP_Y // tile):
                for sub_x in range(subs_per_tile):
                    x_start = tile_x * tile + sub_x * sub
                    for sub_y in range(subs_per_tile):
                        y_start = tile_y * tile + sub_y * sub
                        for tool in range(self.num_tools):
                            for x in range(x_start, x_start + sub):
                                for y in range(y_start, y_start + sub):
                                    self.intsToActions[index] = [tool, x, y]
                                    index += 1

    def mapIntsToActions(self):
        '''Build the two-way mapping between flat action ids and ``[tool, x, y]``.

        Ids are assigned tool-major, then x, then y. The original author notes
        this is meant to match the order in which the pytorch model unrolls
        its action vector on the forward pass. Fills ``self.intsToActions``
        and ``self.actionsToInts`` and prints a short summary.
        '''
        width, height = self.MAP_X, self.MAP_Y
        for tool in range(self.num_tools):
            for x in range(width):
                # Flat id of the first cell of this (tool, x) row.
                row_start = (tool * width + x) * height
                for y in range(height):
                    action_id = row_start + y
                    self.intsToActions[action_id] = [tool, x, y]
                    self.actionsToInts[tool, x, y] = action_id
        summary = 'len of intsToActions: {}\n num tools: {}'.format(
            len(self.intsToActions), self.num_tools)
        print(summary)

    def randomStep(self):
        '''Take one step with an action sampled from ``self.action_space``.

        The transition returned by ``step`` is discarded.
        '''
        sampled_action = self.action_space.sample()
        self.step(sampled_action)

    def close(self):
        '''Shut the environment down by closing the underlying simulator control.'''
        simulator = self.micro
        simulator.close()

    def randomStaticStart(self):
        '''Seed the map with a random number of static builds.

        Funds are topped up first. A count between 0 and one tenth of the map
        area (inclusive, rounded down) is then drawn from ``self.np_random``,
        and that many sampled actions are applied through ``step`` with
        ``static_build=True``.
        '''
        # Floor division keeps the bound an int: it feeds randint() and
        # range(), and true division would hand both a float.
        max_static = (self.MAP_X * self.MAP_Y) // 10
        self.micro.setFunds(10000000)
        num_static = 0
        if max_static > 0:
            num_static = self.np_random.randint(0, max_static + 1)
        for _ in range(num_static):
            self.step(self.action_space.sample(), static_build=True)

    def randomStart(self):
        '''Top up funds, then apply a random number (0-99) of sampled actions.

        The number of actions is drawn from ``self.np_random`` before the
        funds are set.
        '''
        num_moves = self.np_random.randint(0, 100)
        self.micro.setFunds(10000000)
        for _ in range(num_moves):
            self.step(self.action_space.sample())
#       i = np.random.randint(0, (self.obs_width * self.obs_width / 3))
#       a = (np.random.randint(0, self.num_tools, i), np.random.randint(0, self.obs_width, i), np.random.randint(0, self.obs_width, i))
#       for j in range(i):
#           self.micro.takeSetupAction((a[0][j], a[1][j], a[2][j]))

    def powerPuzzle(self):
        '''Scatter five residential zones, then place one nuclear plant.

        All builds are static and land on uniformly random map coordinates.
        Plant placement is retried until ``self.micro.map.num_plants`` is
        non-zero. Original author's intent: if the agent is restricted to
        building power lines, this tests its ability to make long-range
        associations.
        '''
        sim = self.micro

        def random_tile():
            # x is drawn before y, so the global NumPy RNG stream is
            # consumed in a fixed order.
            x = np.random.randint(0, sim.MAP_X)
            y = np.random.randint(0, sim.MAP_Y)
            return x, y

        for _ in range(5):
            x, y = random_tile()
            sim.doBotTool(x, y, 'Residential', static_build=True)
        while sim.map.num_plants == 0:
            x, y = random_tile()
            sim.doBotTool(x, y, 'NuclearPowerPlant', static_build=True)

    def reset(self):
        '''Start a new episode and return its first observation.

        Clears the map (and generates a new one unless ``self.empty_start``),
        optionally runs the power-puzzle and random-static-build setups, ticks
        the simulation once, restores the initial funds and zeroes the
        per-episode counters.
        '''
        sim = self.micro
        self.display_city_trgs()
        sim.clearMap()
        if not self.empty_start:
            sim.newMap()
        self.num_step = 0

        # Optional pre-episode setups, in fixed order.
        if self.power_puzzle:
            self.powerPuzzle()
        if self.random_builds:
            self.randomStaticStart()

        sim.engine.simTick()
        # Funds are restored after setup, so setup spending is not counted.
        sim.setFunds(self.initFunds)
        self.state = self.getState()

        self.last_pop = 0
        sim.num_roads = 0
        self.last_num_roads = 0
        self.num_episode += 1
        return self.state

# def getRoadPenalty(self):
#
#     class roadPenalty(torch.nn.module):
#         def __init__(self):
#             super(roadPenalty, self).__init__()

#             self.

    def getState(self):
        '''Collect the scalar features and turn them into an observation.

        The scalars are the residential, commercial and industrial
        populations followed by the three demand values from the engine. When
        ``self.poet`` is set, the city target values are appended as well.
        '''
        sim = self.micro
        scalars = [sim.getResPop(), sim.getComPop(), sim.getIndPop()]
        # Unpacking insists on exactly three demand values.
        res_demand, com_demand, ind_demand = sim.engine.getDemands()
        scalars += [res_demand, com_demand, ind_demand]
        if self.poet:
            scalars.extend(self.city_trgs.values())
        return self.observation(scalars)

    def observation(self, scalars):
        '''Stack map features, density maps and scalar planes into one array.

        Args:
            scalars: Sequence of values; each becomes one constant-valued
                ``MAP_X`` x ``MAP_Y`` plane. String entries leave their plane
                at zero.

        Returns:
            The concatenation along axis 0 of the map state, the density
            maps, the scalar planes and, when ``self.static_builds`` is set,
            the static-build map.
        '''
        map_state = self.micro.map.getMapState()
        density_maps = self.micro.getDensityMaps()
        # Still read, though unused, so attribute access stays as before.
        _road_networks = self.micro.map.road_networks

        scalar_planes = np.zeros((len(scalars), self.MAP_X, self.MAP_Y))
        for plane, value in zip(scalar_planes, scalars):
            if type(value) is not str:
                plane.fill(value)

        layers = (map_state, density_maps, scalar_planes)
        stacked = np.concatenate(layers, 0)
        if self.static_builds:
            stacked = np.concatenate(
                (stacked, self.micro.map.static_builds), 0)
        return stacked

#   def getPop(self):
#       self.resPop, self.comPop, self.indPop = self.micro.getResPop(), \
#                                    self.micro.getComPop(), \
#                                    self.micro.getIndPop()

#       curr_pop = resPop + \
#                  comPop + \
#                  indPop

#       return curr_pop

#   def getPopReward(self):
#       if self.simple_reward:
#           return self.micro.getTotPop()
#       else:
#           resPop, comPop, indPop = (1/4) * self.micro.getResPop(), self.micro.getComPop(), self.micro.getIndPop()
#           curr_pop = resPop + comPop + indPop
#           zone_variety = 0
#           if resPop > 0:
#               zone_variety += 1
#           if comPop > 0:
#               zone_variety += 1
#           if indPop > 0:
#               zone_variety += 1
#           zone_bonus = (zone_variety - 1) * 50
#           curr_pop += max(0, zone_bonus)

#           return curr_pop

    def set_metric_ranges(self, metric_ranges):
        '''Forward ``metric_ranges`` to the agent panel of the GUI window.'''
        panel = self.win1.agentPanel
        panel.setMetricRanges(metric_ranges)

    def set_city_trgs(self, trgs):
        '''Merge ``trgs`` into ``self.city_trgs`` and print the result.

        Keys already present are overwritten, new keys are added, and keys
        missing from ``trgs`` keep their current value.
        '''
        self.city_trgs.update(trgs.items())
        print('set city trgs to: {}'.format(self.city_trgs))

    def get_city_metrics(self):
        '''Return a dict with the current value of every city metric.

        Keys: ``res_pop``, ``com_pop``, ``ind_pop``, ``traffic``,
        ``num_plants`` and ``mayor_rating``.
        '''
        sim = self.micro
        metrics = {
            'res_pop': sim.getResPop(),
            'com_pop': sim.getComPop(),
            'ind_pop': sim.getIndPop(),
            'traffic': sim.total_traffic,
        }
        # The rating is queried before the plant count is read.
        rating = self.getRating()
        metrics['num_plants'] = sim.map.num_plants
        metrics['mayor_rating'] = rating
        return metrics

    def display_city_metrics(self):
        '''Show the most recently stored city metrics on the agent panel.'''
        panel = self.win1.agentPanel
        panel.displayMetrics(self.city_metrics)

    def step(self, a, static_build=False):
        '''Apply one action and report the resulting transition.

        If a player move is queued in ``self.player_step`` it replaces ``a``
        for this step (and is forced static when
        ``self.static_player_builds`` is set).

        Args:
            a: Key into ``self.intsToActions`` selecting the action to take.
            static_build: Forwarded to ``self.micro.takeAction``.

        Returns:
            A ``(state, reward, terminal, infos)`` tuple. ``reward`` is
            ``max(0, self.max_reward - self.loss) / self.max_step``.
            ``terminal`` is true when ``self.auto_reset`` is set and either
            funds are below ``self.minFunds`` or ``self.num_step`` has reached
            ``self.max_step``. ``infos`` carries ``'player_move'`` when a
            player build was dequeued during this step.
        '''
        # Compare against the "nothing queued" sentinels explicitly: a queued
        # move with action index 0 is falsy, so a plain truthiness test would
        # silently drop it and run the agent's action instead.
        queued = self.player_step
        if queued is not False and queued is not None:
            if self.static_player_builds:
                static_build = True
            a = queued
            self.player_step = False
        a = self.intsToActions[a]
        self.micro.takeAction(a, static_build)
        self.state = self.getState()
        self.city_metrics = self.get_city_metrics()
        if self.render_gui:
            self.display_city_metrics()

        # Reward: how far the city metrics are from their targets.
        max_reward = self.max_reward
        self.loss = 0
        for k, v in self.city_trgs.items():
            self.loss += (v - self.city_metrics[k])**2
            # NOTE(review): the square root is taken on every iteration, not
            # once after the sum, so later targets weigh far more than
            # earlier ones. Kept as is; confirm this is intended.
            self.loss = math.sqrt(self.loss * 4)
        self.curr_reward = reward = max(0, max_reward - self.loss)

        curr_funds = self.micro.getFunds()
        bankrupt = curr_funds < self.minFunds
        # Assumes self.max_step is a number here and in the division below;
        # setMapSize defaults it to None, so callers must supply one.
        terminal = (bankrupt or self.num_step >= self.max_step) and\
            self.auto_reset
        if self.render_gui and self.print_map:
            self.printMap()
        if self.render_gui:
            self.micro.render()

        infos = {}
        if self.micro.player_builds:
            # Dequeue the oldest player build: report it now and queue it to
            # be replayed as the action of the next step.
            b = self.micro.player_builds[0]
            a = self.actionsToInts[b]
            infos['player_move'] = int(a)
            self.micro.player_builds = self.micro.player_builds[1:]
            self.player_step = a
        self.num_step += 1
        reward = reward / self.max_step

        return (self.state, reward, terminal, infos)

    def getRating(self):
        '''Return the engine's ``cityYes`` value, used here as the mayor rating.'''
        engine = self.micro.engine
        return engine.cityYes

    def printMap(self, static_builds=True):
        '''Print the latest zone map followed by a one-line status summary.

        Args:
            static_builds: Accepted but not used by this method.

        Note that this changes NumPy's global print options so that arrays
        are never truncated.
        '''
        np.set_printoptions(threshold=np.inf)
        zone_map = self.micro.map.zoneMap[-1]
        # Strip commas and collapse long whitespace runs from the array repr.
        zone_map = np.array_repr(zone_map).replace(',  ', '  ').replace(
            '],\n', ']\n').replace(',\n', ',').replace(', ', ' ').replace(
                '        ', ' ').replace('         ', '  ')
        # Nothing in this class assigns self.curr_pop, so reading it directly
        # raised AttributeError. Use it when present, otherwise fall back to
        # the summed zone populations.
        # NOTE(review): the unweighted sum mirrors the class's disabled
        # getPop(); confirm that is the figure wanted here.
        curr_pop = getattr(self, 'curr_pop', None)
        if curr_pop is None:
            curr_pop = (self.micro.getResPop() + self.micro.getComPop() +
                        self.micro.getIndPop())
        # curr_reward only exists once step() has run at least once.
        curr_reward = getattr(self, 'curr_reward', None)
        print(
            '{} \n population: {}, traffic: {}, episode: {}, step: {}, reward: {} \n'
            .format(
                zone_map,
                curr_pop,
                self.micro.total_traffic,
                self.num_episode,
                self.num_step,
                curr_reward,
            ))

    #print(self.micro.map.centers)

    def render(self, mode='human'):
        '''Redraw the simulator window; ``mode`` is accepted but not consulted.'''
        simulator = self.micro
        simulator.render()

    def test(self):
        '''Drive a freshly constructed environment with 5000 sampled actions.

        NOTE(review): ``__init__`` in this class does not create
        ``action_space``; presumably ``setMapSize`` has to run first. Confirm
        before relying on this method.
        '''
        num_actions = 5000
        env = MicropolisEnv()
        for _ in range(num_actions):
            env.step(env.action_space.sample())

    def set_res_weight(self, val):
        '''Store ``val`` as the ``'res_pop'`` entry of ``self.city_trgs``.'''
        self.city_trgs.update({'res_pop': val})

    def set_com_weight(self, val):
        '''Store ``val`` as the ``'com_pop'`` entry of ``self.city_trgs``.'''
        self.city_trgs.update({'com_pop': val})

    def set_ind_weight(self, val):
        '''Store ``val`` as the ``'ind_pop'`` entry of ``self.city_trgs``.'''
        self.city_trgs.update({'ind_pop': val})

    def set_traffic_weight(self, val):
        '''Store ``val`` as the ``'traffic'`` entry of ``self.city_trgs``.'''
        self.city_trgs.update({'traffic': val})

    def set_plants_weight(self, val):
        '''Store ``val`` as the ``'num_plants'`` entry of ``self.city_trgs``.'''
        self.city_trgs.update({'num_plants': val})

    def set_rating_weight(self, val):
        '''Store ``val`` as the ``'mayor_rating'`` entry of ``self.city_trgs``.'''
        self.city_trgs.update({'mayor_rating': val})

Ejemplo n.º 12

Mostrar archivo

class MicropolisEnv(core.Env):
    def __init__(self):
        self.SHOW_GUI = False

    def setMapSize(self, MAP_X=6, MAP_Y=6):
        '''Create the simulator and size the action/observation spaces.

        Args:
            MAP_X: Map width, forwarded to ``MicropolisControl``.
            MAP_Y: Map height, forwarded to ``MicropolisControl``.
        '''
        self.MAP_X, self.MAP_Y = MAP_X, MAP_Y
        self.micro = MicropolisControl(MAP_X, MAP_Y)
        self.win1 = self.micro.win1
        self.micro.SHOW_GUI = self.SHOW_GUI

        self.num_step = 0
        self.minFunds = 1000
        self.initFunds = 1000000000
        self.num_tools = self.micro.num_tools
        self.num_zones = self.micro.num_zones

        # One discrete action per (tool, x, y) combination.
        num_actions = self.num_tools * self.MAP_X * self.MAP_Y
        self.action_space = spaces.Discrete(num_actions)

        # One observation plane per zone type.
        obs_shape = (self.num_zones, self.MAP_X, self.MAP_Y)
        low_obs = np.zeros(obs_shape)
        high_obs = np.full(obs_shape, fill_value=1)
        # TODO: can/should we use Tuples of MultiBinaries instead, for greater efficiency?
        self.observation_space = spaces.Box(low=low_obs,
                                            high=high_obs,
                                            dtype=bool)
        self.state = None
        # Filled in by mapIntsToActions().
        self.intsToActions = {}
        self.mapIntsToActions()
        self.last_pop = 0

    def mapIntsToActions(self):
        '''Fill ``self.intsToActions`` with ``[tool, x, y]`` for every action id.

        Ids are assigned x-major, then y, with the tool index varying fastest.
        '''
        tools = self.num_tools
        for x in range(self.MAP_X):
            for y in range(self.MAP_Y):
                # Flat id of tool 0 on this cell.
                cell_start = (x * self.MAP_Y + y) * tools
                for z in range(tools):
                    self.intsToActions[cell_start + z] = [z, x, y]

    def close(self):
        '''Shut the environment down by closing the underlying simulator control.'''
        simulator = self.micro
        simulator.close()

    def reset(self):
        '''Clear the map, restore the initial funds and return the map state.'''
        sim = self.micro
        sim.clearMap()
        self.num_step = 0
        sim.setFunds(self.initFunds)
        self.last_pop = 0
        # The map state is read only after the map is cleared and refunded.
        self.state = sim.map.getMapState()
        return self.state

    def step(self, a):
        '''Apply one action and report the resulting transition.

        Args:
            a: Key into ``self.intsToActions`` selecting the action to take.

        Returns:
            A ``(state, reward, terminal, info)`` tuple. ``reward`` is the
            current weighted population (residential counts one eighth).
            ``terminal`` is true when the episode has reached 1000 steps, or
            when a bankruptcy check (run on every tenth step) finds funds
            below ``self.minFunds``. ``info`` is always an empty dict.
        '''
        self.micro.takeAction(self.intsToActions[a])
        curr_pop = self.micro.getResPop() / 8 + self.micro.getComPop() + \
                self.micro.getIndPop()
        reward = curr_pop
        # Funds are only inspected on every tenth step.
        bankrupt = (self.num_step % 10 == 0
                    and self.micro.getFunds() < self.minFunds)
        # Combine both conditions. Previously the step-limit test was assigned
        # unconditionally afterwards and discarded the bankruptcy result.
        terminal = bankrupt or self.num_step >= 1000
        self.num_step += 1
        return (self.micro.map.getMapState(), reward, terminal, {})

    def render(self, mode='human'):
        '''Run one ``gtk.mainiteration()``; ``mode`` is accepted but not consulted.'''
        gtk.mainiteration()

    def test(self):
        '''Drive a freshly constructed environment with 5000 sampled actions.

        NOTE(review): ``__init__`` in this class does not create
        ``action_space``; presumably ``setMapSize`` has to run first. Confirm
        before relying on this method.
        '''
        num_actions = 5000
        env = MicropolisEnv()
        for _ in range(num_actions):
            env.step(env.action_space.sample())