Example #1
 def resize_ice_config(self, L, mcsteps):
     """Resize the whole system."""
     # Resize the system size
     self.L = L
     self.num_mcsteps = mcsteps
     self.N = 4*L**2
     self.sL = int(np.sqrt(self.N)) # square length L 
     self.mc_info = INFO(self.L, self.N, 1, 1, 1, mcsteps, 1, mcsteps)
     # Allocate sim again.
     self.sim = SQIceGame(self.mc_info)
     self.sim.set_temperature (self.kT)
     self.sim.init_model()
     self.sim.mc_run(self.num_mcsteps)
     self.dump_env_setting()
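
A minimal usage sketch for the helper above, assuming it belongs to the IcegameEnv class shown in Examples #9 and #10 (where N = 4*L**2) and that dump_env_setting is defined elsewhere in that class; the concrete sizes are illustrative:

# Hypothetical usage: enlarge the lattice and re-thermalize before a new run.
env = IcegameEnv(L=16, kT=0.0001, J=1)
env.resize_ice_config(L=32, mcsteps=10000)
print(env.N, env.sL)   # expected: 4096 64, since N = 4*L**2 and sL = sqrt(N)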
Example #2
L = 32
kT = 0.0001
J = 1
N = L**2

num_neighbors = 2
num_replicas = 1
num_mcsteps = 2000
num_bins = 1
num_thermalization = num_mcsteps
tempering_period = 1

mc_info = INFO(L, N, num_neighbors, num_replicas, num_bins, num_mcsteps, tempering_period, num_thermalization)

# initialize the system, lattice config
sim = SQIceGame(mc_info)
sim.set_temperature (kT)
sim.init_model()
sim.mc_run(num_mcsteps)

sim.start(100)

eng_map = sim.get_energy_map()
print(eng_map)
print(type(eng_map))

for i in range(10):
    print (sim.draw(np.random.randint(6)))
    print (sim.get_trajectory())
sites = sim.get_trajectory()
site_diffs = [j-i for i, j in zip(sites[:-1], sites[1:])]
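
The trajectory above is a list of 1D site indices; as a rough check it can be mapped to 2D lattice coordinates the same way convert_1Dto2D does in Example #4 (integer division and modulo by L). A short sketch, assuming row-major site indexing:

# Sketch: map 1D site indices to (row, col) pairs, assuming row-major indexing by L.
coords = [(s // L, s % L) for s in sites]
print(coords)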
Example #3
    def __init__(self, L, kT, J):
        self.L = L
        self.kT = kT
        self.J = J
        self.N = L**2
        num_neighbors = 2
        num_replicas = 1
        num_mcsteps = 2000
        num_bins = 1
        num_thermalization = num_mcsteps
        tempering_period = 1

        self.mc_info = INFO(self.L, self.N, num_neighbors, num_replicas, \
                num_bins, num_mcsteps, tempering_period, num_thermalization)

        self.sim = SQIceGame(self.mc_info)
        self.sim.set_temperature(self.kT)
        self.sim.init_model()
        self.sim.mc_run(num_mcsteps)

        self.episode_terminate = False
        self.accepted_episode = False

        self.name_mapping = dict({
            0: 'right',
            1: 'down',
            2: 'left',
            3: 'up',
            4: 'lower_next',
            5: 'upper_next',
            6: 'metropolis',
        })

        self.index_mapping = dict({
            'right': 0,
            'down': 1,
            'left': 2,
            'up': 3,
            'lower_next': 4,
            'upper_next': 5,
            'metropolis': 6,
        })

        ### action space and state space
        self.action_space = spaces.Discrete(len(self.name_mapping))
        self.observation_space = spaces.Box(low=-1.0,
                                            high=1.0,
                                            shape=(self.L, self.L, 4))
        self.reward_range = (-1, 1)

        # output file
        self.ofilename = 'loop_sites.log'
        # render file
        self.rfilename = 'loop_renders.log'
        # save log to json for future analysis
        self.json_file = 'env_history.json'

        self.stacked_axis = 2

        ## counts reset()
        self.episode_counter = 0

        ## ray test
        self.auto_metropolis = False
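
name_mapping and index_mapping are intended to be inverses of each other; a small sketch (assuming a constructed instance of this environment, here called env) that translates a sampled action id into its name and back:

# Sketch: the two mappings invert each other.
a = env.action_space.sample()          # integer in [0, 7)
name = env.name_mapping[a]             # e.g. 'metropolis' for a == 6
assert env.index_mapping[name] == a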
Example #4
class IceGameEnv(core.Env):
    def __init__(self, L, kT, J):
        self.L = L
        self.kT = kT
        self.J = J
        self.N = L**2
        num_neighbors = 2
        num_replicas = 1
        num_mcsteps = 2000
        num_bins = 1
        num_thermalization = num_mcsteps
        tempering_period = 1

        self.mc_info = INFO(self.L, self.N, num_neighbors, num_replicas, \
                num_bins, num_mcsteps, tempering_period, num_thermalization)

        self.sim = SQIceGame(self.mc_info)
        self.sim.set_temperature(self.kT)
        self.sim.init_model()
        self.sim.mc_run(num_mcsteps)

        self.episode_terminate = False
        self.accepted_episode = False

        self.name_mapping = dict({
            0: 'right',
            1: 'down',
            2: 'left',
            3: 'up',
            4: 'lower_next',
            5: 'upper_next',
            6: 'metropolis',
        })

        self.index_mapping = dict({
            'right': 0,
            'down': 1,
            'left': 2,
            'up': 3,
            'lower_next': 4,
            'upper_next': 5,
            'metropolis': 6,
        })

        ### action space and state space
        self.action_space = spaces.Discrete(len(self.name_mapping))
        self.observation_space = spaces.Box(low=-1.0,
                                            high=1.0,
                                            shape=(self.L, self.L, 4))
        self.reward_range = (-1, 1)

        # output file
        self.ofilename = 'loop_sites.log'
        # render file
        self.rfilename = 'loop_renders.log'
        # save log to json for future analysis
        self.json_file = 'env_history.json'

        self.stacked_axis = 2

        ## counts reset()
        self.episode_counter = 0

        ## ray test
        self.auto_metropolis = False
        # ray add list:
        #     1. log 2D (x, y) in self.ofilename
        #     2. add self.calculate_area() and loop_area
        #     3. auto_6 (uncompleted)

    def step(self, action):
        terminate = False
        reward = 0.0
        obs = None
        rets = [0.0, 0.0, 0.0, 0.0]
        metropolis_executed = False

        ## execute different type of actions
        if (action == 6):
            self.sim.flip_trajectory()
            rets = self.sim.metropolis()
            metropolis_executed = True
        elif (0 <= action < 6):
            rets = self.sim.draw(action)

        is_accept, dEnergy, dDensity, dConfig = rets

        # metropolis judgement
        if (metropolis_executed):
            if is_accept > 0 and dConfig > 0:
                self.sim.update_config()
                print('[GAME_ENV] PROPOSAL ACCEPTED!')
                total_steps = self.sim.get_total_steps()
                ep_steps = self.sim.get_ep_step_counter()
                ep = self.sim.get_episode()
                loop_length = self.sim.get_accepted_length()[-1]
                loop_area = self.calculate_area()
                update_times = self.sim.get_updated_counter()
                reward = 1.0 * (
                    loop_length / 4.0
                )  # reward grows with loop length, normalized by the minimal 4-site loop

                # output to self.ofilename
                with open(self.ofilename, 'a') as f:
                    f.write('1D: {}, \n(2D: {})\n'.format(
                        self.sim.get_trajectory(),
                        self.convert_1Dto2D(self.sim.get_trajectory())))
                    print('\tSave loop configuration to file: {}'.format(
                        self.ofilename))

                print('\tTotal accepted number = {}'.format(
                    self.sim.get_updated_counter()))
                print('\tAccepted loop length = {}, area = {}'.format(
                    loop_length, loop_area))
                print('\tAgent walks {} steps in episode, action counters: {}'.
                      format(ep_steps, self.sim.get_ep_action_counters()))
                action_counters = self.sim.get_action_statistics()
                action_stats = [x / total_steps for x in action_counters]
                print(
                    '\tStatistics of actions all episodes (ep={}, steps={}) : {}'
                    .format(ep, total_steps, action_stats))
                print('\tAcceptance ratio (accepted/total Eps) = {}%'.format(
                    update_times * 100.0 / ep))

                self.dump_env_states()

                self.render()
                self.sim.clear_buffer()
            else:
                self.sim.clear_buffer()
                terminate = True
                # Avoid running metropolis at start
                if (rets[3] == 0.0):
                    reward = -0.8
            # reset or update
        else:
            reward = self._stepwise_weighted_returns(rets)
            # as usual

        obs = self.get_obs()
        ## add timeout mechanism?

        return obs, reward, terminate, rets

    # Start function used for agent learning
    def start(self, init_site=None):
        if init_site is None:
            init_agent_site = self.sim.start(rnum(self.N))
        else:
            init_agent_site = self.sim.start(init_site)
        assert (self.agent_site == init_agent_site)

    def reset(self):
        ## clear buffer and set new start of agent
        site = rnum(self.N)
        init_site = self.sim.restart(site)
        assert (init_site == site)
        self.episode_counter += 1
        return self.get_obs()

    def timeout(self):
        return self.sim.timeout()

    @property
    def agent_site(self):
        return self.sim.get_agent_site()

    @property
    def action_name_mapping(self):
        return self.name_mapping

    @property
    def name_action_mapping(self):
        return self.index_mapping

    def _stepwise_weighted_returns(self, rets):
        icemove_w = 0.000
        energy_w = -1.0
        defect_w = 0.0
        baseline = 0.009765625  ## 1 / 1024
        scaling = 2.0
        return (icemove_w * rets[0] + energy_w * rets[1] + defect_w * rets[2] +
                baseline) * scaling

    ## ray test  (for: int, list, np_list)
    def convert_1Dto2D(self, input_1D):
        output_2D = None
        if type(input_1D) == int:
            output_2D = (int(input_1D / self.L), int(input_1D % self.L))
        elif type(input_1D) == list:
            output_2D = []
            for position in input_1D:
                output_2D.append(
                    (int(position / self.L), int(position % self.L)))
        return output_2D

    ## ray test
    def calculate_area(self):
        traj_2D = self.convert_1Dto2D(self.sim.get_trajectory())
        traj_2D_dict = {}
        for x, y in traj_2D:
            if x in traj_2D_dict:
                traj_2D_dict[x].append(y)
            else:
                traj_2D_dict[x] = [y]

        # check Max y_length
        y_position_list = []
        for y_list in traj_2D_dict.values():
            y_position_list = y_position_list + y_list
        y_position_list = list(set(y_position_list))
        max_y_length = len(y_position_list) - 1

        area = 0.0
        for x in traj_2D_dict:
            diff = max(traj_2D_dict[x]) - min(traj_2D_dict[x])
            if diff > max_y_length:
                diff = max_y_length
            temp_area = diff - len(
                traj_2D_dict[x]) + 1  ## avoid vertical straight line
            if temp_area > 0:
                area = area + temp_area

        return area

    def render(self, mapname='traj', mode='ansi', close=False):
        #of = StringIO() if mode == 'ansi' else sys.stdout
        #print ('Energy: {}, Defect: {}'.format(self.sqice.cal_energy_diff(), self.sqice.cal_defect_density()))
        s = None
        if (mapname == 'traj'):
            s = self._transf2d(self.sim.get_canvas_map())
        start = self.sim.get_start_point()
        start = (int(start / self.L), int(start % self.L))
        s[start] = 3
        screen = '\r'
        screen += '\n\t'
        screen += '+' + self.L * '---' + '+\n'
        for i in range(self.L):
            screen += '\t|'
            for j in range(self.L):
                p = (i, j)
                spin = s[p]
                if spin == -1:
                    screen += ' o '
                elif spin == +1:
                    screen += ' * '
                elif spin == 0:
                    screen += '   '
                elif spin == +2:
                    screen += ' @ '
                elif spin == -2:
                    screen += ' O '
                elif spin == +3:
                    screen += ' x '
            screen += '|\n'
        screen += '\t+' + self.L * '---' + '+\n'
        #sys.stdout.write(screen)
        with open(self.rfilename, 'a') as f:
            f.write('Episode: {}, global step = {}\n'.format(
                self.episode_counter, self.sim.get_total_steps()))
            f.write('{}\n'.format(screen))

    def get_obs(self):
        config_map = self._transf2d(self.sim.get_state_t_map())
        canvas_map = self._transf2d(self.sim.get_canvas_map())
        energy_map = self._transf2d(self.sim.get_energy_map())
        defect_map = self._transf2d(self.sim.get_defect_map())

        return np.stack([config_map, canvas_map, energy_map, defect_map],
                        axis=self.stacked_axis)

    @property
    def unwrapped(self):
        """Completely unwrap this env.
            Returns:
                gym.Env: The base non-wrapped gym.Env instance
        """
        return self

    def sysinfo(self):
        print('')

    def _transf2d(self, s):
        # do we need zero mean here?
        return np.array(s, dtype=np.float32).reshape([self.L, self.L])

    def _append_record(self, record):
        with open(self.json_file, 'a') as f:
            json.dump(record, f)
            f.write(os.linesep)

    def dump_env_states(self):
        # get current timestamp
        total_steps = self.sim.get_total_steps()
        ep = self.sim.get_episode()
        # agent walk # steps in this episode
        ep_step_counters = self.sim.get_ep_step_counter()
        trajectory = self.sim.get_trajectory()
        if self.sim.get_accepted_length():
            loop_length = self.sim.get_accepted_length()[-1]
        else:
            loop_length = 0
        enclosed_area = self.calculate_area()
        update_times = self.sim.get_updated_counter()
        action_counters = self.sim.get_action_statistics()
        action_stats = [x / total_steps for x in action_counters]

        start_site = self.sim.get_start_point()
        acceptance = update_times * 100.0 / ep

        d = {
            'Episode': ep,
            'Steps': total_steps,
            'StartSite': start_site,
            'Trajectory': trajectory,
            'UpdateTimes': update_times,
            'AcceptanceRatio': acceptance,
            'LoopLength': loop_length,
            'EnclosedArea': enclosed_area,
            'ActionStats': action_stats
        }

        self._append_record(d)
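
Because _append_record writes one JSON object per line, the env_history.json file produced by dump_env_states can be read back as JSON lines for offline analysis; a minimal sketch, assuming the default filename:

import json

# Sketch: read back the JSON-lines history written by dump_env_states().
records = []
with open('env_history.json') as f:
    for line in f:
        line = line.strip()
        if line:
            records.append(json.loads(line))
print(len(records), 'accepted loops recorded')
if records:
    print(records[-1]['LoopLength'], records[-1]['EnclosedArea'])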
Example #5
    def __init__(self, L, kT, J):
        self.L = L
        self.kT = kT
        self.J = J
        self.N = L**2
        num_neighbors = 2
        num_replicas = 1
        num_mcsteps = 2000
        num_bins = 1
        num_thermalization = num_mcsteps
        tempering_period = 1

        self.mc_info = INFO(self.L, self.N, num_neighbors, num_replicas, \
                num_bins, num_mcsteps, tempering_period, num_thermalization)

        self.sim = SQIceGame(self.mc_info)
        self.sim.set_temperature(self.kT)
        self.sim.init_model()
        self.sim.mc_run(num_mcsteps)

        self.episode_terminate = False
        self.accepted_episode = False

        self.name_mapping = dict({
            0: 'right',
            1: 'down',
            2: 'left',
            3: 'up',
            4: 'lower_next',
            5: 'upper_next',
            6: 'metropolis',
        })

        self.index_mapping = dict({
            'right': 0,
            'down': 1,
            'left': 2,
            'up': 3,
            'lower_next': 4,
            'upper_next': 5,
            'metropolis': 6,
        })

        ### action space and state space
        self.action_space = spaces.Discrete(len(self.name_mapping))
        self.observation_space = spaces.Box(low=-1.0,
                                            high=1.0,
                                            shape=(self.L, self.L, 4))
        self.reward_range = (-1, 1)

        # output file
        self.ofilename = 'loop_sites.log'
        # render file
        self.rfilename = 'loop_renders.log'
        # save log to json for future analysis
        self.json_file = 'env_history.json'

        self.stacked_axis = 2

        ## counts reset()
        self.episode_counter = 0

        ## ray test, add list:
        #     1. log 2D (x, y) in self.ofilename
        #     2. add self.calculate_area() and loop_area
        #     3. auto_metropolis (uncompleted)
        #   8/2:
        #     5. add save_record_dict(): to record the amount of length and area
        #     6. add area_reward in step()
        #     7. add hundred_test(): to count accepted ratio in last 100 episodes
        #     8. add now_position & path flag in get_obs()
        self.record_dict = [{}, {}]
        self.accepted_in_hundred = 0
        self.accepted_in_hundred_stack = []

        ## ray test, step_setting
        self.area_reward = True  # use both length & area to calculate the reward
        self.auto_metropolis = False  # if the condition is OK, auto execute metropolis
        self.metropolis_terminal = False  # if metropolis_executed == True, terminal = True
        self.strict_step = False  # if rets[0] (is_accept) == -1, terminate = True
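
The step-setting flags above are plain attributes, so an experiment can toggle them right after construction; a sketch with illustrative settings (the constructor is the IceGameEnv(L, kT, J) shown in Example #4):

# Sketch: illustrative experiment setup toggling the step-setting flags.
env = IceGameEnv(L=32, kT=0.0001, J=1)
env.area_reward = False         # reward from loop length only (True also uses the area)
env.strict_step = True          # terminate when a move returns is_accept == -1
env.metropolis_terminal = True  # terminate whenever metropolis is executed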
Example #6
    def __init__ (self, L, kT, J, is_cont=False):
        """IceGame
            is_cont (bool):
                Set True to use a continuous action variable; set False to use discrete actions.
        """
        self.L = L
        self.kT = kT
        self.J = J
        self.N = L**2
        self.is_cont = is_cont
        num_neighbors = 2
        num_replicas = 1
        num_mcsteps = 2000
        num_bins = 1
        num_thermalization = num_mcsteps
        tempering_period = 1

        self.mc_info = INFO(self.L, self.N, num_neighbors, num_replicas, \
                num_bins, num_mcsteps, tempering_period, num_thermalization)

        self.sim = SQIceGame(self.mc_info)
        self.sim.set_temperature (self.kT)
        self.sim.init_model()
        self.sim.mc_run(num_mcsteps)

        self.episode_terminate = False
        self.accepted_episode = False

        self.last_update_step = 0

        """
            History FIFO
        """
        self.Imap = I = np.ones([self.L, self.L])
        self.Omap = O = np.zeros([self.L, self.L])
        if HIST_LEN > 0:
            self.canvas_hist = deque([O] * HIST_LEN)
            self.defect_hist = deque([O] * HIST_LEN)

        self.idx2act = dict({
                            0 :   "right",
                            1 :   "down",
                            2 :   "left",
                            3 :   "up",
                            4 :   "lower_next",
                            5 :   "upper_next",
                            6 :   "metropolis"
                            })

        self.act2idx  = {v: k for k, v in self.idx2act.items()}

        # action space and state space
        self.observation_space = spaces.Box(low=-1.0, high=1.0, shape=(self.L, self.L, NUM_OBSERVATION_MAPS))
        if is_cont:
            self.action_space = spaces.Box(low=-1.0, high=1.0, shape=(len(self.idx2act),))
        else:
            self.action_space = spaces.Discrete(len(self.idx2act))

        #TODO: make more clear definition
        """
            Global Observations:
                *
            Local Observations:
                * neighboring spins up & down
                *
        """
        self.global_observation_space = spaces.Box(low=-1.0, high=1.0, shape=(self.L, self.L, 2))
        self.local_observation_space = spaces.Discrete(7)

        self.reward_range = (-1, 1)

        # output file
        self.ofilename = "loop_sites.log"
        # render file
        self.rfilename = "loop_renders.log"
        # save log to json for future analysis
        self.json_file = "env_history.json"

        self.stacked_axis = 2

        ## counts reset()
        self.episode_counter = 0
        self.lives = DEFAULT_LIVES

        ## legacy codes
        self.auto_metropolis = False
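
When is_cont=True, the action is expected to be a length-7 score vector, and step() in the next example reduces it to a discrete index with np.argmax; a sketch of that convention from the agent's side:

import numpy as np

# Sketch: with is_cont=True, the agent emits 7 continuous scores in [-1, 1];
# the environment keeps only the index of the largest one (see step() in Example #7).
scores = np.random.uniform(-1.0, 1.0, size=7)
discrete_action = int(np.argmax(scores))   # index into idx2act, e.g. 6 -> "metropolis"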
Example #7
class IceGameEnv(core.Env):
    def __init__ (self, L, kT, J, is_cont=False):
        """IceGame
            is_cont (bool):
                Set True to use a continuous action variable; set False to use discrete actions.
        """
        self.L = L
        self.kT = kT
        self.J = J
        self.N = L**2
        self.is_cont = is_cont
        num_neighbors = 2
        num_replicas = 1
        num_mcsteps = 2000
        num_bins = 1
        num_thermalization = num_mcsteps
        tempering_period = 1

        self.mc_info = INFO(self.L, self.N, num_neighbors, num_replicas, \
                num_bins, num_mcsteps, tempering_period, num_thermalization)

        self.sim = SQIceGame(self.mc_info)
        self.sim.set_temperature (self.kT)
        self.sim.init_model()
        self.sim.mc_run(num_mcsteps)

        self.episode_terminate = False
        self.accepted_episode = False

        self.last_update_step = 0

        """
            History FIFO
        """
        self.Imap = I = np.ones([self.L, self.L])
        self.Omap = O = np.zeros([self.L, self.L])
        if HIST_LEN > 0:
            self.canvas_hist = deque([O] * HIST_LEN)
            self.defect_hist = deque([O] * HIST_LEN)

        self.idx2act = dict({
                            0 :   "right",
                            1 :   "down",
                            2 :   "left",
                            3 :   "up",
                            4 :   "lower_next",
                            5 :   "upper_next",
                            6 :   "metropolis"
                            })

        self.act2idx  = {v: k for k, v in self.idx2act.items()}

        # action space and state space
        self.observation_space = spaces.Box(low=-1.0, high=1.0, shape=(self.L, self.L, NUM_OBSERVATION_MAPS))
        if is_cont:
            self.action_space = spaces.Box(low=-1.0, high=1.0, shape=(len(self.idx2act),))
        else:
            self.action_space = spaces.Discrete(len(self.idx2act))

        #TODO: make more clear definition
        """
            Global Observations:
                *
            Local Observations:
                * neighboring spins up & down
                *
        """
        self.global_observation_space = spaces.Box(low=-1.0, high=1.0, shape=(self.L, self.L, 2))
        self.local_observation_space = spaces.Discrete(7)

        self.reward_range = (-1, 1)

        # output file
        self.ofilename = "loop_sites.log"
        # render file
        self.rfilename = "loop_renders.log"
        # save log to json for future analysis
        self.json_file = "env_history.json"

        self.stacked_axis = 2

        ## counts reset()
        self.episode_counter = 0
        self.lives = DEFAULT_LIVES

        ## legacy codes
        self.auto_metropolis = False
        # ray add list:
        #     1. log 2D (x, y) in self.ofilename
        #     2. add self.calculate_area() and loop_area
        #     3. auto_6 (uncompleted)

    def step(self, action):
        """step function with directional action
        """
        if self.is_cont:
            # actions are 7 continuous variables, pick the largest one
            action = np.argmax(action)

        terminate = False
        reward = 0.0 # -0.000975 # stepwise punishment.
        obs = None
        info = None
        rets = [0.0, 0.0, 0.0, 0.0]
        metropolis_executed = False

        ## execute different type of actions
        if (action == 6):
            self.sim.flip_trajectory()
            rets = self.sim.metropolis()
            metropolis_executed = True
        elif (0 <= action < 6) :
            rets = self.sim.draw(action)

        """ Results from icegame
            index 0 plays two roles:
                if action is walk:
                    rets[0] = is_icemove
                elif action is metropolis:
                    rets[0] = is_accept
        """
        is_accept, dEnergy, dDensity, dConfig = rets
        is_icemove = True if is_accept > 0.0 else False

        # metropolis judgement
        if (metropolis_executed):
            if is_accept > 0 and dConfig > 0:
                """ Updates Accepted
                    1. Calculate rewards
                    2. Save logs
                    3. Reset maps and buffers
                """
                self.sim.update_config()
                print ("[GAME_ENV] PROPOSAL ACCEPTED!")
                total_steps = self.sim.get_total_steps()
                ep_steps = self.sim.get_ep_step_counter()
                ep = self.sim.get_episode()
                loop_length = self.sim.get_accepted_length()[-1]
                loop_area = self.calculate_area()

                # get counters
                action_counters = self.sim.get_action_statistics()
                metropolis_times = self.sim.get_updating_counter()
                update_times = self.sim.get_updated_counter()

                # compute update interval
                update_interval = total_steps - self.last_update_step
                self.last_update_step = total_steps

                # acceptance rate
                total_acc_rate = self.sim.get_total_acceptance_rate() * 100.0
                effort =  update_times/total_steps * 100.0
                reward = 1.0 * (loop_length / LOOP_UNIT_REWARD) # reward grows with loop length, normalized by the minimal 4-site loop

                # TODO: Calculate recent # steps' acceptance rate

                # output to self.ofilename
                with open(self.ofilename, "a") as f:
                    f.write("1D: {}, \n(2D: {})\n".format(self.sim.get_trajectory(), self.convert_1Dto2D(self.sim.get_trajectory())))
                    print ("\tSave loop configuration to file: {}".format(self.ofilename))

                print ("\tTotal accepted number = {}".format(update_times))
                print ("\tAccepted loop length = {}, area = {}".format(loop_length, loop_area))
                print ("\tAgent walks {} steps in episode, action counters: {}".format(ep_steps, self.sim.get_ep_action_counters()))
                action_stats = [x / total_steps for x in action_counters]
                print ("\tStatistics of actions all episodes (ep={}, steps={}) : {}".format(ep, total_steps, action_stats))
                print ("\tAcceptance ratio (accepted/ # of metropolis) = {}%".format(
                                                                    update_times * 100.0 / metropolis_times))
                print ("\tAcceptance ratio (from icegame) = {}%".format(total_acc_rate))
                print ("\tRunning Effort = {}%".format(effort))

                # TODO: How to describe the loop?
                info = {
                    "Acceptance Ratio" : total_acc_rate,
                    "Running Effort": effort,
                    "Updated" : update_times,
                    "Loop Size": loop_length,
                    "Loop Area": loop_area,
                }

                # Stop rendering; it saves huge logs.
                # self.render()
                self.dump_env_states()
                self.sim.clear_buffer()

                """ Terminate?
                    stop after accpetance, will increase the episode rewards.
                    But can we still running the program to increase the total rewards?

                    Or do not terminate, just reset the location?
                """
                # terminate = True
                self.sim.restart(rnum(self.N))

            else:
                self.sim.clear_buffer()
                self.lives -= 1
                """
                    Rejection
                        1. Keep updating with new canvas.
                            or
                        Early stop.
                        2. Wrong decision penalty

                """
                reward = -0.001
                #self.episode_terminate = True
                #terminate = True
                # Avoid running metropolis at start (Too hand-crafted method!)
                #if (rets[3] == 0.0):
                #    reward = -0.8

            # reset or update
        else:
            """Stepwise feedback:
                1. exploration
                2. icemove rewards
                3. defect propagation guiding
                4. #more

                TODO: Write option in init arguments.
            """
            #reward = self._stepwise_weighted_returns(rets)
            # Check each scale (each of them stays in 0~1)
            #reward = 0.002 - (dEnergy + dDensity)
            #reward = -(dEnergy + dDensity) + dConfig
            if is_icemove:
                reward = .001
                #print ("is icemove: {}, {}".format(dEnergy, dDensity))
            else:
                reward = -.001
                #print ("not icemove: {}, {}".format(dEnergy, dDensity))
            # as usual

        obs = self.get_obs()
        #obs = self.get_hist_obs()

        ## add timeout mechanism?

        # Add the timeout counter
        if self.lives <= 0:
            terminate = True

        # Not always return info
        return obs, reward, terminate, info

    # Start function used for agent learning
    def start(self, init_site=None):
        """
            Returns: same as step()
                obs, reward, terminate, rets
        """
        if init_site == None:
            init_agent_site = self.sim.start(rnum(self.N))
        else:
            init_agent_site = self.sim.start(init_site)
        assert(self.agent_site == init_agent_site)
        self.episode_terminate = False
        self.lives = DEFAULT_LIVES

        return self.get_obs()
        #return self.get_hist_obs()

    def reset(self, site=None):
        ## clear buffer and set new start of agent
        if site is None:
            site = rnum(self.N)
        init_site = self.sim.restart(site)
        assert(init_site == site)
        self.episode_terminate = False
        self.episode_counter += 1
        self.lives = DEFAULT_LIVES
        # actually, counter can be called by sim.get_episode()

        # Clear the fifo queue
        if HIST_LEN > 0:
            self.canvas_hist.clear()
            self.defect_hist.clear()
            for _ in range(HIST_LEN):
                self.canvas_hist.append(self.Omap)
                self.defect_hist.append(self.Omap)

        info = None

        return self.get_obs()
        #return self.get_hist_obs()

    def timeout(self):
        return self.sim.timeout()

    @property
    def game_status(self):
        """Return whether game is terminate"""
        return self.episode_terminate

    def set_output_path(self, path):
        self.ofilename = os.path.join(path, self.ofilename)
        self.rfilename = os.path.join(path, self.rfilename)
        self.json_file = os.path.join(path, self.json_file)
        print ("Set environment logging to {}".format(self.ofilename))
        print ("Set loop and sites logging to {}".format(self.rfilename))
        print ("Set results dumpping path to {}".format(self.json_file))

    @property
    def agent_site(self):
        return self.sim.get_agent_site()

    @property
    def action_name_mapping(self):
        return self.idx2act

    @property
    def name_action_mapping(self):
        return self.act2idx

    def reward_function(self, rets):
        """ Different Reward Strategies Here
        """
        pass

    def _stepwise_weighted_returns(self, rets):
        icemove_w = 0.000
        energy_w = -1.0
        defect_w = 0.0
        baseline = 0.009765625 ## 1 / 1024
        scaling = 2.0
        return (icemove_w * rets[0] + energy_w * rets[1] + defect_w * rets[2] + baseline) * scaling

    ## ray test  (for: int, list, np_list)
    def convert_1Dto2D(self, input_1D):
        output_2D = None
        if type(input_1D) == int:
            output_2D = (int(input_1D/self.L), int(input_1D%self.L))
        elif type(input_1D) == list:
            output_2D = []
            for position in input_1D:
                output_2D.append((int(position/self.L), int(position%self.L)))
        return output_2D

    ## ray test
    def calculate_area(self):
        """TODO:
            The periodic boundary condition is too naive that can be modified.
        """
        traj_2D = self.convert_1Dto2D(self.sim.get_trajectory())
        traj_2D_dict = {}
        for x, y in traj_2D:
            if x in traj_2D_dict:
                traj_2D_dict[x].append(y)
            else:
                traj_2D_dict[x] = [y]

        # check Max y_length
        y_position_list = []
        for y_list in traj_2D_dict.values():
            y_position_list = y_position_list + y_list
        y_position_list = list(set(y_position_list))
        max_y_length = len(y_position_list) -1

        area = 0.0
        for x in traj_2D_dict:
            diff = max(traj_2D_dict[x]) - min(traj_2D_dict[x])
            if diff > max_y_length:
                diff = max_y_length
            temp_area = diff - len(traj_2D_dict[x]) +1  ## avoid vertical straight line
            if temp_area > 0:
                area = area + temp_area

        return area

    # TODO: Render on terminal.
    def render(self, mapname ="traj", mode="ansi", close=False):
        #of = StringIO() if mode == "ansi" else sys.stdout
        #print ("Energy: {}, Defect: {}".format(self.sqice.cal_energy_diff(), self.sqice.cal_defect_density()))
        s = None
        if (mapname == "traj"):
            s = self._transf2d(self.sim.get_canvas_map())
        start = self.sim.get_start_point()
        start = (int(start/self.L), int(start%self.L))
        s[start] = 3
        screen = "\r"
        screen += "\n\t"
        screen += "+" + self.L * "---" + "+\n"
        for i in range(self.L):
            screen += "\t|"
            for j in range(self.L):
                p = (i, j)
                spin = s[p]
                if spin == -1:
                    screen += " o "
                elif spin == +1:
                    screen += " * "
                elif spin == 0:
                    screen += "   "
                elif spin == +2:
                    screen += " @ "
                elif spin == -2:
                    screen += " O "
                elif spin == +3:
                    # starting point
                    screen += " x "
            screen += "|\n"
        screen += "\t+" + self.L * "---" + "+\n"
        #TODO: Add choice write to terminal or file
        #sys.stdout.write(screen)
        with open(self.rfilename, "a") as f:
            f.write("Episode: {}, global step = {}\n".format(self.episode_counter, self.sim.get_total_steps()))
            f.write("{}\n".format(screen))

    def get_obs(self):
        """
            Need more flexible in get_obs. There will may be config, sequence, scalar observed states.
            TODO: add np.nan_to_num() to prevent ill value
        """
        config_map = self._transf2d(self.sim.get_state_t_map_color())
        #config_map = self._transf2d(self.sim.get_state_t_map())
        valid_map = self._transf2d(self.sim.get_valid_action_map())
        canvas_map = self._transf2d(self.sim.get_canvas_map())
        energy_map = self._transf2d(self.sim.get_energy_map())
        defect_map = self._transf2d(self.sim.get_defect_map())

        return np.stack([config_map,
                        valid_map,
                        canvas_map,
                        energy_map,
                        defect_map
        ], axis=self.stacked_axis)

    def get_hist_obs(self):
        config_map = self._transf2d(self.sim.get_state_t_map_color())
        valid_map = self._transf2d(self.sim.get_valid_action_map())
        canvas_map = self._transf2d(self.sim.get_canvas_map())
        energy_map = self._transf2d(self.sim.get_energy_map())
        defect_map = self._transf2d(self.sim.get_defect_map())

        self.canvas_hist.append(canvas_map)
        self.canvas_hist.popleft()
        self.defect_hist.append(defect_map)
        self.defect_hist.popleft()

        canvas_traj = np.stack([canvas for canvas in self.canvas_hist], axis=self.stacked_axis)
        defect_traj = np.stack([dmap for dmap in self.defect_hist], axis=self.stacked_axis)
        config_map = np.expand_dims(config_map, axis=self.stacked_axis)
        valid_map = np.expand_dims(valid_map, axis=self.stacked_axis)
        energy_map = np.expand_dims(energy_map, axis=self.stacked_axis)

        return np.concatenate([config_map,
                        valid_map,
                        energy_map,
                        canvas_traj,
                        defect_traj
                        ], axis=self.stacked_axis)

    def get_partial_obs(self):
        """Partial Observation:
            Get the multiple channel (different format) from the same states.

            Return:
                local: neighboring relation of the state_t
                global: whole maps of the state_t
        """
        pass

    @property
    def unwrapped(self):
        """Completely unwrap this env.
            Returns:
                gym.Env: The base non-wrapped gym.Env instance
        """
        return self

    def sysinfo(self):
        print ("")

    def _transf2d(self, s):
        # do we need zero mean here?
        return np.array(s, dtype=np.float32).reshape([self.L, self.L])

    def _append_record(self, record):
        with open(self.json_file, "a") as f:
            json.dump(record, f)
            f.write(os.linesep)

    def dump_env_states(self):
        # get current timestamp
        total_steps = self.sim.get_total_steps()
        ep = self.sim.get_episode()
        # agent walk # steps in this episode
        ep_step_counters = self.sim.get_ep_step_counter()
        trajectory = self.sim.get_trajectory()
        if self.sim.get_accepted_length():
            loop_length = self.sim.get_accepted_length()[-1]
        else:
            loop_length = 0
        enclosed_area = self.calculate_area()
        update_times = self.sim.get_updated_counter()
        action_counters = self.sim.get_action_statistics()
        action_stats = [x / total_steps for x in action_counters]

        start_site = self.sim.get_start_point()
        acceptance = update_times * 100.0 / ep

        d = {
            "Episode": ep,
            "Steps"  : total_steps,
            "StartSite"  : start_site,
            "Trajectory": trajectory,
            "UpdateTimes": update_times,
            "AcceptanceRatio" : acceptance,
            "LoopLength": loop_length,
            "EnclosedArea": enclosed_area,
            "ActionStats" : action_stats
        }

        self._append_record(d)
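
A minimal random-agent rollout against this environment, assuming the module-level helpers and constants used above (rnum, DEFAULT_LIVES, HIST_LEN, NUM_OBSERVATION_MAPS) are defined as in the original module:

# Sketch: random rollout until the lives counter terminates the episode.
env = IceGameEnv(L=32, kT=0.0001, J=1)
obs = env.start()                      # random start site, lives reset
done = False
while not done:
    action = env.action_space.sample()
    obs, reward, done, info = env.step(action)
obs = env.reset()                      # new random start site for the next episode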
Example #8
from icegame import SQIceGame, INFO
import numpy as np
import matplotlib.pyplot as plt

# physical parameters
L = 4
kT = 0.0001
J = 1
N = L**2

num_neighbors = 2
num_replicas = 1
num_mcsteps = 2000
num_bins = 1
num_thermalization = num_mcsteps
tempering_period = 1

mc_info = INFO(L, N, num_neighbors, num_replicas, num_bins, num_mcsteps,
               tempering_period, num_thermalization)

# initialize the system, lattice config
sim = SQIceGame(mc_info)
sim.set_temperature(kT)
sim.init_model()
sim.mc_run(num_mcsteps)

sim.start(0)
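
matplotlib is imported above but not used in the snippet; a sketch of how the energy map could be visualized, assuming get_energy_map() returns N values in row-major order (as the _transf2d helper assumes in the other examples):

# Sketch: visualize the site-wise energy map on the L x L lattice.
eng_map = np.array(sim.get_energy_map(), dtype=np.float32).reshape(L, L)
plt.imshow(eng_map, cmap='coolwarm')
plt.colorbar(label='local energy')
plt.title('SQIceGame energy map (L = {})'.format(L))
plt.show()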
Example #9
    def __init__ (self, L, kT, J, 
                    stepwise_reward="constant",
                    end_reward="loopsize",
                    terminate_mode="trial",
                    obs_type="multi",
                ):
        """IceGame
            *** Considering more action and state spaces. Use autocorr as reward. ***
          Args:
            stepwise:
            endreward:
            terminate_mode:
                * metro: Each time metropolis is executed, then call it an episode.
                * trial: Finite trial times each episodes
            obs_type (observation type):
                * multi:
                * global_local:
            reset_each_epsidoes:
                reset configuration each # of episodes
        """
        self.L = L
        self.kT = kT
        self.J = J
        self.N = 4*L**2
        self.sL = int(np.sqrt(self.N)) # square length L 
        self.stepwis = stepwise_reward
        self.endreward = end_reward
        self.terminate_mode = terminate_mode
        self.obs_type = obs_type
        num_neighbors = 1
        num_replicas = 1
        num_mcsteps = 10000
        self.num_mcsteps = num_mcsteps
        num_bins = 1
        num_thermalization = num_mcsteps
        tempering_period = 1
        self.mc_info = INFO(self.L, self.N, num_neighbors, num_replicas, \
                num_bins, num_mcsteps, tempering_period, num_thermalization)

        self.sim = SQIceGame(self.mc_info)
        self.sim.set_temperature (self.kT)
        self.sim.init_model()
        self.sim.mc_run(num_mcsteps)

        self.episode_terminate = False
        self.accepted_episode = False

        self.last_update_step = 0
        # why do we need to keep track of the last returned results?
        self.last_rets = None
        self.center = None # used for sliding window
        self.use_subregion = False

        # Extended action set: 3 head moves + 3 tail moves + metropolis = 7 actions
        self.idx2act = dict({
                            0 :   "head_0",
                            1 :   "head_1",
                            2 :   "head_2",
                            3 :   "tail_0",
                            4 :   "tail_1",
                            5 :   "tail_2",
                            6 :   "metropolis",
                            })

        self.act2idx  = {v: k for k, v in self.idx2act.items()}

        # action space and state space

        # global_observation_space
        self.global_observation_space = spaces.Box(low=-1, high=1.0,
            shape=(self.sL, self.sL, 1), dtype=np.float32)
        # local_observation_space (neighbor + agent + physical obs)
        self.local_observation_space = spaces.Discrete(10)
        self.action_space = spaces.Discrete(len(self.idx2act))
        self.reward_range = (-1, 1)

        # for convention (legacy code)
        self.observation_space = spaces.Box(low=-1, high=1.0,
            shape=(self.L, self.L, 4), dtype=np.float32)

        # reference configuration: buffer for initial config each episode
        self.refconfig = None

        # TODO: Scheduling reward scale
        self.reward_scale = 1.0
        self.reward_threshold = 0.0
        self.reward_trajectory = []

        """Choose Observation Function
        """

        self.cfg_outdir = "configs"
        # output file
        self.ofilename = "loop_sites.log"
        # render file
        self.rfilename = "loop_renders.log"
        # save log to json for future analysis
        self.json_file = "env_history.json"
        # Need more info writing down in env settings
        self.env_settinglog_file = "env_settings.json"

        self.stacked_axis = 2
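
Unlike the earlier examples, this variant uses N = 4*L**2 sites, so the square rendering side is sL = 2*L; a quick shape-bookkeeping sketch under that assumption:

import numpy as np

# Sketch: shape bookkeeping for the N = 4*L**2 variant (L = 16 is illustrative).
L = 16
N = 4 * L**2              # 1024 sites
sL = int(np.sqrt(N))      # 32, i.e. 2 * L
assert sL == 2 * L
# global observation: (sL, sL, 1); legacy observation kept as (L, L, 4)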
Example #10
class IcegameEnv(core.Env):
    def __init__ (self, L, kT, J, 
                    stepwise_reward="constant",
                    end_reward="loopsize",
                    terminate_mode="trial",
                    obs_type="multi",
                ):
        """IceGame
            *** Considering more action and state spaces. Use autocorr as reward. ***
          Args:
            stepwise:
            endreward:
            terminate_mode:
                * metro: Each time metropolis is executed, then call it an episode.
                * trial: Finite trial times each episodes
            obs_type (observation type):
                * multi:
                * global_local:
            reset_each_epsidoes:
                reset configuration each # of episodes
        """
        self.L = L
        self.kT = kT
        self.J = J
        self.N = 4*L**2
        self.sL = int(np.sqrt(self.N)) # square length L 
        self.stepwis = stepwise_reward
        self.endreward = end_reward
        self.terminate_mode = terminate_mode
        self.obs_type = obs_type
        num_neighbors = 1
        num_replicas = 1
        num_mcsteps = 10000
        self.num_mcsteps = num_mcsteps
        num_bins = 1
        num_thermalization = num_mcsteps
        tempering_period = 1
        self.mc_info = INFO(self.L, self.N, num_neighbors, num_replicas, \
                num_bins, num_mcsteps, tempering_period, num_thermalization)

        self.sim = SQIceGame(self.mc_info)
        self.sim.set_temperature (self.kT)
        self.sim.init_model()
        self.sim.mc_run(num_mcsteps)

        self.episode_terminate = False
        self.accepted_episode = False

        self.last_update_step = 0
        # why do we need to keep track of the last returned results?
        self.last_rets = None
        self.center = None # used for sliding window
        self.use_subregion = False

        # Extended action set: 3 head moves + 3 tail moves + metropolis = 7 actions
        self.idx2act = dict({
                            0 :   "head_0",
                            1 :   "head_1",
                            2 :   "head_2",
                            3 :   "tail_0",
                            4 :   "tail_1",
                            5 :   "tail_2",
                            6 :   "metropolis",
                            })

        self.act2idx  = {v: k for k, v in self.idx2act.items()}

        # action space and state space

        # global_observation_space
        self.global_observation_space = spaces.Box(low=-1, high=1.0,
            shape=(self.sL, self.sL, 1), dtype=np.float32)
        # local_observation_space (neighbor + agent + physical obs)
        self.local_observation_space = spaces.Discrete(10)
        self.action_space = spaces.Discrete(len(self.idx2act))
        self.reward_range = (-1, 1)

        # for convention (legacy code)
        self.observation_space = spaces.Box(low=-1, high=1.0,
            shape=(self.L, self.L, 4), dtype=np.float32)

        # reference configuration: buffer for initial config each episode
        self.refconfig = None

        # TODO: Scheduling reward scale
        self.reward_scale = 1.0
        self.reward_threshold = 0.0
        self.reward_trajectory = []

        """Choose Observation Function
        """

        self.cfg_outdir = "configs"
        # output file
        self.ofilename = "loop_sites.log"
        # render file
        self.rfilename = "loop_renders.log"
        # save log to json for future analysis
        self.json_file = "env_history.json"
        # Need more info writing down in env settings
        self.env_settinglog_file = "env_settings.json"

        self.stacked_axis = 2

        ## counts reset()

    def auto_step(self):
        # auto_step works like the long-loop algorithm.
        guides = self.sim.guide_action()
        # Or, we can execute metropolis when guide fails
        E, D, dC = self.sim.get_phy_observables()
        if (E == -1):
            act = self.name_action_mapping["metropolis"]
        else:
            act = np.random.choice(guides)
        return self.step(act)

    def step(self, action):
        """Step function
            Args: action
            Returns: obs, reward, done, info

            TODO:
                Take a nested list of actions as a single 'action' for the Markov chain transition.
        """
        terminate = False
        reward = 0.0
        obs = None
        info = None
        rets = [0.0, 0.0, 0.0]
        metropolis_executed = False

        ## execute different type of actions
        ## maybe we need a better action index
        if (action == 6):
            self.sim.flip_trajectory()
            rets = self.sim.metropolis()
            metropolis_executed = True
        elif (0 <= action < 6) :
            rets = self.sim.move(action)

        """ Results from icegame
            index 0 plays two roles:
                if action is walk:
                    rets[0] = is_icemove
                elif action is metropolis:
                    rets[0] = is_accept
        """
        is_accept, dEnergy, dConfig = rets
        is_icemove = True if is_accept > 0.0 else False
        self.last_rets = rets

        # metropolis judgement
        if (metropolis_executed):
            """TODO: Add autocorr of config here.
            """

            if is_accept > 0 and dConfig > 0:
                """ Updates Accepted
                    1. Calculate rewards
                      1.1 Get current configuration before updating
                      1.2 calculate the inner product
                      1.3 reward = 1.0 - autocorr
                    2. Save logs
                    3. Reset maps and buffers
                """
                #current_config = self._transf2d(self.sim.get_state_tp1_map_color())
                #statevec = transf_binary_vector(current_config)

                self.sim.update_config()
                print ("[GAME_ENV] PROPOSAL ACCEPTED!")

                total_steps = self.sim.get_total_steps()
                ep_steps = self.sim.get_ep_step_counter()
                ep = self.sim.get_episode()
                loop_length = self.sim.get_accepted_length()[-1]
                loop_area = self.calculate_area()

                # get counters
                action_counters = self.sim.get_action_statistics()
                metropolis_times = self.sim.get_updating_counter()
                updated_times = self.sim.get_updated_counter()

                # compute update interval
                update_interval = total_steps - self.last_update_step
                self.last_update_step = total_steps

                # acceptance rate
                total_acc_rate = self.sim.get_total_acceptance_rate() * 100.0
                #effort =  updated_times/total_steps * 100.0
                effort = loop_length / ep_steps * 100.0
                # calculate the metropolis reward
                #acorr = autocorr(statevec, self.refconfig)
                #reward = (1.0 - acorr) * self.reward_scale
                reward = 1.0

                # TODO: Calculate recent # steps' acceptance rate
                """Dump resutls into file.
                    TODO: Different counter
                """
                # output to self.ofilename: NOTE: No need to save this, all info in hist.json.
                #with open(self.ofilename, "a") as f:
                #    f.write("1D: {}, \n(2D: {})\n".format(self.sim.get_trajectory(), self.convert_1Dto2D(self.sim.get_trajectory())))
                #    print ("\tSave loop configuration to file: {}".format(self.ofilename))

                print ("\tGlobal step: {}, Local step: {}".format(total_steps, ep_steps))
                print ("\tTotal accepted number = {}".format(updated_times))
                print ("\tTotal Metropolis number = {}".format(metropolis_times))
                print ("\tAccepted loop length = {}, area = {}".format(loop_length, loop_area))
                print ("\tAgent walks {} steps in episode, action counters: {}".format(ep_steps, self.sim.get_ep_action_counters()))
                action_stats = [x / total_steps for x in action_counters]
                print ("\tStatistics of actions all episodes (ep={}, steps={}) : {}".format(ep, total_steps, action_stats))
                print ("\tAcceptance ratio (accepted/ # of metropolis) = {}%".format(
                                                                    updated_times * 100.0 / metropolis_times))
                print ("\tAcceptance ratio (accepted/ # of episodes) = {}%".format(
                                                                    updated_times * 100.0 / ep))
                print ("\tAcceptance ratio (from icegame) = {}%".format(total_acc_rate))
                print ("\tRunning Effort = {}%".format(effort))

                # TODO: How to describe the loop?
                info = {
                    "Acceptance Ratio" : total_acc_rate,
                    "Running Effort": effort,
                    "Updated" : updated_times,
                    "Loop Size": loop_length,
                    "Loop Area": loop_area,
                }

                # Render when special case happened.
                #if loop_area >= 1 or loop_length >= 8:
                    #self.render()
                self.dump_env_status()
                self.sim.clear_buffer()

                """ Terminate?
                    stop after accpetance, will increase the episode rewards.
                    But can we still running the program to increase the total rewards?
                    Or do not terminate, just reset the location?
                """

                # reset the initial position and clear buffer
                # TODO: Check the difference
                # no need to reset here
                # self.sim.reset(rnum(self.N))
                terminate = True

            else:
                """
                    Rejection or dConfig == 0
                        1. Keep updating with new canvas.
                            or
                        Early stop.
                        2. Wrong decision penalty
                    Q: Should we reset the initial location?
                    Q: How to handle no config change?
                    Q: Should there be some penalty here?
                """

                self.sim.clear_buffer()
                reward = 0.0
                terminate = True
            # reset or update
        else:
            """Stepwise feedback:
                1. exploration
                2. icemove rewards
                3. defect propagation guiding
                4. #more
                TODO: Write option in init arguments.
            """
            # Check each scale (each of them stays in 0~1)

            # TODO: calculate reward wrt physical observation
            _, diffeng_level, _ = self._discrete_criteron(self.physical_observables)

            # asymmetric reward doesn't work well.
            # 100 --> L*L --> N
            reward = diffeng_level / (self.L * self.L)
            #reward = diffeng_level / 100

            # Reset if timeout from env.
            if (self.sim.timeout()):
                terminate = True

        obs = self.get_obs()

        # Add the timeout counter (TODO: Check these codes)
        # Terminate and run monte carlo to prepare new config
        #terminate = True

        ### TODO: Add configuration reset counter!
        #print ("[GAME_ENV] Reset Ice Configuration!")
        #self.sim.reset_config()

        # Not always return info
        self.reward_trajectory.append(reward)
        return obs, reward, terminate, info

    # Start function used for agent learning
    def start(self, init_site=None, create_defect=True):
        """
            Q: Do we flip at start?
                I think flip @ starting point is reasonable.
            Returns: same as step()
                obs, reward, terminate, rets
        """
        if init_site == None:
            init_agent_site = self.sim.start(rnum(self.N))
        else:
            init_agent_site = self.sim.start(init_site)
        assert(self.agent_site == init_agent_site)
        if create_defect:
            self.sim.flip()
        # remove this legacy?
        self.center = self.agent_site2d

        state = self.get_obs()
        # reference configuration
        #self.refconfig = transf_binary_vector(state.configs_2d[:,:,0])

        return state

    def reset(self, site=None, create_defect=True):
        """reset is called by RL convention.
        """
        ## clear buffer and set new start of agent
        if site is None:
            site = rnum(self.N)
        init_site = self.sim.reset(site)
        assert(init_site == site)
        # actually, counter can be called by sim.get_episode()

        self.center = self.agent_site2d

        if create_defect:
            self.sim.flip()

        """TODO
            This mechanism should be checked.
            Reset configuration: run monte carlo again.
        """
        #print ("[GAME_ENV] Reset Ice Configuration!")
        #self.sim.reset_config()

        info = None
        self.last_rets = None
        self.reward_trajectory = []

        state = self.get_obs()
        # reference configuration
        # self.refconfig = transf_binary_vector(state.configs_2d[:,:,0])

        return state

    def timeout(self):
        return self.sim.timeout()

    def get_game_status(self):
        """(TODO)Return the game status including steps and physical observables. 
          returns:
        """
        total_steps = self.sim.get_total_steps()
        ep_steps = self.sim.get_ep_step_counter()
        ep = self.sim.get_episode()

        # get counters
        metropolis_times = self.sim.get_updating_counter()
        update_times = self.sim.get_updated_counter()

        # compute update interval
        update_interval = total_steps - self.last_update_step

        # acceptance rate
        total_acc_rate = self.sim.get_total_acceptance_rate() * 100.0
        effort =  update_times/total_steps * 100.0

        d = {
            "total_steps": total_steps,
            "updated_times": update_times,
        }

        return AttrDict(d)

    def set_output_path(self, path):
        if not os.path.exists(path):
            os.mkdir(path)
        self.cfg_outdir = os.path.join(path, self.cfg_outdir)
        if not os.path.exists(self.cfg_outdir):
            os.mkdir(self.cfg_outdir)
        self.ofilename = os.path.join(path, self.ofilename)
        self.rfilename = os.path.join(path, self.rfilename)
        self.json_file = os.path.join(path, self.json_file)
        self.env_settinglog_file = os.path.join(path, self.env_settinglog_file)
        print ("Set results dumpping path to {}".format(self.json_file))
        print ("Set env setting log path to {}".format(self.env_settinglog_file))

    @property
    def agent_site(self):
        return self.sim.get_agent_site()

    @property
    def agent_site2d(self):
        #TODO FIX
        return (self.sim.get_agent_site()//self.sL, self.sim.get_agent_site()%self.sL)

    def set_agent_site(self, site, clear_map=False):
        # Notice: sim.start() only places the agent on the site;
        #   it does not clear the maps (call restart if that is needed).
        if 0 <= site < self.N:
            if clear_map:
                self.sim.restart(site)
            else:
                self.sim.start(site)

    def enable_subregion(self):
        self.use_subregion = True

    def disable_subregion(self):
        self.use_subregion = False

    @property
    def action_name_mapping(self):
        return self.idx2act

    @property
    def name_action_mapping(self):
        return self.act2idx

    @property
    def physical_observables(self):
        return self.sim.get_phy_observables()

    # TODO: These helpers need to be replaced.
    ## ray test  (for: int, list, np_list)
    def convert_1Dto2D(self, input_1D):
        """Convert a 1D site index (or list of indices) into 2D (row, col) coordinates.
            This function is provided by Thisray; it is problematic and still needs fixing.
        """
        output_2D = None
        if isinstance(input_1D, int):
            output_2D = (input_1D // self.L, input_1D % self.L)
        elif isinstance(input_1D, list):
            # list-comprehension version of the original loop
            output_2D = [(position // self.L, position % self.L) for position in input_1D]
        return output_2D
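
    # For example (a sketch, assuming self.L = 32):
    #   convert_1Dto2D(33)          -> (1, 1)
    #   convert_1Dto2D([0, 32, 65]) -> [(0, 0), (1, 0), (2, 1)]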

    def calculate_area(self):
        """Estimate the area enclosed by the agent's trajectory.
        TODO:
            The handling of the periodic boundary condition may need revision.
            This function is provided by Thisray; it is problematic and still needs fixing.
        """
        traj_2D = self.convert_1Dto2D(self.sim.get_trajectory())
        traj_2D_dict = {}
        for x, y in traj_2D:
            if x in traj_2D_dict:
                traj_2D_dict[x].append(y)
            else:
                traj_2D_dict[x] = [y]

        # Compute the maximal y-length (number of distinct y positions minus one).
        y_position_list = []
        for y_list in traj_2D_dict.values():
            y_position_list = y_position_list + y_list
        y_position_list = list(set(y_position_list))
        max_y_length = len(y_position_list) -1

        area = 0.0
        for x in traj_2D_dict:
            diff = max(traj_2D_dict[x]) - min(traj_2D_dict[x])
            if diff > max_y_length:
                diff = max_y_length
            temp_area = diff - len(traj_2D_dict[x]) + 1  # subtract visited sites so vertical segments add no area
            if temp_area > 0:
                area = area + temp_area

        return area
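
    # Hedged worked example: for a closed 3x3 perimeter loop (rows 0..2, columns 0..2),
    # row 1 contributes max(y) - min(y) - len(ys) + 1 = 2 - 2 + 1 = 1 while rows 0 and 2
    # contribute 0, so calculate_area() returns 1, the number of enclosed sites.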

    def set_ice(self, s):
        """Set the ice configuration; numpy arrays are converted to python lists first."""
        if isinstance(s, np.ndarray):
            self.sim.set_ice(s.tolist())
        elif isinstance(s, list):
            self.sim.set_ice(s)
        else:
            raise ValueError("Only numpy arrays or lists are accepted.")

    def load_ice(self, path):
        """Read out ice configuration from npy."""
        loaded = np.load(path)
        self.set_ice(loaded)

    def save_ice(self):
        """Save out the ice configuration in numpy format."""
        s = self._transf1d(self.sim.get_state_t()) # convert into numpy array
        ep = self.sim.get_episode()
        fname = "ice_{}".format(ep)
        fname = os.path.join(self.cfg_outdir, fname)
        np.save(fname, s)
        print ("Save the initial configuration @ episode {} to {}".format(
            ep, self.cfg_outdir))
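
    # Usage sketch (hedged; "ice_0.npy" is a hypothetical file name written by save_ice):
    #   env.save_ice()                                            # writes ice_<episode>.npy under cfg_outdir
    #   env.load_ice(os.path.join(env.cfg_outdir, "ice_0.npy"))   # restore it later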

    def reset_ice_config(self):
        pass

    def resize_ice_config(self, L, mcsteps):
        """Resize the whole system."""
        # Resize the system size
        self.L = L
        self.num_mcsteps = mcsteps
        self.N = 4*L**2
        self.sL = int(np.sqrt(self.N))  # side length of the square spin lattice (= sqrt(N))
        self.mc_info = INFO(self.L, self.N, 1, 1, 1, mcsteps, 1, mcsteps)
        # Allocate sim again.
        self.sim = SQIceGame(self.mc_info)
        self.sim.set_temperature (self.kT)
        self.sim.init_model()
        self.sim.mc_run(self.num_mcsteps)
        self.dump_env_setting()
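
    # Usage sketch (hedged): shrink the lattice and re-thermalize with 2000 MC sweeps.
    #   env.resize_ice_config(16, 2000)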

    # TODO: Add an option to render to the terminal or to a file.
    # TODO: Update this function to the new APIs.
    def render(self, mapname="traj", mode="ansi", close=False):
        #of = StringIO() if mode == "ansi" else sys.stdout
        #print ("Energy: {}, Defect: {}".format(self.sqice.cal_energy_diff(), self.sqice.cal_defect_density()))
        s = None
        # TODO: 
        if mapname == "traj":
            s = self._transf2d(self.sim.get_state_diff_map())
        start = self.sim.get_agent_init_site()
        start = (start // self.sL, start % self.sL)
        s[start] = 3
        screen = "\r"
        screen += "\n\t"
        screen += "+" + self.L * "---" + "+\n"
        for i in range(self.L):
            screen += "\t|"
            for j in range(self.L):
                p = (i, j)
                spin = s[p]
                if spin == -1:
                    screen += " o "
                elif spin == +1:
                    screen += " * "
                elif spin == 0:
                    screen += "   "
                elif spin == +2:
                    screen += " @ "
                elif spin == -2:
                    screen += " O "
                elif spin == +3:
                    # starting point
                    screen += " x "
            screen += "|\n"
        screen += "\t+" + self.L * "---" + "+\n"
        #TODO: Add choice write to terminal or file
        #sys.stdout.write(screen)
        with open(self.rfilename, "a") as f:
            f.write("Episode: {}, global step = {}\n".format(self.sim.get_ep_step_counter(), self.sim.get_total_steps()))
            f.write("{}\n".format(screen))

    def get_obs(self):
        """Build the observation; this is the critical function of the environment.
        """
        local_spins = self._transf1d(self.sim.get_local_spins())
        local_sites = self._transf1d(self.sim.get_local_sites())

        # E, dE, dC: the raw values are very small or sit close to 0/1,
        # so discretize them into a few energy levels first.
        phyobs = self._transf1d(self.sim.get_phy_observables())
        disc_phyobs = self._discrete_criteron(phyobs)

        local_obs = np.concatenate((local_spins, disc_phyobs), axis=0)

        # global observation
        diff_map = self._transf2d(self.sim.get_state_diff_map())

        """ Sub-region: sliding box observation. 
            Note: ths sub-region size is now fixed.
        """
        if self.use_subregion:
            new_center = move_center(self.center, self.agent_site2d, 32, 32, self.sL, self.sL)
            diff_map= periodic_crop(diff_map, new_center, 32, 32)
            if (diff_map.shape != (32, 32)):
                raise ValueError("[GAME_ENV] EORROR: cropped region is ruined.")
            self.center = new_center

        diff_map = np.expand_dims(diff_map, axis=2)
        # Note: only the diff map is used here, although the old comment mentioned stacking three maps.

        # Return the observation as a dict.
        """How should the RL algorithm handle this?
            The network takes local_obs and global_obs:
            * feed local_obs to a feed-forward network (Q: how about using an RNN?)
            * feed global_obs to a convolutional network
        """
        d = {
            "local_spins" : local_spins,
            "local_sites" : local_sites,
            "local_obs"   : local_obs,
            "global_obs" : diff_map,
        }

        return AttrDict(d)
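
    # A hedged access sketch (field names as defined in get_obs above):
    #   obs = env.get_obs()
    #   obs.global_obs.shape   # (sL, sL, 1), or (32, 32, 1) when use_subregion is enabled
    #   obs.local_obs          # local spins concatenated with the discretized E/dE/dC levels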

    @property
    def unwrapped(self):
        """Completely unwrap this env.
            Returns:
                gym.Env: The base non-wrapped gym.Env instance
        """
        return self

    def save_env_settings(self):
        print ("TODO: Change this into dump json")
        print ("TODO, also Recover setting from file.")
        # Write settings into the logfile, modified when setting function is called.
        with open(self.env_settinglog_file, "a") as f:
            # TODO: write new parameters.
            f.write("Launch time: {}\n".format(str(datetime.now())))
            f.write("Number of Observation: {}\n".format(NUM_OBSERVATION_MAPS))
            #f.write("Stepwise reward function: {}\n".format(self.stepwise))
            #f.write("Metropolis reward function: {}\n".format(self.endreward))

    def _transf2d(self, s):
        # add nan_to_num here?
        return np.array(s, dtype=np.float32).reshape([self.sL, self.sL])

    def _transf1d(self, s):
        # The input is assumed to already be a one-dimensional vector.
        return np.array(s, dtype=np.float32)

    def _append_record(self, record, fname):
        with open(fname, "a") as f:
            json.dump(record, f)
            f.write(os.linesep)

    def _discrete_criteron(self, phyobs):
        """'Discretize' the energy observables into a few levels.
          E:
            * one defect pair = +1
            * several but not many (5~10) = 0
            * far from the ice state = -1 (will this happen?)
          dE: (compared with the initial state)
            * decrease = +1
            * even = 0
            * increase = -1
          dC:
            * this value is very small, so enlarge it
              (here by a hand-crafted factor of 5).

          Goal: dC increases while dE stays flat.
        """
        E, dE, dC = phyobs
        # Note: E and dE are correlated.
        num_defects = dE * self.N / 2
        # Map the defect count onto a discrete level.
        if num_defects <= 2:
            defect_level = +1
        elif num_defects <= 5:
            defect_level = 0
        else:
            defect_level = -1

        # Hand-crafted scaling factor.
        dC *= 5.0

        newphy = [E, defect_level, dC]
        return newphy
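
    # Hedged worked example (assuming N = 1024): dE = 4/1024 gives
    # num_defects = dE * N / 2 = 2, i.e. a single defect pair, so the level is +1.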

    def env_setting(self):
        settings = {
            "N" : self.N,
            "sL" : self.sL,
            "L" : self.L,
            "R_scale" : self.reward_scale,
            "R_upper_thres" : self.reward_threshold,
            "R_lower_thres" : self.reward_threshold,
        }
        return AttrDict(settings)

    def env_status(self):
        """Collect the status to be saved into the json file.
            * Carefully choose the items to be saved.
            * This is the only important thing that should be saved.
        """
        # Gather the current counters.
        total_steps = self.sim.get_total_steps()
        ep = self.sim.get_episode()
        # Number of steps the agent has walked in this episode.
        ep_step_counters = self.sim.get_ep_step_counter()
        trajectory = self.sim.get_trajectory()
        if self.sim.get_accepted_length():
            loop_length = self.sim.get_accepted_length()[-1]
        else:
            loop_length = 0
        enclosed_area = self.calculate_area()
        update_times = self.sim.get_updated_counter()
        action_counters = self.sim.get_action_statistics()
        action_stats = [x / total_steps for x in action_counters]

        start_site = self.sim.get_agent_init_site()
        acceptance = update_times * 100.0 / ep

        # Per-episode step counter (same as ep_step_counters above).
        local_step = self.sim.get_ep_step_counter()
        # "Effort": fraction of this episode's steps that ended up in the accepted loop, in percent.
        effort = loop_length / ep_step_counters * 100.0

        d = {
            "Episode": ep,
            "Steps"  : total_steps,
            "LocalSteps" : local_step,
            "StartSite"  : start_site,
            "Trajectory": trajectory,
            "UpdateTimes": update_times,
            "AcceptanceRatio" : acceptance,
            "LoopLength": loop_length,
            "EnclosedArea": enclosed_area,
            "ActionStats" : action_stats
        }

        return AttrDict(d)

    def dump_env_status(self):
        d = self.env_status()
        self._append_record(d, self.json_file)

    def dump_env_setting(self):
        d = self.env_setting()
        with open(self.env_settinglog_file, "w") as f:
            json.dump(d, f)
Example #11
0
kT = 0.0001
J = 1
N = 4 * L**2  # Well, this parameter should not be set by me...

num_neighbors = 1
num_replicas = 1
num_mcsteps = 2000
num_bins = 1
num_thermalization = num_mcsteps
tempering_period = 1

mc_info = INFO(L, N, num_neighbors, num_replicas, num_bins, num_mcsteps,
               tempering_period, num_thermalization)

# initialize the system and the lattice configuration
sim = SQIceGame(mc_info)
sim.set_temperature(kT)
sim.init_model()

sim.mc_run(num_mcsteps)
#sim.print_lattice()

sim.start(np.random.randint(N))
print("Starting site: {}".format(sim.get_agent_site()))

print(sim.get_trajectory())
# Test the long-loop algorithm
segs = sim.long_loop_algorithm()
traj = sim.get_trajectory()
print(traj)
print(segs)
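
# A hedged follow-up probe (these getters appear in the wrapper class above; how the
# long-loop proposal affects them is an assumption, so treat this only as a sanity check):
print("updated counter: {}".format(sim.get_updated_counter()))
print("physical observables (E, dE, dC): {}".format(sim.get_phy_observables()))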