def resize_ice_config(self, L, mcsteps):
    """Resize the whole system."""
    # Resize the system size
    self.L = L
    self.num_mcsteps = mcsteps
    self.N = 4*L**2
    self.sL = int(np.sqrt(self.N))  # square length L
    self.mc_info = INFO(self.L, self.N, 1, 1, 1, mcsteps, 1, mcsteps)
    # Allocate sim again.
    self.sim = SQIceGame(self.mc_info)
    self.sim.set_temperature(self.kT)
    self.sim.init_model()
    self.sim.mc_run(self.num_mcsteps)
    self.dump_env_setting()
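# Usage sketch (not part of the original source): `env` is assumed to be an
# instance of the IcegameEnv class defined further below. Resizing re-runs the
# Monte Carlo preparation on the new lattice, so it may take a while for large
# L / mcsteps.
env = IcegameEnv(L=8, kT=0.0001, J=1)
env.resize_ice_config(L=16, mcsteps=4000)
assert env.N == 4 * 16**2                # N = 4*L**2 on this lattice
assert env.sL == int(np.sqrt(env.N))     # sL = 2*L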
from icegame import SQIceGame, INFO
import numpy as np

L = 32
kT = 0.0001
J = 1
N = L**2
num_neighbors = 2
num_replicas = 1
num_mcsteps = 2000
num_bins = 1
num_thermalization = num_mcsteps
tempering_period = 1

mc_info = INFO(L, N, num_neighbors, num_replicas, num_bins, num_mcsteps,
               tempering_period, num_thermalization)

# initialize the system, lattice config
sim = SQIceGame(mc_info)
sim.set_temperature(kT)
sim.init_model()
sim.mc_run(num_mcsteps)

sim.start(100)
eng_map = sim.get_energy_map()
print(eng_map)
print(type(eng_map))

for i in range(10):
    print(sim.draw(np.random.randint(6)))
    print(sim.get_trajectory())

sites = sim.get_trajectory()
site_diffs = [j - i for i, j in zip(sites[:-1], sites[1:])]
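# A small follow-up sketch (assumption: trajectory sites are flat indices on an
# L x L grid in row-major order, matching the conversion helpers used later in
# the environment code). Mapping to 2D coordinates makes the step differences
# easier to read: on such a grid a difference of +1/-1 is a horizontal step and
# +L/-L a vertical one, up to periodic wrap-around.
sites_2d = [(s // L, s % L) for s in sites]
print(sites_2d)
print(site_diffs)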
class IceGameEnv(core.Env): def __init__(self, L, kT, J): self.L = L self.kT = kT self.J = J self.N = L**2 num_neighbors = 2 num_replicas = 1 num_mcsteps = 2000 num_bins = 1 num_thermalization = num_mcsteps tempering_period = 1 self.mc_info = INFO(self.L, self.N, num_neighbors, num_replicas, \ num_bins, num_mcsteps, tempering_period, num_thermalization) self.sim = SQIceGame(self.mc_info) self.sim.set_temperature(self.kT) self.sim.init_model() self.sim.mc_run(num_mcsteps) self.episode_terminate = False self.accepted_episode = False self.name_mapping = dict({ 0: 'right', 1: 'down', 2: 'left', 3: 'up', 4: 'lower_next', 5: 'upper_next', 6: 'metropolis', }) self.index_mapping = dict({ 'right': 0, 'down': 1, 'left': 2, 'up': 3, 'lower_next': 4, 'upper_next': 5, 'metropolis': 6, }) ### action space and state space self.action_space = spaces.Discrete(len(self.name_mapping)) self.observation_space = spaces.Box(low=-1.0, high=1.0, shape=(self.L, self.L, 4)) self.reward_range = (-1, 1) # output file self.ofilename = 'loop_sites.log' # render file self.rfilename = 'loop_renders.log' # save log to json for future analysis self.json_file = 'env_history.json' self.stacked_axis = 2 ## counts reset() self.episode_counter = 0 ## ray test self.auto_metropolis = False # ray add list: # 1. log 2D (x, y) in self.ofilename # 2. add self.calculate_area() and loop_area # 3. auto_6 (uncompleted) def step(self, action): terminate = False reward = 0.0 obs = None rets = [0.0, 0.0, 0.0, 0.0] metropolis_executed = False ## execute different type of actions if (action == 6): self.sim.flip_trajectory() rets = self.sim.metropolis() metropolis_executed = True elif (0 <= action < 6): rets = self.sim.draw(action) is_aceept, dEnergy, dDensity, dConfig = rets # metropolis judgement if (metropolis_executed): if is_aceept > 0 and dConfig > 0: self.sim.update_config() print('[GAME_ENV] PROPOSAL ACCEPTED!') total_steps = self.sim.get_total_steps() ep_steps = self.sim.get_ep_step_counter() ep = self.sim.get_episode() loop_length = self.sim.get_accepted_length()[-1] loop_area = self.calculate_area() update_times = self.sim.get_updated_counter() reward = 1.0 * ( loop_length / 4.0 ) # reward with different length by normalizing with len 4 elements # output to self.ofilename with open(self.ofilename, 'a') as f: f.write('1D: {}, \n(2D: {})\n'.format( self.sim.get_trajectory(), self.conver_1Dto2D(self.sim.get_trajectory()))) print('\tSave loop configuration to file: {}'.format( self.ofilename)) print('\tTotal accepted number = {}'.format( self.sim.get_updated_counter())) print('\tAccepted loop length = {}, area = {}'.format( loop_length, loop_area)) print('\tAgent walks {} steps in episode, action counters: {}'. format(ep_steps, self.sim.get_ep_action_counters())) action_counters = self.sim.get_action_statistics() action_stats = [x / total_steps for x in action_counters] print( '\tStatistics of actions all episodes (ep={}, steps={}) : {}' .format(ep, total_steps, action_stats)) print('\tAcceptance ratio (accepted/total Eps) = {}%'.format( update_times * 100.0 / ep)) self.dump_env_states() self.render() self.sim.clear_buffer() else: self.sim.clear_buffer() terminate = True # Avoid running metropolis at start if (rets[3] == 0.0): reward = -0.8 # reset or update else: reward = self._stepwise_weighted_returns(rets) # as usual obs = self.get_obs() ## add timeout mechanism? 
return obs, reward, terminate, rets # Start function used for agent learing def start(self, init_site=None): if init_site == None: init_agent_site = self.sim.start(rnum(self.N)) else: init_agent_site = self.sim.start(init_site) assert (init_site == init_agent_site) def reset(self): ## clear buffer and set new start of agent site = rnum(self.N) init_site = self.sim.restart(site) assert (init_site == site) self.episode_counter += 1 return self.get_obs() def timeout(self): return self.sim.timeout() @property def agent_site(self): return self.sim.get_agent_site() @property def action_name_mapping(self): return self.name_mapping @property def name_action_mapping(self): return self.index_mapping def _stepwise_weighted_returns(self, rets): icemove_w = 0.000 energy_w = -1.0 defect_w = 0.0 baseline = 0.009765625 ## 1 / 1024 scaling = 2.0 return (icemove_w * rets[0] + energy_w * rets[1] + defect_w * rets[2] + baseline) * scaling ## ray test (for: int, list, np_list) def conver_1Dto2D(self, input_1D): output_2D = None if type(input_1D) == int: output_2D = (int(input_1D / self.L), int(input_1D % self.L)) elif type(input_1D) == list: output_2D = [] for position in input_1D: output_2D.append( (int(position / self.L), int(position % self.L))) return output_2D ## ray test def calculate_area(self): traj_2D = self.conver_1Dto2D(self.sim.get_trajectory()) traj_2D_dict = {} for x, y in traj_2D: if x in traj_2D_dict: traj_2D_dict[x].append(y) else: traj_2D_dict[x] = [y] # check Max y_length y_position_list = [] for y_list in traj_2D_dict.values(): y_position_list = y_position_list + y_list y_position_list = list(set(y_position_list)) max_y_length = len(y_position_list) - 1 area = 0.0 for x in traj_2D_dict: diff = max(traj_2D_dict[x]) - min(traj_2D_dict[x]) if diff > max_y_length: diff = max_y_length temp_area = diff - len( traj_2D_dict[x]) + 1 ## avoid vertical straight line if temp_area > 0: area = area + temp_area return area def render(self, mapname='traj', mode='ansi', close=False): #of = StringIO() if mode == 'ansi' else sys.stdout #print ('Energy: {}, Defect: {}'.format(self.sqice.cal_energy_diff(), self.sqice.cal_defect_density())) s = None if (mapname == 'traj'): s = self._transf2d(self.sim.get_canvas_map()) start = self.sim.get_start_point() start = (int(start / self.L), int(start % self.L)) s[start] = 3 screen = '\r' screen += '\n\t' screen += '+' + self.L * '---' + '+\n' for i in range(self.L): screen += '\t|' for j in range(self.L): p = (i, j) spin = s[p] if spin == -1: screen += ' o ' elif spin == +1: screen += ' * ' elif spin == 0: screen += ' ' elif spin == +2: screen += ' @ ' elif spin == -2: screen += ' O ' elif spin == +3: screen += ' x ' screen += '|\n' screen += '\t+' + self.L * '---' + '+\n' #sys.stdout.write(screen) with open(self.rfilename, 'a') as f: f.write('Episode: {}, global step = {}\n'.format( self.episode_counter, self.sim.get_total_steps())) f.write('{}\n'.format(screen)) def get_obs(self): config_map = self._transf2d(self.sim.get_state_t_map()) canvas_map = self._transf2d(self.sim.get_canvas_map()) energy_map = self._transf2d(self.sim.get_energy_map()) defect_map = self._transf2d(self.sim.get_defect_map()) return np.stack([config_map, canvas_map, energy_map, defect_map], axis=self.stacked_axis) @property def unwrapped(self): """Completely unwrap this env. Returns: gym.Env: The base non-wrapped gym.Env instance """ return self def sysinfo(self): print('') def _transf2d(self, s): # do we need zero mean here? 
return np.array(s, dtype=np.float32).reshape([self.L, self.L]) def _append_record(self, record): with open(self.json_file, 'a') as f: json.dump(record, f) f.write(os.linesep) def dump_env_states(self): # get current timestamp total_steps = self.sim.get_total_steps() ep = self.sim.get_episode() # agent walk # steps in this episode ep_step_counters = self.sim.get_ep_step_counter() trajectory = self.sim.get_trajectory() if self.sim.get_accepted_length(): loop_length = self.sim.get_accepted_length()[-1] else: loop_length = 0 enclosed_area = self.calculate_area() update_times = self.sim.get_updated_counter() action_counters = self.sim.get_action_statistics() action_stats = [x / total_steps for x in action_counters] start_site = self.sim.get_start_point() acceptance = update_times * 100.0 / ep d = { 'Episode': ep, 'Steps': total_steps, 'StartSite': start_site, 'Trajectory': trajectory, 'UpdateTimes': update_times, 'AcceptanceRatio': acceptance, 'LoopLength': loop_length, 'EnclosedArea': enclosed_area, 'ActionStats': action_stats } self._append_record(d)
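# Minimal driving loop for the IceGameEnv class above -- a sketch, not from the
# original source. It assumes the gym-style contract shown in step(): actions
# 0-5 walk the agent, action 6 proposes a Metropolis update, and step() returns
# (obs, reward, terminate, rets).
env = IceGameEnv(L=32, kT=0.0001, J=1)
obs = env.reset()
for t in range(1000):
    action = env.action_space.sample()   # uniform over the 7 discrete actions
    obs, reward, terminate, rets = env.step(action)
    if terminate:
        obs = env.reset()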
def __init__(self, L, kT, J): self.L = L self.kT = kT self.J = J self.N = L**2 num_neighbors = 2 num_replicas = 1 num_mcsteps = 2000 num_bins = 1 num_thermalization = num_mcsteps tempering_period = 1 self.mc_info = INFO(self.L, self.N, num_neighbors, num_replicas, \ num_bins, num_mcsteps, tempering_period, num_thermalization) self.sim = SQIceGame(self.mc_info) self.sim.set_temperature(self.kT) self.sim.init_model() self.sim.mc_run(num_mcsteps) self.episode_terminate = False self.accepted_episode = False self.name_mapping = dict({ 0: 'right', 1: 'down', 2: 'left', 3: 'up', 4: 'lower_next', 5: 'upper_next', 6: 'metropolis', }) self.index_mapping = dict({ 'right': 0, 'down': 1, 'left': 2, 'up': 3, 'lower_next': 4, 'upper_next': 5, 'metropolis': 6, }) ### action space and state space self.action_space = spaces.Discrete(len(self.name_mapping)) self.observation_space = spaces.Box(low=-1.0, high=1.0, shape=(self.L, self.L, 4)) self.reward_range = (-1, 1) # output file self.ofilename = 'loop_sites.log' # render file self.rfilename = 'loop_renders.log' # save log to json for future analysis self.json_file = 'env_history.json' self.stacked_axis = 2 ## counts reset() self.episode_counter = 0 ## ray test, add list: # 1. log 2D (x, y) in self.ofilename # 2. add self.calculate_area() and loop_area # 3. auto_metropolis (uncompleted) # 8/2: # 5. add save_record_dict(): to record the amount of length and area # 6. add area_reward in step() # 7. add hundred_test(): to count accepted ratio in last 100 episodes # 8. add now_position & path flag in get_obs() self.record_dict = [{}, {}] self.accepted_in_hundred = 0 self.accepted_in_hundred_stack = [] ## ray test, step_setting self.area_reward = True # use both length & area to calculate the reward self.auto_metropolis = False # if the condition is OK, auto execute metropolis self.metropolis_terminal = False # if metropolis_executed == True, terminal = True self.strict_step = False # if rets[0] (is_aceept) == -1, terminal = True
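# Hedged sketch of the hundred_test() helper mentioned in the add-list above;
# the real implementation is not shown here. The idea: keep a rolling count of
# accepted episodes and push each 100-episode acceptance ratio onto the stack
# initialized in __init__.
def hundred_test(self, accepted):
    """Track the acceptance ratio over the last 100 episodes (illustrative only)."""
    if accepted:
        self.accepted_in_hundred += 1
    if self.episode_counter > 0 and self.episode_counter % 100 == 0:
        self.accepted_in_hundred_stack.append(self.accepted_in_hundred / 100.0)
        self.accepted_in_hundred = 0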
class IceGameEnv(core.Env): def __init__ (self, L, kT, J, is_cont=False): """IceGame is_cont (bool): Set True that action is continous variable; set Fasle using discrete action. """ self.L = L self.kT = kT self.J = J self.N = L**2 self.is_cont = is_cont num_neighbors = 2 num_replicas = 1 num_mcsteps = 2000 num_bins = 1 num_thermalization = num_mcsteps tempering_period = 1 self.mc_info = INFO(self.L, self.N, num_neighbors, num_replicas, \ num_bins, num_mcsteps, tempering_period, num_thermalization) self.sim = SQIceGame(self.mc_info) self.sim.set_temperature (self.kT) self.sim.init_model() self.sim.mc_run(num_mcsteps) self.episode_terminate = False self.accepted_episode = False self.last_update_step = 0 """ History FIFO """ self.Imap = I = np.ones([self.L, self.L]) self.Omap = O = np.zeros([self.L, self.L]) if HIST_LEN > 0: self.canvas_hist = deque([O] * HIST_LEN) self.defect_hist = deque([O] * HIST_LEN) self.idx2act = dict({ 0 : "right", 1 : "down", 2 : "left", 3 : "up", 4 : "lower_next", 5 : "upper_next", 6 : "metropolis" }) self.act2idx = {v: k for k, v in self.idx2act.items()} # action space and state space self.observation_space = spaces.Box(low=-1.0, high=1.0, shape=(self.L, self.L, NUM_OBSERVATION_MAPS)) if is_cont: self.action_space = spaces.Box(low=-1.0, high=1.0, shape=(len(self.idx2act))) else: self.action_space = spaces.Discrete(len(self.idx2act)) #TODO: make more clear definition """ Global Observations: * Local Observations: * neighboring spins up & down * """ self.global_observation_space = spaces.Box(low=-1.0, high=1.0, shape=(self.L, self.L, 2)) self.local_observation_space = spaces.Discrete(7) self.reward_range = (-1, 1) # output file self.ofilename = "loop_sites.log" # render file self.rfilename = "loop_renders.log" # save log to json for future analysis self.json_file = "env_history.json" self.stacked_axis = 2 ## counts reset() self.episode_counter = 0 self.lives = DEFAULT_LIVES ## legacy codes self.auto_metropolis = False # ray add list: # 1. log 2D (x, y) in self.ofilename # 2. add self.calculate_area() and loop_area # 3. auto_6 (uncompleted) def step(self, action): """step function with directional action """ if self.is_cont: # actions are 7 continuous variables, pick the largest one action = np.argmax(action) terminate = False reward = 0.0 # -0.000975 # stepwise punishment. obs = None info = None rets = [0.0, 0.0, 0.0, 0.0] metropolis_executed = False ## execute different type of actions if (action == 6): self.sim.flip_trajectory() rets = self.sim.metropolis() metropolis_executed = True elif (0 <= action < 6) : rets = self.sim.draw(action) """ Results from icegame index 0 plays two roles: if action is walk: rets[0] = is_icemove elif action is metropolis: rets[0] = is_accept """ is_accept, dEnergy, dDensity, dConfig = rets is_icemove = True if is_accept > 0.0 else False # metropolis judgement if (metropolis_executed): if is_accept > 0 and dConfig > 0: """ Updates Accepted 1. Calculate rewards 2. Save logs 3. 
Reset maps and buffers """ self.sim.update_config() print ("[GAME_ENV] PROPOSAL ACCEPTED!") total_steps = self.sim.get_total_steps() ep_steps = self.sim.get_ep_step_counter() ep = self.sim.get_episode() loop_length = self.sim.get_accepted_length()[-1] loop_area = self.calculate_area() # get counters action_counters = self.sim.get_action_statistics() metropolis_times = self.sim.get_updating_counter() update_times = self.sim.get_updated_counter() # compute update interval update_interval = total_steps - self.last_update_step self.last_update_step = total_steps # acceptance rate total_acc_rate = self.sim.get_total_acceptance_rate() * 100.0 effort = update_times/total_steps * 100.0 reward = 1.0 * (loop_length / LOOP_UNIT_REWARD) # reward with different length by normalizing with len 4 elements # TODO: Calculate recent # steps' acceptance rate # output to self.ofilename with open(self.ofilename, "a") as f: f.write("1D: {}, \n(2D: {})\n".format(self.sim.get_trajectory(), self.convert_1Dto2D(self.sim.get_trajectory()))) print ("\tSave loop configuration to file: {}".format(self.ofilename)) print ("\tTotal accepted number = {}".format(update_times)) print ("\tAccepted loop length = {}, area = {}".format(loop_length, loop_area)) print ("\tAgent walks {} steps in episode, action counters: {}".format(ep_steps, self.sim.get_ep_action_counters())) action_stats = [x / total_steps for x in action_counters] print ("\tStatistics of actions all episodes (ep={}, steps={}) : {}".format(ep, total_steps, action_stats)) print ("\tAcceptance ratio (accepted/ # of metropolis) = {}%".format( update_times * 100.0 / metropolis_times)) print ("\tAcceptance ratio (from icegame) = {}%".format(total_acc_rate)) print ("\tRunning Effort = {}%".format(effort)) # TODO: How to describe the loop? info = { "Acceptance Ratio" : total_acc_rate, "Running Effort": effort, "Updated" : update_times, "Loop Size": loop_length, "Loop Area": loop_area, } # Stop rendering, it save huge log # self.render() self.dump_env_states() self.sim.clear_buffer() """ Terminate? stop after accpetance, will increase the episode rewards. But can we still running the program to increase the total rewards? Or do not terminate, just reset the location? """ # terminate = True self.sim.restart(rnum(self.N)) else: self.sim.clear_buffer() self.lives -= 1 """ Rejection 1. Keep updating with new canvas. or Early stop. 2. Wrong decision penalty """ reward = -0.001 #self.episode_terminate = True #terminate = True # Avoid running metropolis at start (Too hand-crafted method!) #if (rets[3] == 0.0): # reward = -0.8 # reset or update else: """Stepwise feedback: 1. exploration 2. icemove reards 3. defect propagation guiding 4. #more TODO: Write option in init arguments. """ #reward = self._stepwise_weighted_returns(rets) # Check each scale (each of them stays in 0~1) #reward = 0.002 - (dEnergy + dDensity) #reward = -(dEnergy + dDensity) + dConfig if is_icemove: reward = .001 #print ("is icemove: {}, {}".format(dEnergy, dDensity)) else: reward = -.001 #print ("not icemove: {}, {}".format(dEnergy, dDensity)) # as usual obs = self.get_obs() #obs = self.get_hist_obs() ## add timeout mechanism? 
# Add the timeout counter if self.lives <= 0: terminate = True # Not always return info return obs, reward, terminate, info # Start function used for agent learning def start(self, init_site=None): """ Returns: same as step() obs, reward, terminate, rets """ if init_site == None: init_agent_site = self.sim.start(rnum(self.N)) else: init_agent_site = self.sim.start(init_site) assert(self.agent_site == init_agent_site) self.episode_terminate = False self.lives = DEFAULT_LIVES return self.get_obs() #return self.get_hist_obs() def reset(self, site=None): ## clear buffer and set new start of agent if site is None: site = rnum(self.N) init_site = self.sim.restart(site) assert(init_site == site) self.episode_terminate = False self.episode_counter += 1 self.lives = DEFAULT_LIVES # actually, counter can be called by sim.get_episode() # Clear the fifo queue if HIST_LEN > 0: self.canvas_hist.clear() self.defect_hist.clear() for _ in range(HIST_LEN): self.canvas_hist.append(self.Omap) self.defect_hist.append(self.Omap) info = None return self.get_obs() #return self.get_hist_obs() def timeout(self): return self.sim.timeout() @property def game_status(self): """Return whether game is terminate""" return self.episode_terminate def set_output_path(self, path): self.ofilename = os.path.join(path, self.ofilename) self.rfilename = os.path.join(path, self.rfilename) self.json_file = os.path.join(path, self.json_file) print ("Set environment logging to {}".format(self.ofilename)) print ("Set loop and sites logging to {}".format(self.rfilename)) print ("Set results dumpping path to {}".format(self.json_file)) @property def agent_site(self): return self.sim.get_agent_site() @property def action_name_mapping(self): return self.idx2act @property def name_action_mapping(self): return self.act2idx def reward_function(self, rets): pass """ Different Reward Strategies Here """ def _stepwise_weighted_returns(self, rets): icemove_w = 0.000 energy_w = -1.0 defect_w = 0.0 baseline = 0.009765625 ## 1 / 1024 scaling = 2.0 return (icemove_w * rets[0] + energy_w * rets[1] + defect_w * rets[2] + baseline) * scaling ## ray test (for: int, list, np_list) def convert_1Dto2D(self, input_1D): output_2D = None if type(input_1D) == int: output_2D = (int(input_1D/self.L), int(input_1D%self.L)) elif type(input_1D) == list: output_2D = [] for position in input_1D: output_2D.append((int(position/self.L), int(position%self.L))) return output_2D ## ray test def calculate_area(self): """TODO: The periodic boundary condition is too naive that can be modified. """ traj_2D = self.convert_1Dto2D(self.sim.get_trajectory()) traj_2D_dict = {} for x, y in traj_2D: if x in traj_2D_dict: traj_2D_dict[x].append(y) else: traj_2D_dict[x] = [y] # check Max y_length y_position_list = [] for y_list in traj_2D_dict.values(): y_position_list = y_position_list + y_list y_position_list = list(set(y_position_list)) max_y_length = len(y_position_list) -1 area = 0.0 for x in traj_2D_dict: diff = max(traj_2D_dict[x]) - min(traj_2D_dict[x]) if diff > max_y_length: diff = max_y_length temp_area = diff - len(traj_2D_dict[x]) +1 ## avoid vertical straight line if temp_area > 0: area = area + temp_area return area # TODO: Render on terminal. 
def render(self, mapname ="traj", mode="ansi", close=False): #of = StringIO() if mode == "ansi" else sys.stdout #print ("Energy: {}, Defect: {}".format(self.sqice.cal_energy_diff(), self.sqice.cal_defect_density())) s = None if (mapname == "traj"): s = self._transf2d(self.sim.get_canvas_map()) start = self.sim.get_start_point() start = (int(start/self.L), int(start%self.L)) s[start] = 3 screen = "\r" screen += "\n\t" screen += "+" + self.L * "---" + "+\n" for i in range(self.L): screen += "\t|" for j in range(self.L): p = (i, j) spin = s[p] if spin == -1: screen += " o " elif spin == +1: screen += " * " elif spin == 0: screen += " " elif spin == +2: screen += " @ " elif spin == -2: screen += " O " elif spin == +3: # starting point screen += " x " screen += "|\n" screen += "\t+" + self.L * "---" + "+\n" #TODO: Add choice write to terminal or file #sys.stdout.write(screen) with open(self.rfilename, "a") as f: f.write("Episode: {}, global step = {}\n".format(self.episode_counter, self.sim.get_total_steps())) f.write("{}\n".format(screen)) def get_obs(self): """ Need more flexible in get_obs. There will may be config, sequence, scalar observed states. TODO: add np.nan_to_num() to prevent ill value """ config_map = self._transf2d(self.sim.get_state_t_map_color()) #config_map = self._transf2d(self.sim.get_state_t_map()) valid_map = self._transf2d(self.sim.get_valid_action_map()) canvas_map = self._transf2d(self.sim.get_canvas_map()) energy_map = self._transf2d(self.sim.get_energy_map()) defect_map = self._transf2d(self.sim.get_defect_map()) return np.stack([config_map, valid_map, canvas_map, energy_map, defect_map ], axis=self.stacked_axis) def get_hist_obs(self): config_map = self._transf2d(self.sim.get_state_t_map_color()) valid_map = self._transf2d(self.sim.get_valid_action_map()) canvas_map = self._transf2d(self.sim.get_canvas_map()) energy_map = self._transf2d(self.sim.get_energy_map()) defect_map = self._transf2d(self.sim.get_defect_map()) self.canvas_hist.append(canvas_map) self.canvas_hist.popleft() self.defect_hist.append(defect_map) self.defect_hist.popleft() canvas_traj = np.stack([canvas for canvas in self.canvas_hist], axis=self.stacked_axis) defect_traj = np.stack([dmap for dmap in self.defect_hist], axis=self.stacked_axis) config_map = np.expand_dims(config_map, axis=self.stacked_axis) valid_map = np.expand_dims(valid_map, axis=self.stacked_axis) energy_map = np.expand_dims(energy_map, axis=self.stacked_axis) return np.concatenate([config_map, valid_map, energy_map, canvas_traj, defect_traj ], axis=self.stacked_axis) def get_partial_obs(self): """Partial Observation: Get the multiple channel (different format) from the same states. Return: local: neighboring relation of the state_t global: whole maps of the state_t """ pass @property def unwrapped(self): """Completely unwrap this env. Returns: gym.Env: The base non-wrapped gym.Env instance """ return self def sysinfo(self): print ("") def _transf2d(self, s): # do we need zero mean here? 
return np.array(s, dtype=np.float32).reshape([self.L, self.L]) def _append_record(self, record): with open(self.json_file, "a") as f: json.dump(record, f) f.write(os.linesep) def dump_env_states(self): # get current timestamp total_steps = self.sim.get_total_steps() ep = self.sim.get_episode() # agent walk # steps in this episode ep_step_counters = self.sim.get_ep_step_counter() trajectory = self.sim.get_trajectory() if self.sim.get_accepted_length(): loop_length = self.sim.get_accepted_length()[-1] else : loop_length = 0 enclosed_area = self.calculate_area() update_times = self.sim.get_updated_counter() action_counters = self.sim.get_action_statistics() action_stats = [x / total_steps for x in action_counters] start_site = self.sim.get_start_point() acceptance = update_times * 100.0 / ep d = { "Episode": ep, "Steps" : total_steps, "StartSite" : start_site, "Trajectory": trajectory, "UpdateTimes": update_times, "AcceptanceRatio" : acceptance, "LoopLength": loop_length, "EnclosedArea": enclosed_area, "ActionStats" : action_stats } self._append_record(d)
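# Sketch of inspecting the stacked-history observation of the class above.
# It assumes the module-level constants used in __init__ (HIST_LEN,
# DEFAULT_LIVES, NUM_OBSERVATION_MAPS) are defined, e.g. HIST_LEN = 4.
env = IceGameEnv(L=32, kT=0.0001, J=1)
obs = env.reset()
hist_obs = env.get_hist_obs()
# config + valid + energy maps, plus HIST_LEN canvas and HIST_LEN defect frames
assert hist_obs.shape == (env.L, env.L, 3 + 2 * HIST_LEN)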
from icegame import SQIceGame, INFO
import numpy as np
import matplotlib.pyplot as plt

# physical parameters
L = 4
kT = 0.0001
J = 1
N = L**2

num_neighbors = 2
num_replicas = 1
num_mcsteps = 2000
num_bins = 1
num_thermalization = num_mcsteps
tempering_period = 1

mc_info = INFO(L, N, num_neighbors, num_replicas, num_bins, num_mcsteps,
               tempering_period, num_thermalization)

# initialize the system, lattice config
sim = SQIceGame(mc_info)
sim.set_temperature(kT)
sim.init_model()
sim.mc_run(num_mcsteps)

sim.start(0)
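# matplotlib is imported above but not used; a minimal sketch of visualizing the
# simulator's energy map (assumption: get_energy_map() returns a flat list of N
# values that reshapes to an L x L array, as the environment's _transf2d does).
eng_map = np.array(sim.get_energy_map(), dtype=np.float32).reshape(L, L)
plt.imshow(eng_map, cmap="viridis")
plt.colorbar(label="local energy")
plt.title("SQIceGame energy map (L={})".format(L))
plt.show()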
class IcegameEnv(core.Env): def __init__ (self, L, kT, J, stepwise_reward="constant", end_reward="loopsize", terminate_mode="trial", obs_type="multi", ): """IceGame *** Considering more action and state spaces. Use autocorr as reward. *** Args: stepwise: endreward: terminate_mode: * metro: Each time metropolis is executed, then call it an episode. * trial: Finite trial times each episodes obs_type (observation type): * multi: * global_local: reset_each_epsidoes: reset configuration each # of episodes """ self.L = L self.kT = kT self.J = J self.N = 4*L**2 self.sL = int(np.sqrt(self.N)) # square length L self.stepwis = stepwise_reward self.endreward = end_reward self.terminate_mode = terminate_mode self.obs_type = obs_type num_neighbors = 1 num_replicas = 1 num_mcsteps = 10000 self.num_mcsteps = num_mcsteps num_bins = 1 num_thermalization = num_mcsteps tempering_period = 1 self.mc_info = INFO(self.L, self.N, num_neighbors, num_replicas, \ num_bins, num_mcsteps, tempering_period, num_thermalization) self.sim = SQIceGame(self.mc_info) self.sim.set_temperature (self.kT) self.sim.init_model() self.sim.mc_run(num_mcsteps) self.episode_terminate = False self.accepted_episode = False self.last_update_step = 0 # why do we need to keep track last returned results? self.last_rets = None self.center = None # used for sliding window self.use_subregion = False # Extend the action to 8+1 = 9 actions self.idx2act = dict({ 0 : "head_0", 1 : "head_1", 2 : "head_2", 3 : "tail_0", 4 : "tail_1", 5 : "tail_2", 6 : "metropolis", }) self.act2idx = {v: k for k, v in self.idx2act.items()} # action space and state space # global_observation_space self.global_observation_space = spaces.Box(low=-1, high=1.0, shape=(self.sL, self.sL, 1), dtype=np.float32) # local_observation_space (neighbor + agent + physical obs) self.local_observation_space = spaces.Discrete(10) self.action_space = spaces.Discrete(len(self.idx2act)) self.reward_range = (-1, 1) # for convention (legacy code) self.observation_space = spaces.Box(low=-1, high=1.0, shape=(self.L, self.L, 4), dtype=np.float32) # reference configuration: buffer for initial config each episode self.refconfig = None # TODO: Scheduling reward scale self.reward_scale = 1.0 self.reward_threshold = 0.0 self.reward_trajectory = [] """Choose Observation Function """ self.cfg_outdir = "configs" # output file self.ofilename = "loop_sites.log" # render file self.rfilename = "loop_renders.log" # save log to json for future analysis self.json_file = "env_history.json" # Need more info writing down in env settings self.env_settinglog_file = "env_settings.json" self.stacked_axis = 2 ## counts reset() def auto_step(self): # auto_step works as long loop algorithm. guides = self.sim.guide_action() # Or, we can execute metropolis when guide fails E, D, dC = self.sim.get_phy_observables() if (E == -1): act = self.name_action_mapping["metropolis"] else: act = np.random.choice(guides) return self.step(act) def step(self, action): """Step function Args: action Returns: obs, reward, done, info TODO: Taking nested list of actions as a single 'action' on markov chain transition. 
""" terminate = False reward = 0.0 obs = None info = None metropolis_executed = False ## execute different type of actions ## maybe we need a better action index if (action == 6): self.sim.flip_trajectory() rets = self.sim.metropolis() metropolis_executed = True elif (0 <= action < 6) : rets = self.sim.move(action) """ Results from icegame index 0 plays two roles: if action is walk: rets[0] = is_icemove elif action is metropolis: rets[0] = is_accept """ is_accept, dEnergy, dConfig = rets is_icemove = True if is_accept > 0.0 else False self.last_rets = rets # metropolis judgement if (metropolis_executed): """TODO: Add autocorr of config here. """ if is_accept > 0 and dConfig > 0: """ Updates Accepted 1. Calculate rewards 1.1 Get current configuration before updating 1.2 calculate the inner product 1.3 reward = 1.0 - autocorr 2. Save logs 3. Reset maps and buffers """ #current_config = self._transf2d(self.sim.get_state_tp1_map_color()) #statevec = transf_binary_vector(current_config) self.sim.update_config() print ("[GAME_ENV] PROPOSAL ACCEPTED!") total_steps = self.sim.get_total_steps() ep_steps = self.sim.get_ep_step_counter() ep = self.sim.get_episode() loop_length = self.sim.get_accepted_length()[-1] loop_area = self.calculate_area() # get counters action_counters = self.sim.get_action_statistics() metropolis_times = self.sim.get_updating_counter() updated_times = self.sim.get_updated_counter() # compute update interval update_interval = total_steps - self.last_update_step self.last_update_step = total_steps # acceptance rate total_acc_rate = self.sim.get_total_acceptance_rate() * 100.0 #effort = updated_times/total_steps * 100.0 effort = loop_length / ep_steps * 100.0 # calculate the metropolis reward #acorr = autocorr(statevec, self.refconfig) #reward = (1.0 - acorr) * self.reward_scale reward = 1.0 # TODO: Calculate recent # steps' acceptance rate """Dump resutls into file. TODO: Different counter """ # output to self.ofilename: NOTE: No need to save this, all info in hist.json. #with open(self.ofilename, "a") as f: # f.write("1D: {}, \n(2D: {})\n".format(self.sim.get_trajectory(), self.convert_1Dto2D(self.sim.get_trajectory()))) # print ("\tSave loop configuration to file: {}".format(self.ofilename)) print ("\tGlobal step: {}, Local step: {}".format(total_steps, ep_steps)) print ("\tTotal accepted number = {}".format(updated_times)) print ("\tTotal Metropolis number = {}".format(metropolis_times)) print ("\tAccepted loop length = {}, area = {}".format(loop_length, loop_area)) print ("\tAgent walks {} steps in episode, action counters: {}".format(ep_steps, self.sim.get_ep_action_counters())) action_stats = [x / total_steps for x in action_counters] print ("\tStatistics of actions all episodes (ep={}, steps={}) : {}".format(ep, total_steps, action_stats)) print ("\tAcceptance ratio (accepted/ # of metropolis) = {}%".format( updated_times * 100.0 / metropolis_times)) print ("\tAcceptance ratio (accepted/ # of episodes) = {}%".format( updated_times * 100.0 / ep)) print ("\tAcceptance ratio (from icegame) = {}%".format(total_acc_rate)) print ("\tRunning Effort = {}%".format(effort)) # TODO: How to describe the loop? info = { "Acceptance Ratio" : total_acc_rate, "Running Effort": effort, "Updated" : updated_times, "Loop Size": loop_length, "Loop Area": loop_area, } # Render when special case happened. #if loop_area >= 1 or loop_length >= 8: #self.render() self.dump_env_status() self.sim.clear_buffer() """ Terminate? stop after accpetance, will increase the episode rewards. 
But can we still running the program to increase the total rewards? Or do not terminate, just reset the location? """ # reset the initial position and clear buffer # TODO: Check the difference # no need to reset here # self.sim.reset(rnum(self.N)) terminate = True else: """ Rejection or dConfig == 0 1. Keep updating with new canvas. or Early stop. 2. Wrong decision penalty Q: Should we reset the initial location? Q: How to handle no config change? Q: This should be some penalty here. """ self.sim.clear_buffer() reward = 0.0 terminate = True # reset or update else: """Stepwise feedback: 1. exploration 2. icemove reards 3. defect propagation guiding 4. #more TODO: Write option in init arguments. """ # Check each scale (each of them stays in 0~1) # TODO: calculate reward wrt physical observation _, diffeng_level, _ = self._discrete_criteron(self.physical_observables) # asymmetric reward doest work well. # 100 --> L*L --> N reward = diffeng_level / (self.L * self.L) #reward = diffeng_level / 100 # Reset if timeout from env. if (self.sim.timeout()): terminate = True obs = self.get_obs() # Add the timeout counter (TODO: Check these codes) # Terminate and run monte carlo to prepare new config #terminate = True ### TODO: Add configuration reset counter! #print ("[GAME_ENV] Reset Ice Configuration!") #self.sim.reset_config() # Not always return info self.reward_trajectory.append(reward) return obs, reward, terminate, info # Start function used for agent learning def start(self, init_site=None, create_defect=True): """ Q: Do we flip at start? I think flip @ starting point is reasonable. Returns: same as step() obs, reward, terminate, rets """ if init_site == None: init_agent_site = self.sim.start(rnum(self.N)) else: init_agent_site = self.sim.start(init_site) assert(self.agent_site == init_agent_site) if create_defect: self.sim.flip() # remove this legacy? self.center = self.agent_site2d state = self.get_obs() # reference configuration #self.refconfig = transf_binary_vector(state.configs_2d[:,:,0]) return state def reset(self, site=None, create_defect=True): """reset is called by RL convention. """ ## clear buffer and set new start of agent if site is None: site = rnum(self.N) init_site = self.sim.reset(site) assert(init_site == site) # actually, counter can be called by sim.get_episode() self.center = self.agent_site2d if create_defect: self.sim.flip() """TODO This mechanism should be checked. Reset configuration: run monte carlo again. """ #print ("[GAME_ENV] Reset Ice Configuration!") #self.sim.reset_config() info = None self.last_rets = None self.reward_trajectory = [] state = self.get_obs() # reference configuration # self.refconfig = transf_binary_vector(state.configs_2d[:,:,0]) return state def timeout(self): return self.sim.timeout() def get_game_status(self): """(TODO)Return the game status including steps and physical observables. 
returns: """ total_steps = self.sim.get_total_steps() ep_steps = self.sim.get_ep_step_counter() ep = self.sim.get_episode() # get counters metropolis_times = self.sim.get_updating_counter() update_times = self.sim.get_updated_counter() # compute update interval update_interval = total_steps - self.last_update_step # acceptance rate total_acc_rate = self.sim.get_total_acceptance_rate() * 100.0 effort = update_times/total_steps * 100.0 d = { "total_steps": total_steps, "updated_times": update_times, } return AttrDict(d) def set_output_path(self, path): if not os.path.exists(path): os.mkdir(path) self.cfg_outdir = os.path.join(path, self.cfg_outdir) if not os.path.exists(self.cfg_outdir): os.mkdir(self.cfg_outdir) self.ofilename = os.path.join(path, self.ofilename) self.rfilename = os.path.join(path, self.rfilename) self.json_file = os.path.join(path, self.json_file) self.env_settinglog_file = os.path.join(path, self.env_settinglog_file) print ("Set results dumpping path to {}".format(self.json_file)) print ("Set env setting log path to {}".format(self.env_settinglog_file)) @property def agent_site(self): return self.sim.get_agent_site() @property def agent_site2d(self): #TODO FIX return (self.sim.get_agent_site()//self.sL, self.sim.get_agent_site()%self.sL) def set_agent_site(self, site, clear_map=False): #Notice: sim.start() is just set agent on site, # but not clear the maps. (call restart if needed.) if 0 <= site < self.N: if clear_map: self.sim.restart(site) else: self.sim.start(site) def enable_subregion(self): self.use_subregion = True def disable_subregion(self): self.use_subregion = False @property def action_name_mapping(self): return self.idx2act @property def name_action_mapping(self): return self.act2idx @property def physical_observables(self): return self.sim.get_phy_observables() # TODO: Need to replace these codes. ## ray test (for: int, list, np_list) def convert_1Dto2D(self, input_1D): """This function is provided by Thisray. The problematic function, fixing is needed. """ output_2D = None if type(input_1D) == int: output_2D = (int(input_1D/self.L), int(input_1D%self.L)) elif type(input_1D) == list: output_2D = [] # better use of list comprehension for position in input_1D: output_2D.append((int(position/self.L), int(position%self.L))) return output_2D def calculate_area(self): """TODO: The periodic boundary condition can be modified. This function is provided by Thisray. The problematic function, fixing is needed. 
""" traj_2D = self.convert_1Dto2D(self.sim.get_trajectory()) traj_2D_dict = {} for x, y in traj_2D: if x in traj_2D_dict: traj_2D_dict[x].append(y) else: traj_2D_dict[x] = [y] # check Max y_length y_position_list = [] for y_list in traj_2D_dict.values(): y_position_list = y_position_list + y_list y_position_list = list(set(y_position_list)) max_y_length = len(y_position_list) -1 area = 0.0 for x in traj_2D_dict: diff = max(traj_2D_dict[x]) - min(traj_2D_dict[x]) if diff > max_y_length: diff = max_y_length temp_area = diff - len(traj_2D_dict[x]) +1 ## avoid vertical straight line if temp_area > 0: area = area + temp_area return area def set_ice(self, s): """Convert numpy array into python list, then set_ice""" if type(s) == np.ndarray: s = s.tolist() self.sim.set_ice(s) elif type(s) == list: self.sim.set_ice(s) else: raise ValueError("Only numpy array or list are accepted.") def load_ice(self, path): """Read out ice configuration from npy.""" loaded = np.load(path) self.set_ice(loaded) def save_ice(self): """Save out the ice configuration in numpy format.""" s = self._transf1d(self.sim.get_state_t()) # convert into numpy array ep = self.sim.get_episode() fname = "ice_{}".format(ep) fname = os.path.join(self.cfg_outdir, fname) np.save(fname, s) print ("Save the initial configuration @ episode {} to {}".format( ep, self.cfg_outdir)) def reset_ice_config(self): pass def resize_ice_config(self, L, mcsteps): """Resize the whole system.""" # Resize the system size self.L = L self.num_mcsteps = mcsteps self.N = 4*L**2 self.sL = int(np.sqrt(self.N)) # square length L self.mc_info = INFO(self.L, self.N, 1, 1, 1, mcsteps, 1, mcsteps) # Allocate sim again. self.sim = SQIceGame(self.mc_info) self.sim.set_temperature (self.kT) self.sim.init_model() self.sim.mc_run(self.num_mcsteps) self.dump_env_setting() # TODO: Option of Render on terminal or File. # TODO: Update this function to new apis def render(self, mapname ="traj", mode="ansi", close=False): #of = StringIO() if mode == "ansi" else sys.stdout #print ("Energy: {}, Defect: {}".format(self.sqice.cal_energy_diff(), self.sqice.cal_defect_density())) s = None # TODO: if (mapname == "traj"): s = self._transf2d(self.sim.get_state_diff_map()) start = self.sim.get_agent_init_site() start = (int(start/self.sL), int(start%self.sL)) s[start] = 3 screen = "\r" screen += "\n\t" screen += "+" + self.L * "---" + "+\n" for i in range(self.L): screen += "\t|" for j in range(self.L): p = (i, j) spin = s[p] if spin == -1: screen += " o " elif spin == +1: screen += " * " elif spin == 0: screen += " " elif spin == +2: screen += " @ " elif spin == -2: screen += " O " elif spin == +3: # starting point screen += " x " screen += "|\n" screen += "\t+" + self.L * "---" + "+\n" #TODO: Add choice write to terminal or file #sys.stdout.write(screen) with open(self.rfilename, "a") as f: f.write("Episode: {}, global step = {}\n".format(self.sim.get_ep_step_counter(), self.sim.get_total_steps())) f.write("{}\n".format(screen)) def get_obs(self): """Get Observation: Critical function of environments. 
""" local_spins = self._transf1d(self.sim.get_local_spins()) local_sites = self._transf1d(self.sim.get_local_sites()) # E, dE, dC: but these values are too small and close to 0 or 1 phyobs = self._transf1d(self.sim.get_phy_observables()) disc_phyobs = self._discrete_criteron(phyobs) # classify three energy cases local_obs = np.concatenate((local_spins, disc_phyobs), axis=0) # global observation diff_map = self._transf2d(self.sim.get_state_diff_map()) """ Sub-region: sliding box observation. Note: ths sub-region size is now fixed. """ if self.use_subregion: new_center = move_center(self.center, self.agent_site2d, 32, 32, self.sL, self.sL) diff_map= periodic_crop(diff_map, new_center, 32, 32) if (diff_map.shape != (32, 32)): raise ValueError("[GAME_ENV] EORROR: cropped region is ruined.") self.center = new_center diff_map = np.expand_dims(diff_map, axis=2) # stack three maps # return in terms of dict """How RL algorithm handle this? network takes local_obs and global_obs * feed local to forward network (Q: how about using rnn?) * feed global to convolutional network """ d = { "local_spins" : local_spins, "local_sites" : local_sites, "local_obs" : local_obs, "global_obs" : diff_map, } return AttrDict(d) @property def unwrapped(self): """Completely unwrap this env. Returns: gym.Env: The base non-wrapped gym.Env instance """ return self def save_env_settings(self): print ("TODO: Change this into dump json") print ("TODO, also Recover setting from file.") # Write settings into the logfile, modified when setting function is called. with open(self.env_settinglog_file, "a") as f: # TODO: write new parameters. f.write("Launch time: {}\n".format(str(datetime.now()))) f.write("Number of Observation: {}\n".format(NUM_OBSERVATION_MAPS)) #f.write("Stepwise reward function: {}\n".format(self.stepwise)) #f.write("Metropolis reward function: {}\n".format(self.endreward)) def _transf2d(self, s): # add nan_to_num here? return np.array(s, dtype=np.float32).reshape([self.sL, self.sL]) def _transf1d(self, s): # suppose originally we have one dim vector return np.array(s, dtype=np.float32) def _append_record(self, record, fname): with open(fname, "a") as f: json.dump(record, f) f.write(os.linesep) def _discrete_criteron(self, phyobs): """ 'Discretize' Energy into several level E: * One defect pair = +1 * Several but not many (5~10) = 0 * Far from ice state = -1 (will this happen?) dE: (compare with initail state) * Decrease = +1 * Even = 0 * Increase = -1 dC: * this is so small, we can enlarge the value itself. * maybe by factor of 10 Goal: dC increases but dE remains """ E, dE, dC = phyobs # well, E and dE are correlated. num_defects = dE * self.N / 2 if (num_defects <= 2): num_defects = +1 elif (num_defects <=5): num_defects = 0 else: num_defects = -1 # hand-crafted value dC *= 5.0 newphy = [E, num_defects, dC] return newphy def env_setting(self): settings = { "N" : self.N, "sL" : self.sL, "L" : self.L, "R_scale" : self.reward_scale, "R_upper_thres" : self.reward_threshold, "R_lower_thres" : self.reward_threshold, } return AttrDict(settings) def env_status(self): """Save status into jsonfile. * carefully choose items to be saved. * this is the only import thing should be saved. 
""" # get current timestamp total_steps = self.sim.get_total_steps() ep = self.sim.get_episode() # agent walk # steps in this episode ep_step_counters = self.sim.get_ep_step_counter() trajectory = self.sim.get_trajectory() if self.sim.get_accepted_length(): loop_length = self.sim.get_accepted_length()[-1] else : loop_length = 0 enclosed_area = self.calculate_area() update_times = self.sim.get_updated_counter() action_counters = self.sim.get_action_statistics() action_stats = [x / total_steps for x in action_counters] start_site = self.sim.get_agent_init_site() acceptance = update_times * 100.0 / ep # local_step counter local_step = self.sim.get_ep_step_counter() # configuration changes == loop length effort = loop_length / ep_step_counters * 100.0 d = { "Episode": ep, "Steps" : total_steps, "LocalSteps" : local_step, "StartSite" : start_site, "Trajectory": trajectory, "UpdateTimes": update_times, "AcceptanceRatio" : acceptance, "LoopLength": loop_length, "EnclosedArea": enclosed_area, "ActionStats" : action_stats } return AttrDict(d) def dump_env_status(self): d = self.env_status() self._append_record(d, self.json_file) def dump_env_setting(self): d = self.env_setting() with open(self.env_settinglog_file, "w") as f: json.dump(d, f)
kT = 0.0001
J = 1
N = 4 * L**2  # Well, this parameter should not be set by me...
num_neighbors = 1
num_replicas = 1
num_mcsteps = 2000
num_bins = 1
num_thermalization = num_mcsteps
tempering_period = 1

mc_info = INFO(L, N, num_neighbors, num_replicas, num_bins, num_mcsteps,
               tempering_period, num_thermalization)

# initialize the system, lattice config
sim = SQIceGame(mc_info)
sim.set_temperature(kT)
sim.init_model()
sim.mc_run(num_mcsteps)
#sim.print_lattice()

sim.start(np.random.randint(N))
print("Starting site: {}".format(sim.get_agent_site()))
print(sim.get_trajectory())

# Test for loop algorithm
segs = sim.long_loop_algorithm()
traj = sim.get_trajectory()
print(traj)
print(segs)
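# Follow-up sketch for inspecting the proposed loop (an assumption, not from the
# source: trajectory sites are flat indices on the sL x sL checkerboard grid,
# with sL = sqrt(N) = 2L, matching agent_site2d in the environment code above).
sL = int(np.sqrt(N))
traj_2d = [(site // sL, site % sL) for site in traj]
print("Loop length: {}".format(len(traj)))
print(traj_2d)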