def optimizer_worker(train):
    """Train the network forever on batches drawn from the replay buffer.

    Polls every 10 seconds until ``train.replay_buffer.ready()`` is true,
    then loops indefinitely: runs five training steps (printing the loss and
    entropy of each) and saves the model to ``<cwd>/latest``. This function
    never returns.

    NOTE(review): the loop nesting was reconstructed from a
    whitespace-collapsed source; the checkpoint is assumed to be written once
    per five training steps rather than after every step -- confirm.

    Args:
        train: Training context. The code reads ``replay_buffer``,
            ``batch_size``, ``obs_shape``, ``act_shape``, ``learning_rate``
            and ``network`` from it.
    """
    print("Optimizer worker started")

    # Wait for the replay buffer to report that it is ready to be sampled.
    while not train.replay_buffer.ready():
        sleep(10)

    while True:
        for _ in range(5):
            sampled = train.replay_buffer.get_samples(train.batch_size)
            state_batch, action_batch, value_batch = zip(*sampled)

            # The buffer stores paths to state files rather than the states
            # themselves (per the original author's note, to keep the JSON
            # small), so every state is loaded from disk here.
            # NOTE(review): network_thread builds World(state_path, width,
            # height); confirm the single-argument form is valid.
            states = [
                World(state_path).current_state() for state_path in state_batch
            ]

            # Turn each visit-count vector into a probability distribution.
            # NOTE(review): an all-zero vector would divide by zero and yield
            # NaNs -- confirm the buffer never stores one.
            visit_counts = [np.array(counts) for counts in action_batch]
            actions = [counts / np.sum(counts) for counts in visit_counts]

            # obs_shape indices 0 and 1 are deliberately swapped in the
            # target shape, exactly as in the original code.
            state_batch_reshaped = np.reshape(
                states,
                (-1, train.obs_shape[1], train.obs_shape[0], train.obs_shape[2]),
            )
            action_batch_reshaped = np.reshape(actions, (-1, train.act_shape))
            value_batch_reshaped = np.reshape(value_batch, (-1, 1))

            loss, entropy = train.network.train_step(
                state_batch_reshaped,
                action_batch_reshaped,
                value_batch_reshaped,
                train.learning_rate,
            )
            print(loss, entropy)

        train.network.save_model(os.path.join(os.getcwd(), "latest"))
def network_thread(player, temp_folder):
    """Serve policy/value evaluations through files in ``temp_folder``.

    Polls every 0.05 seconds for ``State.txt``. When it is present and
    readable, the state is loaded through ``World``, the request file is
    deleted, ``player.policy_value(state)`` is evaluated, and the result is
    written to ``PolicyValue.txt``: the first ``action_count`` policy entries
    joined by ``_`` on the first line, the value on the second. This function
    never returns.

    Relies on the module-level names ``width``, ``height`` and
    ``action_count``.

    Args:
        player: Object providing ``policy_value(state)``, which returns a
            ``(policy, value)`` pair.
        temp_folder: Directory used to exchange the request and response
            files.
    """
    print("Network thread started for " + temp_folder)
    state_path = os.path.join(temp_folder, "State.txt")
    policy_value_path = os.path.join(temp_folder, "PolicyValue.txt")
    policy_value_temp = os.path.join(temp_folder, "PolicyValueTemp.txt")

    while True:
        sleep(0.05)
        if not (os.access(state_path, os.F_OK)
                and os.access(state_path, os.R_OK)):
            continue

        try:
            world = World(state_path, width, height)
            state = world.current_state()
            # Remove the request once it has been read so that it is not
            # evaluated again on the next poll.
            os.remove(state_path)

            policy, value = player.policy_value(state)
            policy_str = "_".join(
                str(policy[i]) for i in range(action_count)
            )

            # Write to a temporary file first so a reader never sees a
            # partially written response.
            with open(policy_value_temp, 'w') as f:
                f.write(policy_str + "\n")
                f.write(str(value))

            # os.replace overwrites an existing destination in one rename
            # step, so there is no window in which the response file is
            # missing (unlike a separate remove followed by a move).
            os.replace(policy_value_temp, policy_value_path)
        except Exception as e:
            # Best effort: report the failure and keep serving requests.
            print(str(e))