Exemple #1
0
def optimizer_worker(train):
    """Train the network forever on batches sampled from the replay buffer.

    Polls ``train.replay_buffer.ready()`` every 10 seconds until it reports
    ready, then loops without end: run five training steps, print the loss
    and entropy returned by the last of them, and save the model to
    ``<current working directory>/latest``. This function never returns.

    Args:
        train: Training context. The attributes read here are
            ``replay_buffer`` (``ready()`` and ``get_samples(n)``),
            ``batch_size``, ``obs_shape``, ``act_shape``, ``learning_rate``
            and ``network`` (``train_step(...)`` and ``save_model(path)``).
    """
    print("Optimizer worker started")
    while not train.replay_buffer.ready():
        sleep(10)
    while True:
        for _ in range(5):
            sampled = train.replay_buffer.get_samples(train.batch_size)
            state_batch, action_batch, value_batch = zip(*sampled)

            # Each sample carries the path of a state file rather than the
            # state itself (per the original note, to keep the serialized
            # data small), so the states are loaded from disk here.
            states = [
                World(state_path).current_state()
                for state_path in state_batch
            ]

            # Turn raw visit counts into a probability distribution.
            # NOTE(review): a sample whose counts sum to zero would produce
            # NaN targets here -- confirm the replay buffer never stores one.
            actions = []
            for visit_count in action_batch:
                counts = np.array(visit_count)
                actions.append(counts / np.sum(counts))

            # The state tensor swaps the first two entries of obs_shape.
            state_batch_reshaped = np.reshape(
                states, (-1, train.obs_shape[1], train.obs_shape[0],
                         train.obs_shape[2]))
            action_batch_reshaped = np.reshape(actions, (-1, train.act_shape))
            value_batch_reshaped = np.reshape(value_batch, (-1, 1))

            loss, entropy = train.network.train_step(state_batch_reshaped,
                                                     action_batch_reshaped,
                                                     value_batch_reshaped,
                                                     train.learning_rate)
        # Only the metrics of the last of the five steps are reported.
        print(loss, entropy)
        train.network.save_model(os.path.join(os.getcwd(), "latest"))
Exemple #2
0
def network_thread(player, temp_folder):
    """Answer policy/value requests exchanged as files in ``temp_folder``.

    Every 50 ms the loop checks for a readable ``State.txt``. When one is
    found, the state is loaded from it, the file is deleted, and
    ``player.policy_value(state)`` is evaluated. The answer is published as
    ``PolicyValue.txt``: the first line holds the first ``action_count``
    policy entries joined by ``_``, the second line holds the value.

    The function never returns. An exception raised while handling a request
    is printed and the loop carries on with the next poll.

    ``width``, ``height`` and ``action_count`` are not parameters; they are
    looked up outside this function.

    Args:
        player: Object providing ``policy_value(state)``, which returns a
            ``(policy, value)`` pair with ``policy`` indexable by integer.
        temp_folder: Directory used for the request and response files.
    """
    print("Network thread started for " + temp_folder)

    state_path = os.path.join(temp_folder, "State.txt")
    policy_value_path = os.path.join(temp_folder, "PolicyValue.txt")
    policy_value_temp = os.path.join(temp_folder, "PolicyValueTemp.txt")

    while True:
        sleep(0.05)
        if os.access(state_path, os.F_OK) and os.access(state_path, os.R_OK):
            try:
                world = World(state_path, width, height)
                state = world.current_state()
                # Delete the request only after it was read successfully, so
                # a failed read is retried on the next poll.
                os.remove(state_path)

                policy, value = player.policy_value(state)

                policy_str = "_".join(
                    str(policy[i]) for i in range(action_count))
                value_str = str(value)

                # Write the whole answer to a scratch file first so the
                # final file never appears half-written.
                with open(policy_value_temp, 'w') as f:
                    f.write(policy_str + "\n")
                    f.write(value_str)

                # os.replace overwrites an existing destination in a single
                # step, so there is no moment at which PolicyValue.txt is
                # missing between removing the old answer and moving the new
                # one into place.
                os.replace(policy_value_temp, policy_value_path)

            except Exception as e:
                # Best effort: report the problem and keep serving requests.
                print(str(e))