action = estimator_1.predict(sess, [state])[0]
                else:
                    action = estimator_2.predict(sess, [state])[0]

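            # Decay the exploration rate until it reaches its floor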
            if random_action_probability > random_action_probability_end:
                random_action_probability *= random_action_probability_decay

            next_state, reward, done, _ = env.step(action)

            replay_memory.add(state, action, reward, next_state, done)

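            # Once a full batch is buffered, alternate which estimator is updated, using the other as its target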
            batch_s, batch_a, batch_r, batch_s1, batch_d = replay_memory.get_samples(
                batch_size)
            if batch_s.shape[0] == batch_size:
                if global_step % 2 == 0:
                    estimator_1.update(sess, estimator_2, batch_s, batch_a,
                                       batch_r, batch_s1, batch_d)
                else:
                    estimator_2.update(sess, estimator_1, batch_s, batch_a,
                                       batch_r, batch_s1, batch_d)

            global_step += 1

            if done:
                recent_timesteps.append(t + 1)
                print("Episode {} finished after {} timesteps (average {})".
                      format(i_episode, t + 1, np.mean(recent_timesteps)))
                break

            state = next_state
Example #2
            if global_step % 2 == 0:
                error = estimator_1.td_errors(sess, estimator_2, [state],
                                              [action], [reward],
                                              [next_state])[0]
                replay_memory.add(error,
                                  (state, action, reward, next_state, done))
            else:
                error = estimator_2.td_errors(sess, estimator_1, [state],
                                              [action], [reward],
                                              [next_state])[0]
                replay_memory.add(error,
                                  (state, action, reward, next_state, done))

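            # Draw a prioritized sample and unpack the (index, transition) pairs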
            samples = replay_memory.sample(batch_size)
            indices_batch, samples_batch = map(np.array, zip(*samples))
            states_batch, action_batch, reward_batch, next_states_batch, done_batch = map(
                np.array, zip(*samples_batch))

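            # Alternate which estimator is updated, then refresh the sampled priorities with the new TD errors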
            if global_step % 2 == 0:
                estimator_1.update(sess, estimator_2, states_batch,
                                   action_batch, reward_batch,
                                   next_states_batch, done_batch)
                errors = estimator_1.td_errors(sess, estimator_2, states_batch,
                                               action_batch, reward_batch,
                                               next_states_batch)
                for i in range(len(indices_batch)):
                    replay_memory.update(indices_batch[i], errors[i])
            else:
                estimator_2.update(sess, estimator_1, states_batch,
                                   action_batch, reward_batch,
                                   next_states_batch, done_batch)
                errors = estimator_2.td_errors(sess, estimator_1, states_batch,
                                               action_batch, reward_batch,
                                               next_states_batch)
                for i in range(len(indices_batch)):
                    replay_memory.update(indices_batch[i], errors[i])
Example #3
            target_estimator.copy_model_from(sess, q_estimator)

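        # Run one episode for at most 500 steps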
        for t in range(500):
            env.render()

            action = None
            if np.random.rand(1) < random_action_probability:
                action = env.action_space.sample()
            else:
                action = q_estimator.predict(sess, [state])[0]

            if random_action_probability > random_action_probability_end:
                random_action_probability *= random_action_probability_decay

            next_state, reward, done, _ = env.step(action)

            replay_memory.add(state, action, reward, next_state, done)

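            # Update the Q-network toward the target network once the buffer can fill a batch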
            batch_s, batch_a, batch_r, batch_s1, batch_d = replay_memory.get_samples(
                batch_size)
            if batch_s.shape[0] == batch_size:
                q_estimator.update(sess, target_estimator, batch_s, batch_a,
                                   batch_r, batch_s1, batch_d)

            if done:
                print("Episode {} finished after {} timesteps".format(
                    i_episode, t + 1))
                break

            state = next_state
Example #4
class Player:
    def __init__(self, step_size=0.1, epsilon=0.1, symbol=0):
        self.step_size = step_size
        self.epsilon = epsilon
        self.previous_state = State()
        self.state = None
        self.symbol = symbol
        self.td_errors = []

        self.estimator = Estimator()
        self.policy = make_epsilon_greedy_policy(self.estimator)
        self.action = (0, 0)

        self.actions = []
        for i in range(BOARD_ROWS):
            for j in range(BOARD_COLS):
                self.actions.append((i, j))

    # Record the new state, keeping a copy of the previous one
    def set_state(self, state):
        if self.state is not None:
            self.previous_state.data = np.copy(self.state.data)
        self.state = state

    def set_symbol(self, symbol):
        self.symbol = symbol

    def set_epsilon(self, epsilon):
        self.epsilon = epsilon

    # Update the value estimate (TD backup)
    def backup(self, next_state, other=False):
        is_end = next_state.is_end()
        reward = 0
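        # Terminal reward: +1 for a win, -1 for a loss, 0 otherwise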
        if is_end:
            if next_state.winner == self.symbol:
                reward = 1
            elif next_state.winner == -self.symbol:
                reward = -1
            else:
                reward = 0

        if other:
            next_state.data = np.copy(self.state.data)
            self.state = self.previous_state

        # TD update: predict Q-values for the next state
        q_values_next = self.estimator.predict(next_state)

        # Q-value for the TD target
        if is_end:
            td_target = reward
        else:
            gamma = 1
            td_target = reward + gamma * np.max(q_values_next)

        # Compute the TD error
        q_value = self.estimator.predict(self.state, self.action)
        td_error = np.abs(td_target - q_value)
        self.td_errors.append(td_error)

        # Update the function approximator using the TD target
        self.estimator.update(self.state, self.action, td_target)

    # Choose an action based on the current state
    def act(self):
        action_probs = self.policy(self.state, self.epsilon)
        action_idx = np.random.choice(np.arange(len(self.actions)),
                                      p=action_probs)
        self.action = self.actions[action_idx]

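        # Apply the chosen move to obtain the resulting board state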
        next_state = self.state.next_state(self.action[0], self.action[1],
                                           self.symbol)
        is_end = next_state.is_end()

        self.backup(next_state)

        return next_state, is_end

    def save_policy(self, epoch):
        with open(
                'app/saves/policy_%s_%d.bin' %
            (('first' if self.symbol == 1 else 'second'), epoch), 'wb') as f:
            pickle.dump(self.estimator, f)

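        # Append the recorded TD errors to a per-player metrics CSV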
        path = 'app/saves/metrics_%s.csv' % ('first'
                                             if self.symbol == 1 else 'second')
        with open(path, "a") as metrics_file:
            writer = csv.writer(metrics_file)
            for td_error in self.td_errors:
                writer.writerow([td_error])

        self.td_errors.clear()

    def load_policy(self, epoch):
        with open(
                'app/saves/policy_%s_%d.bin' %
            (('first' if self.symbol == 1 else 'second'), epoch), 'rb') as f:
            self.estimator = pickle.load(f)
            self.policy = make_epsilon_greedy_policy(self.estimator)
Example #5
class AI:
    def __init__(self,
                 load=None,
                 filepath='best_estimator.h5',
                 num_episodes=400,
                 eval_episodes=20,
                 update_freq=80,
                 mcts_iters=100,
                 tau_cutoff=20):
        self.num_episodes = num_episodes
        self.eval_episodes = eval_episodes
        self.update_freq = update_freq
        self.mcts_iters = mcts_iters
        self.tau_cutoff = tau_cutoff
        self.filepath = filepath
        to_load = load or filepath
        if os.path.isfile(to_load):
            self.estimator = Estimator(State.raw_shape,
                                       len(State.domain),
                                       filepath=to_load)
        else:
            self.estimator = Estimator(State.raw_shape, len(State.domain))

    def duel(self, opponent, first=1):
        '''Play a full game against an opponent estimator.'''

        if first == -1:
            e0, e1 = opponent, self.estimator
        else:
            e0, e1 = self.estimator, opponent

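        # One search tree per estimator; every move is applied to both trees so they stay in sync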
        s0 = MCTS(e0, maxiter=self.mcts_iters)
        s1 = MCTS(e1, maxiter=self.mcts_iters)

        while not s0.state.over:

            a = State.domain[np.argmax(s0.search())]

            s0.apply(a)
            s1.apply(a)

            if s0.state.over:
                break

            a = State.domain[np.argmax(s1.search())]

            s1.apply(a)
            s0.apply(a)

        return s0.state.winner

    def simulate(self, first=1):
        '''Simulate a full game via self-play.'''

        mcts = MCTS(estimator=self.estimator,
                    epsilon=0.25,
                    maxiter=self.mcts_iters,
                    first=first)
        history = []
        tau = 1.0

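        # Self-play loop: record (raw state, search policy) pairs and sample each move from the MCTS policy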
        while not mcts.state.over:

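            # Lower the search temperature after tau_cutoff moves to make play greedier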
            if len(history) == self.tau_cutoff:
                tau = 0.1

            policy = mcts.search(tau)
            history.append((mcts.state.raw, policy))

            a = np.random.choice(State.domain, p=policy)
            mcts.apply(a)

        return history, mcts.state.winner

    def train(self):

        games = []

        for i in range(self.num_episodes):

            history, winner = self.simulate(first=np.random.choice([-1, 1]))
            print("Game --> winner:", State.player_codes[winner], "moves:",
                  len(history))
            games.append((history, winner))

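            # Every update_freq games, train a candidate estimator and evaluate it against the current one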
            if (i + 1) % self.update_freq == 0:

                print("Training new model...")
                new_estimator = self.estimator.update(games)

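                # Pit the candidate against the current estimator; a higher score favors the candidate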
                score = 0
                for j in range(self.eval_episodes):
                    first = np.random.choice([-1, 1])
                    winner = self.duel(new_estimator, first=first)
                    score -= first * winner

                print("New model score:", score)
                if score >= ceil(0.05 * self.eval_episodes):
                    self.estimator = new_estimator
                    self.estimator.save(self.filepath)
                    print("New model selected.")
                else:
                    print("New model rejected.")

                games = games[-5 * self.eval_episodes:]  # truncate history