# NOTE(review): This chunk is whitespace-mangled (an entire loop collapsed onto one
# physical line) and is TRUNCATED mid-expression at `G = sum([` — the tail of the
# first-visit Monte Carlo return computation and the V-table update are not visible
# in this chunk, so the code is left byte-identical rather than reconstructed.
#
# What the visible code does: runs `num_episodes` rollouts of at most `t_steps`
# steps each under `policy` (bound to `sample_policy`, defined elsewhere — TODO
# confirm), collecting (state, action, reward) triples; then, for each distinct
# state visited in the episode, locates its first occurrence and starts summing a
# discounted return G toward a Monte Carlo value estimate V (defaultdict(float)).
# `returns_sum` / `returns_count` accumulate per-state return totals and visit
# counts; `R` is allocated but not used in the visible portion.
#
# NOTE(review): likely bug — `first_occurence_idx` is computed with
# `if x[0] == state`, i.e. against the rollout loop's leftover `state` variable,
# not against `s`, the state currently being evaluated in
# `for s in states_in_episode:`. First-visit MC should match `x[0] == s`.
# Confirm against the canonical implementation before changing, since the rest of
# the statement is cut off here.
policy = sample_policy discount_factor = 1.0 returns_sum = defaultdict(float) returns_count = defaultdict(float) V = defaultdict(float) num_episodes = 10000 t_steps = 100 R = np.zeros((num_episodes, 1)) for i_episode in range(1, num_episodes + 1): episode = [] state = env.reset() rewards = 0.0 for t in range(t_steps): action = policy(state) next_state, reward, done, _ = env.step(action) episode.append((state, action, reward)) if done: break state = next_state states_in_episode = set([s for s, _, _ in episode]) for s in states_in_episode: first_occurence_idx = next(i for i, x in enumerate(episode) if x[0] == state) G = sum([
# Demo: play 20 Blackjack hands with a fixed threshold strategy, printing each
# observation and action. (Reformatted from a whitespace-collapsed paste; the
# logic and all printed strings are unchanged.)
import sys  # idempotent if already imported earlier in the file

if "../" not in sys.path:
    sys.path.append("../")
from lib.envs.blackjack import BlackjackEnv

env = BlackjackEnv()


def print_observation(observation):
    """Print one Blackjack observation.

    `observation` is a (player_score, dealer_score, usable_ace) triple, as
    returned by BlackjackEnv.reset()/step().
    """
    score, dealer_score, usable_ace = observation
    print("Player Score: {} (Usable Ace: {}), Dealer Score: {}".format(
        score, usable_ace, dealer_score))


def strategy(observation):
    """Fixed policy for the demo.

    Stick (action 0) when the player's score is >= 20, hit (action 1)
    otherwise.  (The original inline comment said "> 20", which contradicted
    the code; the code's `>= 20` threshold is the intended behavior.)
    """
    score, dealer_score, usable_ace = observation
    return 0 if score >= 20 else 1


# Roll out 20 episodes, capping each at 100 steps as a safety bound; a hand
# ends as soon as the env reports done.
for i_episode in range(20):
    observation = env.reset()
    for t in range(100):
        print_observation(observation)
        action = strategy(observation)
        print("Taking action: {}".format(["Stick", "Hit"][action]))
        observation, reward, done, _ = env.step(action)
        if done:
            print_observation(observation)
            print("Game end. Reward: {}\n".format(float(reward)))
            break