from utils.net import simple_net
import pickle
from keras.models import load_model

# Seed NumPy's global RNG so the sampled demand sequence is repeatable.
# NOTE(review): `np` and `BeerGameEnv` are not imported in this section; they
# must already be in scope when it runs.
seed = 1234
np.random.seed(seed)


def demand():
    """Yield an endless stream of non-negative integer demands.

    Each value is a draw from ``np.random.normal(10, 5)`` truncated with
    ``int()``; negative results are replaced by 0.
    """
    while True:
        yield max(int(np.random.normal(10, 5)), 0)


demand_gen = demand()

# Build the experiment-3 environment and pull the first tuple from the play
# API; only the flattened shape of the state is used afterwards.
# NOTE(review): the original line breaks were lost, so it is ambiguous whether
# the `env_exp3 = ...` line was commented out. It is kept live here because
# the next statement depends on it -- confirm.
env_exp3 = BeerGameEnv(demand_gen, lag=2)
api = env_exp3.start_play()
state, r, d = next(api)
shape = np.array(state).flatten().shape

# Disabled mixed-agent run for experiment 3, kept for reference:
#
# agents_exp3 = [Agent(policy="ar")]
# agents_exp3.append(Agent())
# agents_exp3.append(DQN(state_shape=shape, n_action=25, net=simple_net))
# agents_exp3.append(Agent())
#
# bg_exp3 = chain_wrapper(agents_exp3, env_exp3)
# bg_exp3.play_mixed(episode=1000)
#
# plt.plot(np.array(bg_exp3.agents[0].cum_r) + np.array(bg_exp3.agents[1].cum_r)
#          + np.array(bg_exp3.agents[2].cum_r) + np.array(bg_exp3.agents[3].cum_r))
# plt.show()
from Beer_game.beer_game_env import BeerGameEnv
import numpy as np
import time
import tqdm


def demand():
    """Yield an endless stream of integers drawn with ``np.random.randint(10)``."""
    while True:
        yield np.random.randint(10)


demand_gen = demand()
env = BeerGameEnv(demand_gen, lag=2)

# Drive the play API by hand: prime it with next(), then send one random order.
api = env.start_play()
next(api)
# The bare attribute reads in this section only show a value when run
# interactively; they are kept as-is in case any of them is a property.
env.on_order
a = np.random.randint(10)
api.send(a)
env.cost
env.week

# Exercise the reset()/step() interface with an all-zero action vector.
# NOTE(review): four entries presumably means one order per supply-chain
# position -- confirm against BeerGameEnv.step.
state = env.reset()
env.stock
env.trans
action = np.zeros(4, dtype=int)
state, cost, done = env.step(action)
import numpy as np
from Beer_game.wrapper import chain_wrapper
import matplotlib.pyplot as plt
import pickle

eps = 10
batch_size = 32


def demand():
    """Yield an endless stream of float demands drawn uniformly from [0, 10)."""
    while True:
        # This used to be `np.random.uniform(10)`, which binds 10 to `low` and
        # leaves `high` at its default of 1.0; NumPy documents high < low as
        # undefined behaviour. The bounds are now explicit.
        # NOTE(review): [0, 10) is assumed to be the intended range -- confirm.
        yield np.random.uniform(0.0, 10.0)


demand_gen = demand()

# NOTE(review): `BeerGameEnv`, `DQN` and `simple_net` are not imported in this
# section; they must already be in scope when it runs.
env = BeerGameEnv(demand_gen, lag=2)
api = env.start_play()

# The first tuple from the play API provides the state whose flattened shape
# sizes the agents' input.
state, r, d = next(api)
shape = np.array(state).flatten().shape

# Four separately constructed DQN agents, wrapped together with the environment.
agents = [
    DQN(state_shape=shape, n_action=10, net=simple_net) for _ in range(4)
]
bg = chain_wrapper(agents, env)
# Seed NumPy's global RNG so the sampled demand sequence is repeatable.
seed = 1234
np.random.seed(seed)


def demand():
    """Yield an endless stream of non-negative integer demands.

    Each value is a draw from ``np.random.normal(10, 5)`` truncated with
    ``int()``; negative results are replaced by 0.
    """
    while True:
        yield max(int(np.random.normal(10, 5)), 0)


demand_gen = demand()
env_exp4 = BeerGameEnv(demand_gen, lag=2)

# Four separately constructed DQN agents with identical settings.
# NOTE(review): every agent receives the same model_path -- confirm that DQN
# does not make them overwrite one another's saved model.
agents_exp4 = [
    DQN(state_shape=(8,), n_action=25, net=simple_net,
        model_path='models/dqn4')
    for _ in range(4)
]

bg_exp4 = chain_wrapper(agents_exp4, env_exp4)
bg_exp4.play(episode=1000000)

# Plot the element-wise sum of the four agents' cum_r series, accumulated
# left to right starting from agent 0, and save the figure to disk.
plt.plot(
    sum(
        (np.array(bg_exp4.agents[k].cum_r) for k in range(1, 4)),
        np.array(bg_exp4.agents[0].cum_r),
    )
)
plt.savefig('models/cum_r.png')