dont_burn_my_cpu = pygame.time.Clock()
num = 0
mon = {'top': 0, 'left': 0, 'width': 200, 'height': 200}
sct = mss.mss()
write_to_file_step = 0.95
broj_ociscenih_linija_file = 0  # lines cleared within the current epsilon band
broj_partija_file = 0           # games played within the current epsilon band
while True:
    app.screen.fill((0, 0, 0))
    if app.gameover:
        if br is None:
            broj_ociscenih_linija_file += app.lines
            broj_partija_file += 1
            episodes = 0
            app.gameover = True
            agent.replay(800)
            agent.target_train()
            app.start_game()
            tetris = Tetris()
            # record results
            if agent.epsilon < write_to_file_step:
                with open("deep-q-learning-results.txt", "a") as myfile:
                    myfile.write(
                        "Number of cleared lines for exploration rate above "
                        + str(write_to_file_step) + " is "
                        + str(broj_ociscenih_linija_file) + " over "
                        + str(broj_partija_file) + " games\n")
                write_to_file_step = write_to_file_step - 0.05
                broj_ociscenih_linija_file = 0
                broj_partija_file = 0
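
# The loop above depends on an external `agent` exposing replay() and
# target_train(), neither of which is defined in this snippet. As a hedged
# sketch only (the names AgentWithTarget, tau, and target_model are
# assumptions, not the original code), target_train() in a double-network
# DQN commonly looks like this:

import tensorflow as tf


class AgentWithTarget:
    """Toy container illustrating only the target-network update."""

    def __init__(self, model, tau=0.125):
        self.model = model  # online network, trained every step
        self.target_model = tf.keras.models.clone_model(model)
        self.target_model.set_weights(model.get_weights())
        self.tau = tau      # soft-update rate (assumed value)

    def target_train(self):
        # Polyak averaging: move each target weight a small step toward the
        # corresponding online weight, which stabilizes the Q-value targets.
        online = self.model.get_weights()
        target = self.target_model.get_weights()
        self.target_model.set_weights(
            [self.tau * w + (1.0 - self.tau) * t for w, t in zip(online, target)])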
batch_size = 16
loss_overall = []
error_avg = []
error = []
states = sim_env.state
for e in range(episodes):
    # Training phase: epsilon-greedy actions, store transitions, learn from replay.
    sim_env.reset()
    for k in range(training):
        states = np.reshape(states, [1, state_size])
        action = QN.act(states)
        next_state, rewards, overall_err = sim_env.Assign_Cores(action)
        next_state = np.reshape(next_state, [1, state_size])
        QN.remember(states, action, rewards, next_state)
        states = next_state
        if len(QN.memory) > batch_size:
            QN.replay(batch_size)
    loss_overall = np.append(loss_overall, QN.loss_avg / training)
    QN.loss_avg = 0
    # Testing phase: greedy actions only, tracking the per-step error.
    sim_env.reset()
    for u in range(testing):
        states = np.reshape(states, [1, state_size])
        action = QN.act_test(states)
        # print('SNR:', sim_env.SNR[-1])
        # print('action:', sim_env.action[action])
        next_state, rewards, overall_err = sim_env.Assign_Cores(action)
        error = np.append(error, overall_err)
        next_state = np.reshape(next_state, [1, state_size])
        states = next_state
    print(e)
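
# QN.act() and QN.act_test() are not shown above. A plausible sketch, assuming
# a Keras Q-network (the class name QNetworkSketch and its fields are
# hypothetical): act() is epsilon-greedy for the training phase, while
# act_test() is purely greedy for evaluation.

import random

import numpy as np


class QNetworkSketch:
    def __init__(self, model, action_size, epsilon=1.0):
        self.model = model             # Keras model: state -> Q-value per action
        self.action_size = action_size
        self.epsilon = epsilon         # exploration rate

    def act(self, state):
        # Epsilon-greedy: random action with probability epsilon, else best Q.
        if np.random.rand() <= self.epsilon:
            return random.randrange(self.action_size)
        return int(np.argmax(self.model.predict(state, verbose=0)[0]))

    def act_test(self, state):
        # Evaluation: always exploit the learned Q-values.
        return int(np.argmax(self.model.predict(state, verbose=0)[0]))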
batch_size = 32
n_episodes = 500
state_size = env.observation_space.shape[0]
action_size = env.action_space.n
agent = DQNAgent(state_size, action_size)
done = False
for i in range(n_episodes):
    state = env.reset()
    state = np.reshape(state, [1, state_size])
    for time in range(5000):
        if i % 50 == 0:
            env.render()  # render only every 50th episode to keep training fast
        action = agent.act(state)
        next_state, reward, done, _ = env.step(action)
        reward = reward if not done else -10  # penalize the terminal step
        next_state = np.reshape(next_state, [1, state_size])
        if np.random.rand() < 0.3:
            # subsample transitions: store only ~30% of them in the replay buffer
            agent.remember(state, action, reward, next_state, done)
        state = next_state
        if done:
            print("episode: {}/{}, score: {}".format(i, n_episodes, time))
            break
    if len(agent.memory) > batch_size:
        agent.replay(batch_size)
    if i % 100 == 0:
        agent.save("weights_cartpole_{}.hdf5".format(i))
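
# DQNAgent itself is not defined in this snippet. Below is a minimal sketch
# consistent with the calls above (remember/act/replay/save); the layer sizes
# and hyperparameters are assumptions, not the original implementation.

import random
from collections import deque

import numpy as np
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam


class DQNAgent:
    def __init__(self, state_size, action_size):
        self.state_size = state_size
        self.action_size = action_size
        self.memory = deque(maxlen=2000)  # experience replay buffer
        self.gamma = 0.95                 # discount factor
        self.epsilon = 1.0                # exploration rate, decayed per replay
        self.epsilon_min = 0.01
        self.epsilon_decay = 0.995
        self.model = self._build_model()

    def _build_model(self):
        # Small MLP mapping a state vector to one Q-value per action.
        model = Sequential([
            Dense(24, input_dim=self.state_size, activation='relu'),
            Dense(24, activation='relu'),
            Dense(self.action_size, activation='linear'),
        ])
        model.compile(loss='mse', optimizer=Adam(learning_rate=0.001))
        return model

    def remember(self, state, action, reward, next_state, done):
        self.memory.append((state, action, reward, next_state, done))

    def act(self, state):
        # Epsilon-greedy action selection.
        if np.random.rand() <= self.epsilon:
            return random.randrange(self.action_size)
        return int(np.argmax(self.model.predict(state, verbose=0)[0]))

    def replay(self, batch_size):
        # Fit the network toward one-step TD targets on a random minibatch.
        minibatch = random.sample(self.memory, batch_size)
        for state, action, reward, next_state, done in minibatch:
            target = reward
            if not done:
                target += self.gamma * np.amax(
                    self.model.predict(next_state, verbose=0)[0])
            target_f = self.model.predict(state, verbose=0)
            target_f[0][action] = target
            self.model.fit(state, target_f, epochs=1, verbose=0)
        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay

    def save(self, name):
        self.model.save_weights(name)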