Example 1
import mss
import pygame

# `app`, `agent`, `br`, and `Tetris` are assumed to be defined elsewhere
# in the surrounding program.
dont_burn_my_cpu = pygame.time.Clock()  # clock for capping the frame rate
num = 0
# Screen region captured by mss (the top-left 200x200 pixels).
mon = {'top': 0, 'left': 0, 'width': 200, 'height': 200}
sct = mss.mss()
# Epsilon threshold below which accumulated results are flushed to the log.
write_to_file_step = 0.95
broj_ociscenih_linija_file = 0  # lines cleared since the last flush
broj_partija_file = 0           # games played since the last flush
while True:
    app.screen.fill((0, 0, 0))
    if app.gameover:
        if br is None:
            broj_ociscenih_linija_file += app.lines
            broj_partija_file += 1
            episodes = 0
            app.gameover = True
            # Train on a batch of 800 stored transitions, sync the target
            # network, then restart the game.
            agent.replay(800)
            agent.target_train()
            app.start_game()
            tetris = Tetris()
            # record the results
            if agent.epsilon < write_to_file_step:
                with open("deep-q-learning-results.txt", "a") as myfile:
                    myfile.write(
                        "Number of cleared lines for exploration rate above "
                        + str(write_to_file_step) + " is "
                        + str(broj_ociscenih_linija_file)
                        + " over " + str(broj_partija_file) + " games\n")
                write_to_file_step -= 0.05
                broj_ociscenih_linija_file = 0
                broj_partija_file = 0
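The agent.replay(800) and agent.target_train() calls above come from a DQN agent class that this snippet does not show. As a rough sketch of what such methods typically do (fit the online network on replayed transitions, then soft-update a separate target network), here is a hypothetical implementation; the model, target_model, memory, gamma, and tau attributes are assumptions rather than the original author's code, and both models are assumed to be compiled Keras models with identical architectures.

import random
import numpy as np

class SketchDQNAgent:
    """Hypothetical agent illustrating replay() and target_train()."""

    def __init__(self, model, target_model, gamma=0.95, tau=0.125):
        self.model = model                # online network (assumed Keras model)
        self.target_model = target_model  # target network, same architecture
        self.memory = []                  # (state, action, reward, next_state, done)
        self.gamma = gamma                # discount factor
        self.tau = tau                    # soft-update rate for the target network
        self.epsilon = 1.0                # exploration rate, decayed elsewhere

    def replay(self, batch_size):
        # Fit the online network on a random batch of stored transitions.
        if len(self.memory) < batch_size:
            return
        for state, action, reward, next_state, done in random.sample(self.memory, batch_size):
            target = self.model.predict(state)
            if done:
                target[0][action] = reward
            else:
                # Bootstrap from the *target* network for stability.
                q_next = self.target_model.predict(next_state)[0]
                target[0][action] = reward + self.gamma * np.max(q_next)
            self.model.fit(state, target, epochs=1, verbose=0)

    def target_train(self):
        # Soft update: blend the online weights into the target network.
        weights = self.model.get_weights()
        target_weights = self.target_model.get_weights()
        new_weights = [self.tau * w + (1 - self.tau) * tw
                       for w, tw in zip(weights, target_weights)]
        self.target_model.set_weights(new_weights)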
Example 2

import numpy as np

# `sim_env`, `QN`, `episodes`, `training`, `testing`, and `state_size`
# are assumed to be defined elsewhere in the surrounding program.
batch_size = 16
loss_overall = []
error_avg = []
error = []
states = sim_env.state
for e in range(episodes):
    # Training phase: act with exploration and learn from replayed batches.
    sim_env.reset()
    for k in range(training):
        states = np.reshape(states, [1, state_size])
        action = QN.act(states)
        next_state, rewards, overall_err = sim_env.Assign_Cores(action)
        next_state = np.reshape(next_state, [1, state_size])
        QN.remember(states, action, rewards, next_state)
        states = next_state
        if len(QN.memory) > batch_size:
            QN.replay(batch_size)

    loss_overall = np.append(loss_overall, QN.loss_avg / training)
    QN.loss_avg = 0

    # Testing phase: act greedily (no exploration) and record the error.
    sim_env.reset()
    for u in range(testing):
        states = np.reshape(states, [1, state_size])
        action = QN.act_test(states)
        # print('SNR:', sim_env.SNR[-1])
        # print('action:', sim_env.action[action])
        next_state, rewards, overall_err = sim_env.Assign_Cores(action)
        error = np.append(error, overall_err)
        next_state = np.reshape(next_state, [1, state_size])
        states = next_state
    print(e)
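Example 2 calls QN.act during the training loop and QN.act_test during the testing loop, but neither method is shown. A common pattern behind that split is epsilon-greedy selection while training and purely greedy selection at test time; the sketch below illustrates it under the assumption that self.model is a compiled Keras model. The class and attribute names here are hypothetical, not the original author's.

import random
import numpy as np

class SketchQNetwork:
    def __init__(self, model, action_size, epsilon=1.0,
                 epsilon_min=0.01, epsilon_decay=0.995):
        self.model = model              # assumed: compiled Keras model
        self.action_size = action_size
        self.epsilon = epsilon          # current exploration rate
        self.epsilon_min = epsilon_min
        self.epsilon_decay = epsilon_decay

    def act(self, state):
        # Training-time policy: explore with probability epsilon.
        if np.random.rand() < self.epsilon:
            return random.randrange(self.action_size)
        q_values = self.model.predict(state)
        return int(np.argmax(q_values[0]))

    def act_test(self, state):
        # Test-time policy: always pick the highest-valued action.
        q_values = self.model.predict(state)
        return int(np.argmax(q_values[0]))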
Example 3
import numpy as np

# `env` (a Gym environment such as CartPole) and the DQNAgent class are
# assumed to be defined elsewhere in the surrounding program.
batch_size = 32

n_episodes = 500

state_size = env.observation_space.shape[0]
action_size = env.action_space.n

agent = DQNAgent(state_size, action_size)
done = False

for i in range(n_episodes):
    state = env.reset()
    state = np.reshape(state, [1, state_size])
    for time in range(5000):
        # Render only every 50th episode to keep training fast.
        if i % 50 == 0:
            env.render()
        action = agent.act(state)
        next_state, reward, done, _ = env.step(action)
        # Penalize the terminal step so the agent learns to survive longer.
        reward = reward if not done else -10
        next_state = np.reshape(next_state, [1, state_size])
        # Subsample transitions: store only ~30% of them in replay memory.
        if np.random.rand() < 0.3:
            agent.remember(state, action, reward, next_state, done)
        state = next_state
        if done:
            print("episode: {}/{}, score: {}".format(i, n_episodes, time))
            break
    # Learn from one replayed minibatch per episode.
    if len(agent.memory) > batch_size:
        agent.replay(batch_size)

    if i % 100 == 0:
        agent.save("weights_cartpole_{}.hdf5".format(i))