def play_step(self, epsilon=0.0):
    done_reward = None

    # Epsilon-greedy action selection: explore with probability epsilon,
    # otherwise take the action with the highest predicted Q-value.
    if np.random.random() < epsilon:
        action = self.env.action_space.sample()
    else:
        state_a = np.expand_dims(np.array(self.state, copy=False, dtype=np.float32), 0) / 255.0
        state_v = tf.convert_to_tensor(state_a)
        if self.use_categorical:
            q_vals_v = self.net.q_values(state_v)
        else:
            q_vals_v = self.net(state_v)
        act_v = tf.math.argmax(q_vals_v, axis=1)
        action = int(act_v.numpy()[0])

    # Take the action in the environment and store the transition in the replay buffer.
    new_state, reward, is_done, _ = self.env.step(action)
    self.total_reward += reward

    exp = xp.Experience(self.state, action, reward, is_done)
    self.exp_buffer.append(exp)
    self.state = new_state

    if is_done:
        done_reward = self.total_reward
        self._reset()
    return done_reward
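# A minimal sketch of how play_step is typically driven from a training loop.
# The agent object, its attribute names, and the epsilon schedule below are
# assumptions for illustration; only the play_step signature and its
# "total reward at episode end, else None" return value come from the code above.
def run_episodes(agent, n_frames=200_000,
                 eps_start=1.0, eps_final=0.02, eps_decay_frames=100_000):
    """Drive agent.play_step with a linearly decaying epsilon; collect episode rewards."""
    episode_rewards = []
    for frame_idx in range(n_frames):
        epsilon = max(eps_final, eps_start - frame_idx / eps_decay_frames)
        done_reward = agent.play_step(epsilon)
        if done_reward is not None:  # non-None only when an episode just ended
            episode_rewards.append(done_reward)
    return episode_rewards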
def compute_experience(self):
    """
    Compute, from the project's start date (i.e. its first commit) until today,
    the experience of every developer, based on their first commit date and on
    when they left the project (if they left it).
    """
    curr_date = self.vcs_mgr.first_commit_repo + datetime.timedelta(1)
    end_date = datetime.date.today()

    # Walk from the first commit date to today, one increment at a time,
    # building an Experience snapshot of every non-excluded developer.
    while curr_date <= end_date:
        curr_xp = experience.Experience(curr_date)
        for dev in self.vcs_mgr.author_dict.values():
            if not dev.exclude:
                curr_xp.process_dev(dev)
        self.experiences.append(curr_xp)
        curr_date = XPAnalyser.increment_date(curr_date, XPAnalyser.DEFAULT_INCREMENT)

    self.save_analyse()
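# A minimal sketch of the date-stepping helper assumed above. The real
# XPAnalyser.increment_date and DEFAULT_INCREMENT are not shown in this
# snippet, so the day-based increment below is a hypothetical stand-in.
import datetime

def increment_date(curr_date, increment_days):
    """Return curr_date advanced by increment_days days."""
    return curr_date + datetime.timedelta(days=increment_days)

# e.g. increment_date(datetime.date(2020, 1, 1), 7) -> datetime.date(2020, 1, 8)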
def simulate(model):
    # Instantiating the learning related parameters
    learning_rate = get_learning_rate(0)
    explore_rate = get_explore_rate(0)
    discount_factor = 0.99
    rewards = []
    num_streaks = 0

    env.render()

    # Initialize experience replay object
    experience = exp.Experience(model, max_memory=max_memory)

    for n_episode in range(NUM_EPISODES):
        loss = 0.0

        # Reset the environment
        obv = env.reset()

        # the initial state
        state_0 = state_to_bucket(obv)
        total_reward = 0
        n_episodes = 0

        envstate = env.render()
        envstate = resize(envstate, (10, 10))
        envstate = envstate.reshape((1, -1))

        for t in range(MAX_T):
            # Select an action
            action = select_action(envstate, model, explore_rate)
            prev_envstate = envstate

            # execute the action
            obv, reward, done, _ = env.step(action)

            # Observe the result
            state = state_to_bucket(obv)
            total_reward += reward

            envstate = env.render()
            envstate = resize(envstate, (10, 10))
            envstate = envstate.reshape((1, -1))

            # Store episode (experience)
            episode = [prev_envstate, action, reward, envstate, env.is_game_over()]
            experience.remember(episode)
            n_episodes += 1

            # Setting up for the next iteration
            state_0 = state

            # Train neural network model
            inputs, targets = experience.get_data(data_size=data_size)
            h = model.fit(
                inputs,
                targets,
                epochs=8,
                batch_size=16,
                verbose=0,
            )
            loss = model.evaluate(inputs, targets, verbose=0)

            # Print data (win_history, win_rate, hsize, completion_check and qmaze
            # are expected to be defined at module level)
            template = ("Epoch: {:03d}/{:d} | Loss: {:.4f} | Episodes: {:d} "
                        "| Win count: {:d} | Win rate: {:.3f} | t: {:d}")
            print(template.format(n_episode, NUM_EPISODES - 1, loss, n_episodes,
                                  sum(win_history), win_rate, t))

            # Check whether training has exhausted all free cells and the agent
            # won in every case
            if win_rate > 0.9:
                explore_rate = 0.05
            if sum(win_history[-hsize:]) == hsize and completion_check(model, qmaze):
                print("Reached 100%% win rate at epoch: %d" % (n_episode,))
                break

            # Render the maze
            if RENDER_MAZE:
                env.render()
            if REALTIME_RENDERING:
                time.sleep(0.1)
            if env.is_game_over():
                sys.exit()

            if done:
                rewards.append(total_reward)
                print("Episode %d finished after %d time steps with total reward = %f (streak %d)."
                      % (n_episode, t, total_reward, num_streaks))
                if t <= SOLVED_T:
                    num_streaks += 1
                else:
                    num_streaks = 0
                break
            elif t >= MAX_T - 1:
                print("Episode %d timed out at %d with total reward = %f."
                      % (n_episode, t, total_reward))

        # Update parameters
        explore_rate = get_explore_rate(n_episode)
        learning_rate = get_learning_rate(n_episode)

    plt.plot(rewards)
    plt.title('Episode rewards')
    plt.xlabel('n_episode')
    plt.ylabel('Reward')
    plt.show()
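# A minimal sketch of the select_action helper used above. Its actual
# implementation is not shown in this snippet, so this epsilon-greedy version
# (random action with probability explore_rate, otherwise the argmax of the
# model's Q-value prediction) is an assumption based on how it is called;
# num_actions is an illustrative default.
import numpy as np

def select_action(envstate, model, explore_rate, num_actions=4):
    """Pick a random action with probability explore_rate, else the greedy one."""
    if np.random.rand() < explore_rate:
        return np.random.randint(num_actions)
    q_values = model.predict(envstate, verbose=0)  # envstate has shape (1, n_features)
    return int(np.argmax(q_values[0]))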
from random import random  # assumed stdlib source of the random() call below
from numpy.random import randint as rdi

max_explore_rate = 0.40
min_explore_rate = 0.10
epoch = 1000000
height = 32
width = 32
channel = 4
xp_capacity = 1000
xp_nb_batch = 32
play_interval = 1
save_interval = 1000

game = game.Game()
net = model.CNN(height, width, channel, game.actions, "version01")
exp = xp.Experience(xp_capacity, game, channel, net)

for i in range(epoch):
    print("Iteration", i)
    # if (i % play_interval == play_interval - 1):
    #     explore_rate = 0.0
    # else:
    #     explore_rate = min_explore_rate + (epoch - i) * 1.0 / epoch * (max_explore_rate - min_explore_rate)
    explore_rate = 0.00
    step = 0
    while True:
        step += 1
        x = random()
        if x < explore_rate:
            action = rdi(0, game.actions)
        else:
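# A small worked sketch of the linearly decaying exploration schedule that is
# commented out above: explore_rate falls from max_explore_rate toward
# min_explore_rate as the iteration index i approaches epoch. The defaults are
# just the constants defined in the snippet.
def linear_explore_rate(i, epoch=1000000, lo=0.10, hi=0.40):
    return lo + (epoch - i) * 1.0 / epoch * (hi - lo)

# linear_explore_rate(0)       -> 0.40 (full exploration at the start)
# linear_explore_rate(500000)  -> 0.25 (halfway through training)
# linear_explore_rate(1000000) -> 0.10 (floor at the end)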
""" ##if __name__ == "__main__": ## p = int(sys.argv[1]) ## m = int(sys.argv[2]) """ p=5 m=10 assert (m > 0), "The number of markers must be greater than 0" assert (p <= m), "The number of positive markers must be less or equal to the number of markers" exp = experience.Experience(p,m) markers = exp.get_markers() positive = exp.get_positive_markers() print("Markers: %s" % (markers)) print("Positive markers: %s" % (positive)) # test stategy 1 cpt = 0 print("Negative markers: %s" % (negative_markers1(markers,positive))) print("Nb. comparisons: %d" % (cpt)) # test stategy 2 cpt = 0 print("Negative markers: %s" % (negative_markers2(markers,positive))) print("Nb. comparisons: %d" % (cpt))
def simulate(model):
    # Instantiating the learning related parameters
    learning_rate = get_learning_rate(0)
    explore_rate = 0.1
    discount_factor = 0.99
    rewards = []
    num_streaks = 0

    env.render()

    # Initialize experience replay object
    experience = exp.Experience(model, max_memory=max_memory)

    for n_episode in range(NUM_EPISODES):
        loss = 0.0
        if n_episode > 20:
            explore_rate = 0.05

        # Reset the environment
        obv = env.reset()

        # the initial state
        state_0 = state_to_bucket(obv)
        total_reward = 0
        n_episodes = 0

        envstate = env.render()
        envstate = resize(envstate, (10, 10))
        envstate = envstate.reshape((1, -1))

        for t in range(MAX_T):
            prev_envstate = envstate

            # Get next action
            action = select_action(prev_envstate, model, explore_rate)

            # execute the action
            obv, reward, done, _ = env.step(action)

            # Observe the result
            state = state_to_bucket(obv)
            total_reward += reward

            envstate = env.render()
            envstate = resize(envstate, (10, 10))
            envstate = envstate.reshape((1, -1))

            # Store episode (experience)
            episode = [prev_envstate, action, reward, envstate, env.is_game_over()]
            experience.remember(episode)
            n_episodes += 1

            # Setting up for the next iteration
            state_0 = state

            # Train neural network model
            inputs, targets = experience.get_data(data_size=data_size)
            h = model.fit(
                inputs,
                targets,
                epochs=8,
                batch_size=64,
                verbose=0,
            )
            loss = model.evaluate(inputs, targets, verbose=0)

            # Render the maze
            if RENDER_MAZE:
                env.render()
            if REALTIME_RENDERING:
                time.sleep(0.1)
            if env.is_game_over():
                sys.exit()

            if done:
                rewards.append(total_reward)
                print("Episode %d finished after %d time steps with total reward = %f (streak %d)."
                      % (n_episode, t, total_reward, num_streaks))
                if t <= SOLVED_T:
                    num_streaks += 1
                else:
                    num_streaks = 0
                break
            elif t >= MAX_T - 1:
                print("Episode %d timed out at %d with total reward = %f."
                      % (n_episode, t, total_reward))

        # Print data
        template = "Epoch: {:03d}/{:d} | Loss: {:.4f} | Episodes: {:d}"
        print(template.format(n_episode, NUM_EPISODES - 1, loss, n_episodes))

        # It's considered done when the maze has been solved more than 100 times consecutively
        if num_streaks > STREAK_TO_END:
            break

        # Update parameters
        explore_rate = get_explore_rate(n_episode)
        learning_rate = get_learning_rate(n_episode)

    plt.plot(rewards)
    plt.title('Episode rewards')
    plt.xlabel('n_episode')
    plt.ylabel('Reward')
    plt.show()
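# A minimal sketch of the replay-memory object that both simulate() variants
# rely on, based only on how it is called (remember(episode) and
# get_data(data_size=...)). The one-step Q-learning target construction,
# class name, and discount value below are assumptions, not the project's
# actual exp.Experience implementation.
import numpy as np

class ReplayMemory:
    def __init__(self, model, max_memory=1000, discount=0.95):
        self.model = model
        self.max_memory = max_memory
        self.discount = discount
        self.memory = []

    def remember(self, episode):
        # episode = [prev_envstate, action, reward, envstate, game_over]
        self.memory.append(episode)
        if len(self.memory) > self.max_memory:
            del self.memory[0]

    def get_data(self, data_size=10):
        env_size = self.memory[0][0].shape[1]
        num_actions = self.model.output_shape[-1]
        data_size = min(len(self.memory), data_size)
        inputs = np.zeros((data_size, env_size))
        targets = np.zeros((data_size, num_actions))
        # Sample transitions and bootstrap the Q-target for the taken action.
        for i, j in enumerate(np.random.choice(len(self.memory), data_size, replace=False)):
            prev_envstate, action, reward, envstate, game_over = self.memory[j]
            inputs[i] = prev_envstate
            targets[i] = self.model.predict(prev_envstate, verbose=0)[0]
            q_next = np.max(self.model.predict(envstate, verbose=0)[0])
            targets[i, action] = reward if game_over else reward + self.discount * q_next
        return inputs, targets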