Example #1
    def train(self, episodes=1000, max_steps=800, plot_rewards=True):
        # Initialize target network weights
        self.actor.update_target_model(copy=True)
        self.critic.update_target_model(copy=True)
        scores, steps = np.empty(episodes), np.empty(episodes)
        start = time.time()
        for e in range(episodes):
            score, step = self.run_episode(max_steps)
            scores[e], steps[e] = score, step
            print("Episode:", e, "  steps:", step, "  score:", score,
                  "  time:",
                  time.time() - start)

        ensure_saved_models_dir()

        if plot_rewards:
            t_time = time.time() - start
            print("Mean score:", np.mean(scores), " Total steps:",
                  np.sum(steps), " total time:", t_time)
            plot(scores)
            plot_running_avg(scores)
            np.save(
                "./train_data/ddpg_enc_actions" + str(self.state_size) +
                str(self.n_neighbors) + "_scores", scores)
            np.save(
                "./train_data/ddpg_enc_actions" + str(self.state_size) +
                str(self.n_neighbors) + "_time", t_time)
            np.save(
                "./train_data/ddpg_enc_actions" + str(self.state_size) +
                str(self.n_neighbors) + "_steps", steps)
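
These snippets share a few project-level helpers (plot, plot_running_avg, ensure_saved_models_dir) that this page does not show. Below is a minimal sketch of what they plausibly do, assuming a "./saved_models" checkpoint directory and matplotlib for plotting; both are assumptions, not taken from the examples.

import os

import matplotlib.pyplot as plt
import numpy as np


def ensure_saved_models_dir(path="./saved_models"):
    # Create the checkpoint directory if it does not already exist.
    os.makedirs(path, exist_ok=True)


def plot(values, title="Scores"):
    plt.plot(values)
    plt.title(title)
    plt.show()


def plot_running_avg(values, title="Running Average", window=100):
    # Mean over the trailing `window` entries, which smooths the noisy
    # per-episode values before plotting.
    running = np.array([values[max(0, t - window + 1):t + 1].mean()
                        for t in range(len(values))])
    plt.plot(running)
    plt.title(title)
    plt.show()
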
Example #2
    def train(self, episodes=1000, max_steps=1000, plot_rewards=True):
        scores, steps = np.empty(episodes), np.empty(episodes)
        start = time.time()
        for e in range(episodes):
            score, step = self.run_episode(max_steps)
            scores[e], steps[e] = score, step
            print("Episode:", e, "  steps:", step, "  score:", score,
                  "  epsilon:", self.epsilon, "  time:",
                  time.time() - start)
            # if e % 100 == 0:
            #     ensure_saved_models_dir()
            #     self.model.save_weights(FINAL_WEIGHTS_PATH)
            #     print("Weights Saved")
        ensure_saved_models_dir()
        self.model.save_weights(FINAL_WEIGHTS_PATH)

        if plot_rewards:
            t_time = time.time() - start
            print("Mean score:", np.mean(scores), " Total steps:",
                  np.sum(steps), " total time:", t_time)
            plot(scores)
            plot_running_avg(scores)
            np.save("./train_data/ddqn_" + str(self.state_size) + "_scores",
                    scores)
            np.save("./train_data/ddqn_" + str(self.state_size) + "_time",
                    t_time)
            np.save("./train_data/ddqn_" + str(self.state_size) + "_steps",
                    steps)
Example #3
    def train(self, episodes=1000, max_steps=1000, plot_rewards=True):
        scores = np.empty(episodes)
        for e in range(episodes):
            score = self.run_episode(max_steps)
            scores[e] = score
            print("Episode:", e, "  score:", score, "  epsilon:", self.epsilon)

        ensure_saved_models_dir()
        self.model.save_weights(FINAL_WEIGHTS_PATH)

        if plot_rewards:
            plot(scores)
            plot_running_avg(scores)
Example #4
    def serial_pretrain(self, rows=10000, epochs=10):
        targets = np.empty((rows, self.action_size))
        states = np.empty((rows, self.state_size))
        for i in range(rows):
            p = self.env.observation_space.sample()
            states[i] = self.state_transformer.transform(p)
            self.fill_target(p, targets[i])
            if i % 100 == 0:
                print("%.1f %%" % (i / rows * 100))
        self.model.fit(states,
                       targets,
                       batch_size=self.batch_size,
                       epochs=epochs,
                       verbose=1,
                       validation_split=0.1)
        self.update_target_model()
        ensure_saved_models_dir()
        self.model.save_weights(PRETRAIN_WEIGHTS_PATH)
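
fill_target is not shown on this page. Judging from Example #5, which computes one analytic target per sampled action with v_upperbound_breakpoints, it presumably writes one target value per discrete action into the preallocated row. The sketch below is an assumption, not the project's actual code.

    def fill_target(self, p, target_row):
        # Hypothetical: evaluate the analytic value bound for every discrete
        # action of the raw (untransformed) state p and store it in place.
        for a, (_, ia, ja, ka) in enumerate(self.env.actions):
            target_row[a] = v_upperbound_breakpoints(p, ia, ja, ka)
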
Example #5
    def serial_pretrain(self,
                        rows=16000000,
                        batch_size=32,
                        act_size=10,
                        epochs=3):
        start = time.time()
        r = int(rows / batch_size)
        for j in range(r):
            states = np.empty((batch_size, self.state_size))
            actions = np.random.randint(self.action_size,
                                        size=(batch_size, act_size))
            targets = np.empty((batch_size, act_size))
            for i in range(batch_size):
                p = self.env.observation_space.sample()
                states[i] = self.state_transformer.transform(p)
                for k in range(act_size):
                    _, ia, ja, ka = self.env.actions[actions[i][k]]
                    targets[i][k] = v_upperbound_breakpoints(p, ia, ja, ka)
                if i % 100 == 0:
                    print(j,
                          "-- %.6f %%" % ((j * batch_size + i) / rows * 100),
                          time.time() - start)
            print(j, "-- %.6f %%" % ((j * batch_size + i) / rows * 100),
                  time.time() - start, "-- UPDATE")
            for i in range(epochs):
                self.critic_train_model(
                    np.repeat(states, act_size, axis=0),
                    self.enc_actions[actions.reshape(-1, )].todense(),
                    targets.reshape(-1, 1))
                self.actor_train_model(states)
            targets, states, actions = None, None, None
            gc.collect()
            if j % 100000 == 0:
                ensure_saved_models_dir()
                saver = tf.train.Saver()
                saver.save(self.session, self.pretrain_path)
                print("Pretrain weights saved")
        ensure_saved_models_dir()
        saver = tf.train.Saver()
        saver.save(self.session, self.pretrain_path)
        print("Pretrain weights saved")
        self.session.run(self.target_init)
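
In Example #5 every sampled state is paired with act_size random actions, so the critic batch has batch_size * act_size rows; np.repeat(states, act_size, axis=0) and targets.reshape(-1, 1) keep those rows aligned. A small shape-only sketch of that alignment follows (illustrative values, not project code).

import numpy as np

batch_size, act_size, state_size = 2, 3, 4
states = np.arange(batch_size * state_size).reshape(batch_size, state_size)
targets = np.arange(batch_size * act_size).reshape(batch_size, act_size)

# Row i * act_size + k of the repeated states corresponds to targets[i][k],
# i.e. state i evaluated under its k-th sampled action.
rep_states = np.repeat(states, act_size, axis=0)   # shape (6, 4)
flat_targets = targets.reshape(-1, 1)              # shape (6, 1)
assert rep_states.shape[0] == flat_targets.shape[0]
assert (rep_states[1 * act_size + 2] == states[1]).all()
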
Example #6
	def serial_pretrain(self, rows=100000, batch_size=64, epochs=10):
		start = time.time()
		r = int(rows / batch_size)
		for j in range(r):
			targets = np.empty((batch_size, self.action_size))
			states = np.empty((batch_size, self.state_size))
			for i in range(batch_size):
				p = self.env.observation_space.sample()
				states[i] = self.state_transformer.transform(p)
				self.fill_target(p, targets[i])
				if i % 100 == 0:
					print(j, "-- %.6f %%" % ((j*batch_size + i)/rows * 100), time.time() - start)
			actions = np.argmax(targets, axis=1)
			print(j, "-- %.6f %%" % ((j * batch_size + i) / rows * 100), time.time() - start, "-- UPDATE")
			for i in range(epochs):
				self.update(states, actions, targets)
			targets, states = None, None
			gc.collect()
		ensure_saved_models_dir()
		saver = tf.train.Saver()
		saver.save(self.session, self.pretrain_path)
		self.update_target_model()
Example #7
	def train(self, episodes=1000, max_steps=800, plot_rewards=True):
		scores, steps, losses = np.zeros(episodes), np.zeros(episodes), np.zeros(episodes)
		start = time.time()
		saver = tf.train.Saver()
		if self.fill_mem:
			self.fill_memory()
		for e in range(episodes):
			score, step, loss = self.run_episode(max_steps)
			scores[e], steps[e], losses[e] = score, step, loss
			print("Episode:", e, "  steps:", step, "  score: %.1f" % score,"  loss:", loss, "  epsilon:", self.epsilon, "  time:", time.time() - start)
			if math.isnan(loss): break
		ensure_saved_models_dir()
		saver.save(self.session, self.train_path)

		if plot_rewards:
			t_time = time.time() - start
			print("Mean score:", np.mean(scores), " Total steps:", np.sum(steps), " total time:", t_time)
			np.save("./train_data/ddqn_tf_" + str(self.state_size) + "_scores", scores)
			np.save("./train_data/ddqn_tf_" + str(self.state_size) + "_time", t_time)
			np.save("./train_data/ddqn_tf_" + str(self.state_size) + "_steps", steps)
			plot(steps)
			plot_running_avg(steps)
			plot_running_avg(losses, title="Losses")
Example #8
    def parallel_pretrain(self, rows=10000, epochs=10, n_threads=8):
        def f(i):
            p = self.env.observation_space.sample()
            states[i] = self.state_transformer.transform(p)
            self.fill_target(p, targets[i])
            cur = progress.inc()
            if cur % 100 == 0:
                print("%.1f %%" % (cur / rows * 100))

        progress = AtomicInteger()
        targets = np.empty((rows, self.action_size))
        states = np.empty((rows, self.state_size))
        pool = ThreadPool(n_threads)
        pool.map(f, range(rows))
        self.model.fit(states,
                       targets,
                       batch_size=self.batch_size,
                       epochs=epochs,
                       verbose=1,
                       validation_split=0.1)
        self.update_target_model()
        ensure_saved_models_dir()
        self.model.save_weights(PRETRAIN_WEIGHTS_PATH)
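
Example #8 writes into the shared states and targets arrays from a ThreadPool (a thread pool shares memory, which is what makes the in-place writes work) and counts progress with an AtomicInteger helper that is not shown here. A minimal sketch of such a counter, offered as an assumption:

import threading


class AtomicInteger:
    # Thread-safe counter; inc() returns the new value so worker threads
    # can report progress without racing on a plain integer.
    def __init__(self, value=0):
        self._value = value
        self._lock = threading.Lock()

    def inc(self):
        with self._lock:
            self._value += 1
            return self._value
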
Example #9
    def train(self, episodes=1000, max_steps=800, plot_rewards=True):
        # Initialize target network weights
        self.actor.update_target_model(copy=True)
        self.critic.update_target_model(copy=True)
        scores, steps = np.empty(episodes), np.empty(episodes)
        start = time.time()
        break_flag = 0
        for e in range(episodes):
            score, step = self.run_episode(max_steps)
            scores[e], steps[e] = score, step
            print("Episode:", e, "  steps:", step, "  score:", score,
                  "  time:",
                  time.time() - start)
            # Refill memory every 50 episodes while full-length episodes keep
            # occurring; stop after 50 consecutive max-length episodes once
            # the second half of training has been reached.
            if e % 50 == 0 and step == max_steps and self.fill_mem:
                self.fill_memory()
            break_flag = break_flag + 1 if step == max_steps else 0
            if break_flag > 50 and e >= episodes / 2: break
        ensure_saved_models_dir()
        saver = tf.train.Saver()
        saver.save(self.session, self.train_path)

        if plot_rewards:
            t_time = time.time() - start
            print("Mean score:", np.mean(scores), " Total steps:",
                  np.sum(steps), " total time:", t_time)
            np.save(
                "./train_data/ddpg_enc_actions" + str(self.state_size) +
                str(self.n_neighbors) + "_scores", scores)
            np.save(
                "./train_data/ddpg_enc_actions" + str(self.state_size) +
                str(self.n_neighbors) + "_time", t_time)
            np.save(
                "./train_data/ddpg_enc_actions" + str(self.state_size) +
                str(self.n_neighbors) + "_steps", steps)
            plot(steps)
            plot_running_avg(steps)
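
For completeness, a hypothetical call pattern for these methods; the agent class name and constructor arguments below are assumptions, not taken from the examples.

# Hypothetical driver script.
agent = DDPGAgent(state_size=5, n_neighbors=3)   # assumed constructor
agent.serial_pretrain()                          # optional warm start
agent.train(episodes=1000, max_steps=800, plot_rewards=True)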