def __init__(self, tau: bool = True) -> None:
     """Initialise the instance's fields and its starting state.

     Args:
         tau: When truthy, the freshly reset state is drawn with
             ``plot_state`` right after construction.
     """
     self.action_num = 4
     self.shape = [7, 7]
     self.time = 0
     # self.state holds whatever reset() returns.
     self.state = self.reset()
     self.components = 7
     # Keep the column index of every entry equal to 1 in the first six
     # columns of the state; the row indices are not needed.
     leading_columns = self.state[:, :6]
     _rows, hot_columns = np.where(leading_columns == 1)
     self.state_num = hot_columns
     if not tau:
         return
     plot_state(self.state, X=7, Y=7)
 def randomint(self, stau=False):
     """Replace the instance's state and time with randomly drawn values.

     Seven integers are drawn from [0, 6) and one time value from [0, 100).
     ``processsa`` turns the seven integers into the first six columns of a
     7x7 int32 grid, and ``time_encoder`` fills the last column from the
     drawn time.

     Args:
         stau: When it compares equal to ``True``, the new grid is drawn
             with ``plot_state``.

     Returns:
         The tuple ``(self.state, self.time)`` after the update.
     """
     # Draw order matters for reproducibility under a fixed seed:
     # the seven per-row values first, then the time.
     levels = np.random.randint(0, 6, 7)
     self.time = np.random.randint(0, 100, 1)[0]

     grid = np.zeros((7, 7), dtype=np.int32)
     grid[:, :6] = processsa(levels, 6)
     grid[:, 6] = time_encoder(self.time)

     # Equality with True (not plain truthiness) is kept on purpose so
     # that callers passing other truthy values see unchanged behaviour.
     if stau == True:  # noqa: E712
         plot_state(grid, 7, 7)

     self.state, self.state_num = grid, levels
     return self.state, self.time
    # NOTE(review): this is a fragment of a larger function; its `def` line
    # and the end of the loop body are not visible in this excerpt.
    # `num_episodes`, `simulation_path`, `step`, `env`, `sess`, `mainQN` and
    # `font` come from the enclosing scope.

    # Per-episode logs: up to 100 recorded actions and 101 recorded state
    # vectors (initial state plus one per step), 7 int32 entries each.
    actions = np.zeros([num_episodes,100,7], dtype=np.int32)
    states = np.zeros([num_episodes,101,7], dtype=np.int32)

    # Output directory for this training step's figures; created on demand.
    path1 = simulation_path + '/training-step' + str(step)
    if os.path.exists(path1)==False:
        os.mkdir(path1)
    for i_episode in range(num_episodes):
        t=0  # step counter, used to index the logs and to name saved figures
        rAll = 0  # running sum of the scaled rewards for this episode
        done  = False
        current_state = env.reset()
        a = []  # not used in the visible part of the loop
        states[i_episode,0] = env.state_num
        while not done:
            # Draw the current state on figure 1 and label its axes.
            plt.figure(1)
            plot_state(current_state, 7, 7)
            plt.xlabel('states', **font)
            plt.ylabel('components', **font)
            # Flatten the state to a length-49 vector and ask the network
            # for its predicted action.
            state = np.reshape(current_state,[49])
            action = sess.run(mainQN.predict, feed_dict={mainQN.scalarInput: [state]})[0]
            next_state, next_reward, done = env.step(action)
            # Reward is divided by 600 before accumulation and display.
            # NOTE(review): the origin of the 600 constant is not shown here.
            next_reward = next_reward/600
            rAll += np.sum(next_reward)
            actions[i_episode,t] = action
            states[i_episode,t+1] = env.state_num

            # Annotate the figure with the action, the environment time, the
            # scaled reward and the running total, then save it as EPS.
            plt.title('action:' + str(action)  + '  time:' + str(env.time) + '\n' + 'reward:' +
                      np.array2string(next_reward,precision=2) +' total costs:' + str("%.2f"%rAll),**font)
            plt.savefig(path1 + '/' + str(t) + '.eps')
            # NOTE(review): within the visible lines `t` is never advanced and
            # `current_state` is never reassigned from `next_state`;
            # presumably that happens in loop code beyond this excerpt — confirm.
            # # plt.savefig(c_path + '/' + str(t) + '.tif')
            # plt.savefig(path1 + '/' + str(t) + '.png')
 def render(self):
     """Draw the current ``self.state`` by handing it to ``plot_state``.

     The two trailing ``7`` arguments are presumably the grid dimensions
     (``__init__`` passes them as ``X=7, Y=7``) — confirm against
     ``plot_state``.
     """
     plot_state(self.state, 7, 7)