Ejemplo n.º 1
0
    def reset(self, Date, path):
        """Rebuild the environment and re-initialise the action/state fields.

        ``self.env`` is replaced by a new ``envirment.Env(Date, path)``.
        ``self.a`` becomes a 1-D float vector of length ``self.a_dim`` whose
        entries are each set to 1 with probability 0.2 and 0 otherwise.
        ``self.a_last`` ends up bound to that same vector, and ``self.s`` /
        ``self.s_`` both hold ``np.transpose`` of whatever the first
        ``self.env.update(...)`` call returns.

        Args:
            Date: passed through unchanged to ``envirment.Env``.
            path: passed through unchanged to ``envirment.Env``.
        """
        self.env = envirment.Env(Date, path)

        # One uniform draw per action component, taken in index order.
        initial_bits = [
            1.0 if np.random.uniform() < 0.2 else 0.0
            for _ in range(self.a_dim)
        ]
        self.a = np.array(initial_bits, dtype=float)

        # All-zero vectors are installed first; the state ones are replaced
        # right after the environment has been stepped once.
        self.a_last = np.zeros(self.a_dim)
        self.s = np.zeros(self.s_dim)
        self.s_ = np.zeros(self.s_dim)

        first_state = self.env.update(self.a, self.a_last, 0, 0)
        self.s = first_state
        self.s_ = first_state
        # NOTE(review): from here on a_last is the very same object as a,
        # not a copy -- confirm that no caller mutates self.a in place.
        self.a_last = self.a
        self.s = np.transpose(first_state)
        self.s_ = np.transpose(first_state)
Ejemplo n.º 2
0
    # Create the output sub-directories underneath `path`.
    # NOTE(review): os.makedirs is called without exist_ok, so each call
    # raises FileExistsError when its directory is already present --
    # presumably the enclosing statement (not visible here) guards against
    # that; confirm.
    os.makedirs(path+'/car_L1_reward_figure')
    os.makedirs(path+'/AdaptSpeed')
    os.makedirs(path+'/ConvergenceRate')
    os.makedirs(path+'/log')
    os.makedirs(path+'/result_images')
    os.makedirs(path+'/SystemPerformance')

# Handed to envirment.Env below as its first argument.
Date = 1001

# ---------------------------------------------------------------------------
# main program
# ---------------------------------------------------------------------------
# Disabled alternatives kept for reference:
#   dqn = dqn.DQN()
#   ddpg = ddpg.DDPG(a_dim, s_dim)
#   retarder = Queue()

# Policies and environment (construction order is kept as-is).
Agent = control_group.GreedyPolicy.Greedy(a_dim)
ddpg = PDDPG.PDDPG(a_dim, s_dim)
env = envirment.Env(Date, path)

# Output / reporting helpers.
sys_per = output.SystemPerformance(a_dim)
line_fig = output.LineFigures(a_dim)
conv_rate = output.ConvergenceRate(MAX_EPISODES, SLOTNUM, a_dim, s_dim)
adapt_speed = output.AdaptSpeed(SLOTNUM, a_dim)

EPSILON = 0.95

# Initial action vector: 1-D floats of length a_dim, each entry set to 1 with
# probability 0.2 and left at 0 otherwise (one uniform draw per entry).
a = np.zeros(a_dim)
for i in range(a_dim):
    a[i] = 1.0 if np.random.uniform() < 0.2 else 0.0