Example #1
def run_testcase(filename):
    # find destinations in folder starting_... and ending_...
    finder = findDestinations(filename)
    end = finder.returnDestination()
    start = finder.returnStarting()

    map_file = np.loadtxt('map.txt',dtype=int)

    # set the border rows and columns to MIN_VALUE so the agent stays within bounds
    map_file[0,:] = MIN_VALUE
    map_file[:,0] = MIN_VALUE
    map_file[:,len(map_file)-1]=MIN_VALUE
    map_file[len(map_file)-1,:]=MIN_VALUE
    
    # UAV map emulation
    env = Map(start,end,filename,map_file)
    RL = DeepQNetwork(env.n_actions, env.n_features,
                    learning_rate=0.01,
                    reward_decay=0.9,
                    e_greedy=0.9,
                    replace_target_iter=200,
                    memory_size=2000,
                    output_graph=True,
                    iteration=filename
                    )
    run_map(filename,RL,env)  
    RL.plot_cost()
    # compare to the given results
    compare = Compare(filename)
    print("Finished iteration", filename)
Example #2
def main():
    env = RideHitch("data/norm1000.txt")
    print(env.requests_list)
    RL = DeepQNetwork(env.pool_size,
                      env.state_num,
                      learning_rate=0.01,
                      reward_decay=0.99,
                      e_greedy=1,
                      replace_target_iter=200,
                      memory_size=2000,
                      output_graph=False,
                      T=env.T_threshold,
                      D=env.D_threshold)
    step = 0
    matched_list = []
    for episode in range(100):
        # init
        observation = env.reset(reset_seq=False)
        # if episode % 100 == 0:
        #     print(episode)
        matched = 0
        print("seq size:", env.request_num, "pool size:", env.pool_size)
        while True:
            action = RL.choose_action(observation)
            observation_, reward, done = env.step(action)
            if reward > 0:
                matched += 1
            RL.store_transition(observation, action, reward, observation_)
            if (step > 200) and (step % 5 == 0):
                RL.learn()
            observation = observation_
            if done:
                break
            step += 1
        matched_list.append(matched)
        print("eps", episode, "matching", matched)
    # print(matched_list)
    RL.plot_cost()
Example #3
                    print('episode:' + str(episode) + ' steps:' + str(step) +
                          ' reward:' + str(rwd) + ' eps_greedy:' + str(dqn.epsilon))
                rewards.append(rwd)
                break


if __name__ == '__main__':
    rewards = []

    env = Env(N_VM)

    memories = Memory(MEMORY_SIZE)

    dqn = DeepQNetwork(env.n_actions, env.n_features,
                       learning_rate=0.001,
                       replace_target_iter=200,
                       e_greedy_increment=3e-5
                       )

    run_env(EPISODES, MINI_BATCH)

    dqn.plot_cost()

    plt.plot(np.arange(len(rewards)), rewards)
    plt.plot(np.arange(len(rewards)), [138 for i in range(len(rewards))])  # constant reference line at 138 for comparison
    plt.ylabel('reward')
    plt.xlabel('episode')
    plt.show()
Example #4
            rl.store_transition(state, action, reward, state_)

            ep_r += reward
            if rl.memory_full:
                # start learning once the memory has been filled
                rl.learn()

            state = state_
            if done:
                print('Ep: %i | %s | ep_r: %.1f' %
                      (i, '---' if not done else 'done', ep_r))
                break
    rl.save()


def eval():
    rl.restore()
    # env.render()
    state = env.reset(clf)
    while True:
        env.render()
        action = rl.choose_action(state)
        state, reward, done = env.step(action, clf)


if ON_TRAIN:
    train()
    rl.plot_cost()
else:
    eval()
Example #5
DQN = DeepQNetwork(
    n_actions,
    n_features,
    learning_rate=0.03,
    reward_decay=0.9,
    replace_target_iter=150,
    memory_size=1000,
    # output_graph=True
)
t = threading.Thread(target=run)
t.daemon = True
t.start()
t.join   # note: without parentheses this is a no-op; the daemon thread keeps running while the simulation starts
start_simulation()
DQN.plot_q_t()
DQN.plot_cost()

plot_values = []
accumulation = 0
for i in range(len(scores)):
    accumulation += scores[i]
    if (i + 1) % 500 == 0:
        plot_values.append(accumulation / 500.0)
        accumulation = 0

print(DQN.epsilon)
print(DQN.learn_step_counter)

# print(scores)
print(plot_values)
average = np.array(plot_values)
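As a side note, the per-500-episode averaging above can be written more compactly with numpy (assuming len(scores) is an exact multiple of 500; illustrative only):

averages = np.asarray(scores).reshape(-1, 500).mean(axis=1)  # mean of each consecutive block of 500 scores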
Example #6
                break
            step += 1
        s.append(count)

    plt.plot(np.arange(len(s)), s)
    plt.ylabel('points to goal')
    plt.xlabel('training steps')
    plt.savefig("figPtsv1.png")

    total_time = time.time() - start
    f = open("trainTime.txt", "w+")
    f.write(str(total_time))
    f.close()
    print('Finished')


if __name__ == "__main__":
    # maze game
    env = Map()
    RL = DeepQNetwork(env.n_actions,
                      env.n_features,
                      learning_rate=0.01,
                      reward_decay=0.9,
                      e_greedy=0.9,
                      replace_target_iter=200,
                      memory_size=2000,
                      output_graph=True)
    run_map()

    RL.plot_cost()
Example #7
class view(tkinter.Tk):
    def __init__(self):
        self.gameStart=False
        self.status=False
        self.reward=0
        super(view, self).__init__()
        self.n_actions = 361    # number of possible actions (19*19 board positions)
        self.n_features = 361
        self.doneList=[]
        self.allphoto=[]
        self.initView()
        self.env=env()
        self.wobservation=None
        self.wobservation_=None
        self.action1=None
        self.RL = DeepQNetwork(self.n_actions, self.n_features )

    def callback(self,event):
        if self.gameStart:
            mouse_x = event.x
            mouse_y = event.y
            if 590 > mouse_x > 20 and 590 > mouse_y > 20:
                # a is the horizontal index, b the vertical index
                a = round((mouse_x - 40) / 30)
                b = round((mouse_y - 40) / 30)
                action = b * 19 + a
                # self.env.qipan[b, a] = 2, the human (non-computer) side
                observation =self.getdouble(np.reshape(np.copy(self.env.qipan), [1, space]))
                bobservation=self.transfore(observation)
                qipan,observation_, reward, done=self.step(action, 'Black')
                bobservation_=self.transfore(observation_)
                print('reward of the human move: %d' % reward)
                self.RL.store_transition(bobservation, action, reward*1.5, bobservation_)  # assume the human's move is near-optimal, hence the larger reward weight
                if done:
                    tkinter.messagebox.showinfo(title='Notice', message='You win!')
                    self.RL.learn(flag=2)
                    self.RL.saveavarriable()
                    self.RL.plot_cost()
                    self.gameStart=False
                # self.status = True
                # the computer chooses its action
                self.bqipan=np.copy(self.env.qipan)
                wobservation = self.getdouble(np.reshape(self.bqipan,[1,space]))
                action1 = self.RL.choose_action(self.bqipan, wobservation)     # let the computer choose its next move
                bqipan_,wobservation_,reward,done=self.step(action1,'White')
                print("reward of the computer's move: %d" % reward)
                self.RL.store_transition(self.transfore(wobservation), action1, reward, self.transfore(wobservation_))  # store the computer's transition
                if done:
                    tkinter.messagebox.showinfo(title='Notice', message='You lose!')
                    self.RL.saveavarriable()
                    self.RL.plot_cost()
                    self.gameStart = False

    def initView(self):
        def buttonCallBack():
            self.RL.getvarriable()
            self.gameStart = True
            if len(self.allphoto) > 0:

                for i in self.allphoto:
                    self.w.delete(i)

            self.allphoto.clear()
            self.doneList.clear()
            observation = self.env.reset()

        self.master = Tk()
        self.master.title("Gomoku")
        self.master.resizable(width=False, height=False)
        self.w = Canvas(self.master, bg="#FFFFF0", width=700, height=630)
        for c in range(40, 610, 30):  # vertical grid lines
            x0, y0, x1, y1 = c, 40, c, 580
            self.w.create_line(x0, y0, x1, y1)
        for r in range(40, 610, 30):  # horizontal grid lines
            x0, y0, x1, y1 = 40, r, 580, r
            self.w.create_line(x0, y0, x1, y1)
        Label(self.w, text=1, bg="#FFFFF0").place(x=5, y=5)
        x1 = 60
        y1 = 5
        for i in range(2, 20):
            Label(self.w, text=i, bg="#FFFFF0").place(x=x1, y=y1)
            x1 += 30
        x1 = 5
        y1 = 60
        for i in range(2, 20):
            Label(self.w, text=i, bg="#FFFFF0").place(x=x1, y=y1)
            y1 += 30
        Button(self.w, text="Start Game", bg="yellow", activebackground="Black", command=buttonCallBack).place(x=610, y=500)
        self.w.bind("<Double-Button-1>", self.callback)
        self.w.pack()
        #self.master.mainloop()


    def show(self,action,flag):
        y=(action//19)*30+40
        x=(action%19)*30+40
        if flag=='Black':
            a=self.w.create_oval(x-14,y-14,x+14,y+14,fill="Black")
        elif flag=='White':
            a = self.w.create_oval(x-14, y-14, x+14, y+14, fill="White")
        self.allphoto.append(a)
        self.update()

    def setPosition(self,action,flag):
        if action in self.doneList:
            tkinter.messagebox.showinfo(title='Notice', message='This position is not available')

        else:
            self.doneList.append(action)
            self.show(action,flag)

    def reset(self):
        if len(self.allphoto)>0:

            for i in self.allphoto:
                self.w.delete(i)
        self.allphoto.clear()
        self.doneList.clear()
        self.gameStart=False
        observation=self.env.reset()
        ob=self.getdouble(np.reshape(observation,[1,space]))
        return np.copy(self.env.qipan),ob


    #############################################
    def step(self,action,flag):
        # return the reward depending on which side just moved
        # print(flag)
        # print('ation:%d'%action)
        p1 = self.env.pwb(flag)
        p2 = self.env.pwn(action, flag)  # winning potential after the move is played

        # print('score before the move: %d' % p1)
        # print('score after the move: %d' % p2)
        s=p2-p1
        # if s<=0:
        #     self.reward=0
        # elif 0<s<150:
        #     self.reward=300
        # elif 150<=s<800:
        #     self.reward=500
        # elif 800<=s<3500:
        #     self.reward=2000
        # elif 3500<=s<4800:
        #     self.reward=4000
        # elif s>4800:
        #     self.reward=6000

        print("该步的回报值:%d"%s)

        self.setPosition(action,flag)
        if(s==-120):
            time.sleep(10000)
        qipan=self.getdouble(np.reshape(np.copy(self.env.qipan),[1,space]))
        return np.copy(self.env.qipan),qipan,s,self.env.done


    def tryPosition(self,Ob,ation,flag):
         qipan=np.copy(Ob)
         if flag=='White':
             qipan[0,ation]=1
         else:
             qipan[0,ation]=2
         return qipan


    def render(self):
        self.update()

    def transfore(self,observation):
        # print(np.shape(shape)[1])
        s1=observation[0,:space]
        s2=observation[0,space:]
        s=np.hstack((s1,s2))
        return s

    # convert the 1*361 board into a 1*722 representation
    def getdouble(self,qipan):
        w_qipan=np.zeros([1,space])
        b_qipan=np.zeros([1,space])
        w_array=np.where(qipan==1)[1]
        b_array=np.where(qipan==2)[1]
        w_qipan[0,w_array]=1
        b_qipan[0,b_array]=1
        s=np.hstack((w_qipan,b_qipan))  # 1*722 matrix: the first 361 entries encode white stones, the last 361 encode black stones
        return s
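
A small standalone illustration of the 1*361 to 1*722 board encoding that getdouble performs, assuming space = 361 as implied by n_features; the board values are toy data, not from the original program.

import numpy as np

space = 361

def encode_board(qipan):
    # white stones (value 1) fill the first 361 entries, black stones (value 2) the last 361
    w_plane = np.zeros([1, space])
    b_plane = np.zeros([1, space])
    w_plane[0, np.where(qipan == 1)[1]] = 1
    b_plane[0, np.where(qipan == 2)[1]] = 1
    return np.hstack((w_plane, b_plane))

board = np.zeros([1, space], dtype=int)
board[0, 0] = 1      # one white stone at position 0
board[0, 180] = 2    # one black stone at position 180
print(encode_board(board).shape)   # (1, 722)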