Esempio n. 1
0
class DeepQLearning(object):
    """Q-learning agent with a main and a target network over a board game.

    The action space is a flat index built by ``makemap``: first every
    straight or diagonal move between two distinct squares of the board
    (2940 of them on the 10x10 board), then one "same square" action
    ``(x, y, x, y)`` per square (100 more, 3040 in total).

    ``learn`` selects the best next action with the main network and
    evaluates it with the target network (double Q-learning style), then
    reports per-sample errors back to the replay buffer.

    NOTE(review): ``NeuralNetwork`` and ``ExperienceReplay`` are defined
    outside this block; the comments below only describe how they are called
    here.
    """

    # Edge length of the board the action encoding is built for.
    BOARD_SIZE = 10

    def __init__(self, expsize=10000):
        """Create both networks, the replay buffer and the action tables.

        Args:
            expsize: value forwarded to ``ExperienceReplay`` (presumably its
                capacity -- confirm against that class).
        """
        self.targetNN = NeuralNetwork()
        self.mainNN = NeuralNetwork()
        self.exp = ExperienceReplay(expsize)
        self.batch_size = 512
        # Greedy threshold: getNext explores when a uniform draw exceeds it.
        self.Epsilon = 0.9
        # Discount applied to the bootstrapped next-state value in learn.
        self.Lambda = 0.9
        # self.map: action index -> [x1, y1, x2, y2]
        # self.map_: (x1, y1, x2, y2) -> action index
        self.map, self.map_ = self.makemap(self.BOARD_SIZE)

    def start(self):
        """Call ``DuelingNetwork()`` on the main and the target network."""
        self.mainNN.DuelingNetwork()
        self.targetNN.DuelingNetwork()

    def load(self, name):
        """Load the model called ``name`` into both networks."""
        self.targetNN.Loadmodel(name)
        self.mainNN.Loadmodel(name)

    def saveModel(self, name):
        """Save the target network under ``name``."""
        self.targetNN.Savemodel(name)

    def getNext(self, situation, numlist, israndom):
        """Choose the next action, either at random or greedily.

        A random entry of ``numlist`` is used when ``israndom`` is truthy or
        when a uniform draw from [0, 1] exceeds ``self.Epsilon``; otherwise
        the arg-max of the main network's prediction for ``situation`` is
        used.

        Args:
            situation: input handed unchanged to ``mainNN.model.predict``.
            numlist: sequence of candidate moves, each indexable as
                ``[x1, y1, x2, y2]`` and present in ``self.map_``.
            israndom: force the random branch when truthy.

        Returns:
            The tuple produced by ``getMove`` for the chosen action index.
        """
        # The draw is made unconditionally, exactly as before, so the random
        # stream consumed per call does not depend on ``israndom``.
        explore = random.uniform(0, 1) > self.Epsilon
        if explore or israndom:
            move = random.sample(numlist, 1)[0]
            max_index = self.map_[move[0], move[1], move[2], move[3]]
        else:
            # NOTE(review): the arg-max runs over every network output and is
            # not restricted to ``numlist``.
            max_index = self.mainNN.model.predict(situation).argmax()
        return self.getMove(max_index)

    def getMove(self, index):
        """Decode an action index into ``(index, x1, y1, x2, y2)``.

        Indices below ``len(self.map)`` are looked up in ``self.map``. The
        remaining indices encode a single square, returned as
        ``(index, x, y, x, y)`` with ``x, y = divmod(offset, BOARD_SIZE)``.

        Args:
            index: action index; anything accepted by ``int()``.

        Returns:
            A 5-tuple ``(index_as_int, x1, y1, x2, y2)``.
        """
        num = int(index)
        move_count = len(self.map)
        if num < move_count:
            z = self.map[num]
            return num, z[0], z[1], z[2], z[3]
        x, y = divmod(num - move_count, self.BOARD_SIZE)
        return num, x, y, x, y

    def saveEXP(self, fistS, action, reward, done, nextS):
        """Store one transition in the replay buffer."""
        self.exp.add(fistS, action, reward, done, nextS)

    def learn(self):
        """Run one training step on a batch drawn from the replay buffer.

        Does nothing while ``self.exp.tree.flag`` is truthy (presumably the
        buffer is not ready yet -- confirm against ``ExperienceReplay``).

        For each sample the target is
        ``reward + (1 - done) * Lambda * Q_target(next, argmax Q_main(next))``
        written at the taken action's position in an otherwise zero vector.
        The error sent back to the replay buffer is the absolute difference
        between that target and the main network's current value for the
        sample's own first state and action.
        """
        if self.exp.tree.flag:
            return
        fistS, actrew, nextS, idxl, isweight = self.exp.getDate(self.batch_size)

        # Main network picks the next action, target network evaluates it.
        qtarget = []
        for state in nextS:
            targetv = self.targetNN.model.predict(state)
            mainv = self.mainNN.model.predict(state)
            for tv, mv in zip(targetv, mainv):
                qtarget.append(tv[argmax(mv)])

        # Current estimates for the states the actions were taken in. These
        # must be computed before Training changes the main network.
        # Assumes fistS yields per-sample inputs shaped like the entries of
        # nextS (both come from the same getDate call) -- TODO confirm.
        current_q = []
        for state in fistS:
            current_q.extend(self.mainNN.model.predict(state))

        n_actions = len(self.map_)  # 3040 on the 10x10 board
        ans = []
        # Zero-filled so slots never written cannot carry stale memory.
        errors = zeros(self.batch_size)
        for k, (action, reward, done) in enumerate(actrew):
            target = zeros(n_actions)
            target[action] = reward + (1 - done) * self.Lambda * qtarget[k]
            ans.append(target)
            errors[k] = abs(current_q[k][action] - target[action])
        self.mainNN.Training(fistS, ans, isweight, self.batch_size)
        self.exp.batch_updata(idxl, errors)

    def copy(self):
        """Copy the main network's weights into the target network."""
        self.targetNN.copy(self.mainNN.model.get_weights())

    def getmWeight(self):
        """Return the main network's weights."""
        return self.mainNN.model.get_weights()

    def copym(self, mn):
        """Hand the weights ``mn`` to the main network's ``copy``."""
        self.mainNN.copy(mn)

    def makemap(self, n):
        """Build the action tables for an ``n`` x ``n`` board.

        Moves are enumerated square by square (row-major), and for each
        square direction by direction in a fixed order, nearest target
        first. That order defines the action indices and therefore must not
        change.

        Args:
            n: edge length of the board.

        Returns:
            ``(moves, index)`` where ``moves`` is a list of
            ``[x1, y1, x2, y2]`` lists and ``index`` maps
            ``(x1, y1, x2, y2)`` tuples to their position in ``moves``.
            ``index`` additionally maps every ``(x, y, x, y)`` to
            ``len(moves) + x * n + y``.
        """
        directions = (
            (0, 1), (1, 1), (1, 0), (1, -1),
            (0, -1), (-1, -1), (-1, 0), (-1, 1),
        )
        ans = []
        for i in range(n):
            for j in range(n):
                for di, dj in directions:
                    for k in range(1, n):
                        x = i + di * k
                        y = j + dj * k
                        if 0 <= x < n and 0 <= y < n:
                            ans.append([i, j, x, y])

        ans_ = {tuple(move): pos for pos, move in enumerate(ans)}
        k = len(ans)
        for i in range(n):
            for j in range(n):
                ans_[i, j, i, j] = k
                k += 1
        return ans, ans_