import time

# These experiment scripts assume that the function approximators (kNNQ, ...),
# action selectors (e_greedy_selection, ...), the environments and the
# FARLBase agent are imported from the surrounding codebase; the exact module
# paths are not shown in this file.


def CartPoleExperiment(Episodes=100, nk=0):
    print()
    print('===================================================================')
    print(' INIT EXPERIMENT', 'k=' + str(nk + 1))

    # results of the experiment
    x = list(range(1, Episodes + 1))
    y = []
    yr = []

    # Build the Environment
    Env = CartPoleEnvironment()

    # Build a function approximator
    #Q = kNNQ(nactions=Env.nactions,input_ranges=Env.input_ranges,nelemns=[2,3,10,2],npoints=False,k=1,alpha=0.25)
    Q = kNNQ(nactions=Env.nactions, input_ranges=Env.input_ranges, nelemns=[2+7, 3+7, 10+3, 2+7], npoints=False, k=4, alpha=0.3, lm=0.95)
    #Q = kNNQ(nactions=Env.nactions,input_ranges=Env.input_ranges,nelemns=False,npoints=100,k=4,alpha=0.3,lm=0.95)
    #Q = lwprQ(nactions=Env.nactions,input_ranges=Env.input_ranges)
    #Q = RNeuroQ(Env.nactions, Env.input_ranges, 200, Env.reward_ranges,alpha=0.3)
    #Q = NeuroQ(Env.nactions, Env.input_ranges, 100, Env.reward_ranges,Env.deep_in,Env.deep_out,alpha=0.3)

    # Get the Action Selector
    As = e_greedy_selection(epsilon=0.0)
    #As = e_softmax_selection(epsilon=0.3)

    # Build the Agent
    CP = FARLBase(Q, Env, As, gamma=1.0)
    CP.Environment.graphs = True

    for i in range(Episodes):
        result = CP.SARSAEpisode(1000)
        #result = CP.QLearningEpisode(1000)
        CP.SelectAction.epsilon *= 0.9
        CP.PlotLearningCurve(i, result[1], CP.SelectAction.epsilon)
        print('Episode:', i, 'Total Reward:', result[0], 'Steps:', result[1])
        y.append(result[1])
        yr.append(result[0])
##        if i == 50:
##            miny = min(y)
##            figure(i)
##            plot(range(1, len(y) + 1), y, 'k')
##            title(r'$ k = 4, \quad \lambda=0.95, \quad \epsilon=0, \quad \alpha=0.3 $')
##            grid('on')
##            axis([1, i, 0, 1100])
##            xlabel('Episodes')
##            ylabel('Steps')
##            savefig('cpresultdiscrete.pdf')
##            print('saved')
##            close(i)

    CP.LearningCurveGraph.display.visible = False
    return [[x, y, nk], [x, yr, nk]]
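# Usage sketch (an assumption, not part of the original script): unlike the
# other experiments, CartPoleExperiment returns two curves, [x, steps, nk] and
# [x, rewards, nk]. The helper below shows one way to plot both; it borrows
# the pylab calls from the commented plotting block above, and the function
# and file names are illustrative.

def plot_cartpole_curves(Episodes=100):
    from pylab import figure, plot, xlabel, ylabel, grid, legend, savefig, close
    # Run one cart-pole experiment and unpack the step and reward series.
    steps_curve, reward_curve = CartPoleExperiment(Episodes, nk=0)
    figure(1)
    plot(steps_curve[0], steps_curve[1], 'k', label='Steps')
    plot(reward_curve[0], reward_curve[1], 'b', label='Total reward')
    xlabel('Episodes')
    ylabel('Steps / Total reward')
    grid(True)
    legend()
    savefig('cpresult_curves.pdf')
    close(1)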
def Experiment(Episodes=100, nk=1):
    print()
    print('===================================================================')
    print(' INIT EXPERIMENT', 'k=' + str(nk + 1))

    strfilename = "PDATA.npy"

    # results of the experiment
    x = list(range(1, Episodes + 1))
    y = []

    # Build the Environment
    Env = ProleEnvironment()

    # Build a function approximator
    #Q = kNNQ(nactions=Env.nactions,input_ranges=Env.input_ranges,nelemns=[8,8,8,8,8,8],npoints=False,k=1,alpha=0.3,lm=0.95)
    Q = kNNQ(nactions=Env.nactions, input_ranges=Env.input_ranges, nelemns=[8, 8, 8, 8, 8, 8], npoints=False, k=2**6, alpha=10.5, lm=0.95)
    Q.Load(strfilename)

    # Experimental
    #Q = lwprQ(nactions=Env.nactions,input_ranges=Env.input_ranges)

    # Get the Action Selector
    As = e_greedy_selection(epsilon=0.1)
    #As = e_softmax_selection(epsilon=0.1)

    # Build the Agent
    RL = FARLBase(Q, Env, As, gamma=1.0)
    RL.Environment.graphs = True

    for i in range(Episodes):
        t1 = time.perf_counter()  # time.clock() was removed in Python 3.8
        result = RL.SARSAEpisode(1000)
        #result = RL.QLearningEpisode(1000)
        t2 = time.perf_counter() - t1

        RL.SelectAction.epsilon *= 0.9
        #RL.Q.alpha *= 0.995
        #RL.PlotLearningCurve(i, result[1], RL.SelectAction.epsilon)
        print('Episode', i, ' Steps:', result[1], 'Reward:', result[0], 'time', t2, 'alpha', RL.Q.alpha)
        #Q.Save(strfilename)
        y.append(result[1])

    return [x, y, nk]
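# Checkpointing note (a sketch, not part of the original flow): Q.Load above
# reads the approximator state from PDATA.npy unconditionally, which fails on
# a fresh run when no checkpoint exists yet, and the matching Q.Save call in
# the episode loop is commented out. A guarded variant could look like this;
# the helper name and the save interval are illustrative assumptions.

import os

def load_checkpoint_if_present(Q, strfilename="PDATA.npy"):
    # Restore previous approximator state only when a checkpoint file exists.
    if os.path.exists(strfilename):
        Q.Load(strfilename)

# Inside the episode loop, saving every few episodes keeps the checkpoint
# fresh without paying the write cost on every episode:
#     if i % 10 == 0:
#         Q.Save(strfilename)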
def AcrobotExperiment(Episodes=100, nk=1):
    print()
    print('===================================================================')
    print(' INIT EXPERIMENT', 'k=' + str(nk))

    # results of the experiment
    x = list(range(1, Episodes + 1))
    y = []

    # Build the Environment
    ACEnv = AcrobotEnvironment()

    # Build a function approximator
    Q = kNNQ(nactions=ACEnv.nactions, input_ranges=ACEnv.input_ranges, nelemns=[11, 11, 11, 11], npoints=False, k=2**4, alpha=5.0, lm=0.90)
    #Q.Q += 10000
    #Q = kNNQ(nactions=ACEnv.nactions,input_ranges=ACEnv.input_ranges,nelemns=False,npoints=300,k=5,alpha=0.3)
    #Q = NeuroQ(ACEnv.nactions, ACEnv.input_ranges, 30+nk, ACEnv.reward_ranges,ACEnv.deep_in,ACEnv.deep_out,alpha=0.3)
    #Q = RNeuroQ(MCEnv.nactions, MCEnv.input_ranges, 10, MCEnv.reward_ranges,alpha=0.3)
    #Q = SOMQ(nactions=MCEnv.nactions,size_x=20,size_y=20,input_ranges=MCEnv.input_ranges,alpha=0.3)
    #Q = lwprQ(nactions=ACEnv.nactions,input_ranges=ACEnv.input_ranges)

    # Get the Action Selector
    As = e_greedy_selection(epsilon=0.000)
    #As = e_softmax_selection(epsilon=0.1)

    # Build the Agent
    AC = FARLBase(Q, ACEnv, As, gamma=1.0)
    AC.Environment.graphs = True  # False
    #AC.Environment.PlotPopulation(AC.Q)

    for i in range(Episodes):
        t1 = time.perf_counter()
        result = AC.SARSAEpisode(1000)
        #result = AC.QLearningEpisode(1000)
        t2 = time.perf_counter() - t1

        #AC.SelectAction.epsilon *= 0.9
        AC.PlotLearningCurve(i, result[1], AC.SelectAction.epsilon)
        #AC.Environment.PlotPopulation(AC.Q)
        print('Episode', i, ' Steps:', result[1], 'time', t2)
        y.append(result[1])

    return [x, y, nk]
def MountainCarExperiment(Episodes=100, nk=1):
    print()
    print('===================================================================')
    print(' INIT EXPERIMENT', 'k=' + str(nk + 1))

    # results of the experiment
    x = list(range(1, Episodes + 1))
    y = []

    # Build the Environment
    MCEnv = MCEnvironment()

    # Build a function approximator
    # best
    Q = kNNQ(nactions=MCEnv.nactions, input_ranges=MCEnv.input_ranges, nelemns=[10+10, 5+10], npoints=False, k=nk+1, alpha=0.9, lm=0.95)
    #Q = kNNQ(nactions=MCEnv.nactions,input_ranges=MCEnv.input_ranges,nelemns=False,npoints=100,k=3**2,alpha=3,lm=0.0)
    #Q = SN(nactions=MCEnv.nactions,input_ranges=MCEnv.input_ranges,nelems=[20,15],k=[2,2],alpha=0.3,lm=0.0)

    # Experimental but acceptable performance
    #Q = lwprQ(nactions=MCEnv.nactions,input_ranges=MCEnv.input_ranges)
    #Q = NeuroQ(MCEnv.nactions, MCEnv.input_ranges, 60, MCEnv.reward_ranges,MCEnv.deep_in,MCEnv.deep_out,alpha=0.3)

    # Experimental and not functional at this moment
    #Q = DkNNQ(nactions=MCEnv.nactions,input_ranges=MCEnv.input_ranges,nelemns=[10+10,5+10],npoints=False,k=nk+1,alpha=0.5)
    #Q = RNeuroQ(MCEnv.nactions, MCEnv.input_ranges, 10, MCEnv.reward_ranges,alpha=0.3)
    #Q = kRNeuroQ(MCEnv.nactions, MCEnv.input_ranges, 10, MCEnv.reward_ranges,alpha=0.3)
    #Q = SOMQ(nactions=MCEnv.nactions,size_x=10,size_y=10,input_ranges=MCEnv.input_ranges,alpha=0.3)

    # Get the Action Selector
    As = e_greedy_selection(epsilon=0.001)
    #As = e_softmax_selection(epsilon=0.1)

    # Build the Agent
    MC = FARLBase(Q, MCEnv, As, gamma=1.0)
    MC.Environment.graphs = True
    MC.Environment.PlotPopulation(MC.Q)

    for i in range(Episodes):
        t1 = time.perf_counter()
        result = MC.SARSAEpisode(1000)
        #MC.Q.PushtTaces()
        MC.Q.ResetTraces()
        #result = MC.QLearningEpisode(1000)
        t2 = time.perf_counter() - t1

        MC.SelectAction.epsilon *= 0.9
        #MC.Q.alpha *= 0.995
        MC.PlotLearningCurve(i, result[1], MC.SelectAction.epsilon)
        MC.Environment.PlotPopulation(MC.Q)
        print('Episode', i, ' Steps:', result[1], 'time', t2, 'alpha', MC.Q.alpha)
        y.append(result[1])
##        if i == 50:
##            figure(i)
##            plot(range(1, len(y) + 1), y, 'k')
##            title(r'$ \min=105,\quad k = 4, \quad \lambda=0.95, \quad \epsilon=0.0, \quad \alpha=0.9 $')
##            grid('on')
##            axis([1, i, 0, 800])
##            xlabel('Episodes')
##            ylabel('Steps')
##            savefig('mcresult.pdf')
##            print('saved')
##            close(i)

    return [x, y, nk]
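# Driver sketch (an assumption, not part of the original file): every
# experiment returns [x, y, nk], so sweeping the nk parameter (in
# MountainCarExperiment the kNN approximator uses k = nk + 1) and overlaying
# the learning curves is direct. The episode count, sweep range and output
# file name below are illustrative.

if __name__ == '__main__':
    from pylab import figure, plot, xlabel, ylabel, grid, legend, savefig
    figure(1)
    for nk in range(5):
        # One full learning run per value of nk; y holds steps per episode.
        x, y, k = MountainCarExperiment(Episodes=100, nk=nk)
        plot(x, y, label='k=' + str(k + 1))
    xlabel('Episodes')
    ylabel('Steps')
    grid(True)
    legend()
    savefig('mc_k_sweep.pdf')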