def CartPoleExperiment(Episodes=100,nk=0):

    print
    print '==================================================================='
    print '           INIT EXPERIMENT','k='+str(nk+1)

    # results of the experiment
    x = range(1,Episodes+1)
    y =[]
    yr =[]

    #Build the Environment
    Env = CartPoleEnvironment()

    # Build a function approximator
    #Q = kNNQ(nactions=Env.nactions,input_ranges=Env.input_ranges,nelemns=[2,3,10,2],npoints=False,k=1,alpha=0.25)
    Q = kNNQ(nactions=Env.nactions,input_ranges=Env.input_ranges,nelemns=[2+7,3+7,10+3,2+7],npoints=False,k=4,alpha=0.3,lm=0.95)
    #Q = kNNQ(nactions=Env.nactions,input_ranges=Env.input_ranges,nelemns=False,npoints=100,k=4,alpha=0.3,lm=0.95)
    #Q = lwprQ(nactions=Env.nactions,input_ranges=Env.input_ranges)
    #Q = RNeuroQ(Env.nactions, Env.input_ranges, 200, Env.reward_ranges,alpha=0.3)
    #Q =  NeuroQ(Env.nactions, Env.input_ranges, 100, Env.reward_ranges,Env.deep_in,Env.deep_out,alpha=0.3)
    # Get the Action Selector
    As = e_greedy_selection(epsilon=0.0)
    #As = e_softmax_selection(epsilon=0.3)
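    # Note: with epsilon=0.0 the e-greedy selection is purely greedy from the
    # first episode, so the epsilon *= 0.9 decay in the loop below never has
    # any effect.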

    #Build the Agent
    CP = FARLBase(Q,Env,As,gamma=1.0)
    CP.Environment.graphs=True


    for i in range(Episodes):
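        # SARSAEpisode(1000) runs one episode (1000 looks like the per-episode
        # step limit) and, judging from the print below, returns
        # [total_reward, steps].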
        result = CP.SARSAEpisode(1000)
        #result = CP.QLearningEpisode(1000)
        CP.SelectAction.epsilon = CP.SelectAction.epsilon * 0.9
        CP.PlotLearningCurve(i,result[1],CP.SelectAction.epsilon)
        print 'Episode:', str(i), 'Total Reward:', str(result[0]), 'Steps:', str(result[1])
        y.append(result[1])
        yr.append(result[0])
##        if i==50:
##            miny =min(y)
##            figure(i)
##            plot(range(1,len(y)+1),y,'k')
##            title(r'$ k = 4, \quad  \lambda=0.95,\quad \epsilon=0 , \quad  \alpha=0.3 $')
##            grid('on')
##            axis([1, i, 0, 1100])
##            xlabel('Episodes')
##            ylabel('Steps')
##            savefig('cpresultdiscrete.pdf')
##            print "salvado"
##            close(i)



    CP.LearningCurveGraph.display.visible = False

    return [[x,y,nk],[x,yr,nk]]
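
A minimal driver sketch (not part of the original source) showing how the
returned [[x, y, nk], [x, yr, nk]] pair might be consumed; it assumes the FARL
classes used above (CartPoleEnvironment, kNNQ, e_greedy_selection, FARLBase)
are importable and that the plotting windows the experiment opens are
acceptable in your setup.

if __name__ == '__main__':
    # Hypothetical driver: run the experiment for a few settings of nk and
    # summarise each run from the step curve it returns.
    runs = [CartPoleExperiment(Episodes=100, nk=k) for k in range(3)]
    for steps_curve, reward_curve in runs:
        episodes, steps, k = steps_curve
        # max(steps) is the longest the pole was kept balanced in any episode.
        print('run k=%d: longest episode lasted %d steps' % (k + 1, max(steps)))
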
Example #3
def Experiment(Episodes=100, nk=1):
    print()
    print('===================================================================')
    print('           INIT EXPERIMENT', 'k=' + str(nk + 1))

    strfilename = "PDATA.npy"

    # results of the experiment
    x = list(range(1, Episodes + 1))
    y = []

    #Build the Environment
    Env = ProleEnvironment()

    # Build a function approximator
    #Q = kNNQ(nactions=Env.nactions,input_ranges=Env.input_ranges,nelemns=[8,8,8,8,8,8],npoints=False,k=1,alpha=0.3,lm=0.95)
    Q = kNNQ(nactions=Env.nactions,
             input_ranges=Env.input_ranges,
             nelemns=[8, 8, 8, 8, 8, 8],
             npoints=False,
             k=2**6,
             alpha=10.5,
             lm=0.95)
    Q.Load(strfilename)
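    # Note (not in the original source): on a first run "PDATA.npy" will not
    # exist yet, so an unguarded Q.Load() is likely to fail; a guarded load,
    #     if os.path.exists(strfilename): Q.Load(strfilename)
    # (with "import os" at the top), avoids that, assuming kNNQ keeps the
    # Load()/Save() pair used in this snippet.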

    #Experimental
    #Q = lwprQ(nactions=Env.nactions,input_ranges=Env.input_ranges)

    # Get the Action Selector
    As = e_greedy_selection(epsilon=0.1)
    #As = e_softmax_selection(epsilon=0.1)

    #Build the Agent
    RL = FARLBase(Q, Env, As, gamma=1.0)
    RL.Environment.graphs = True

    for i in range(Episodes):
        t1 = time.perf_counter()  # time.clock() was removed in Python 3.8
        result = RL.SARSAEpisode(1000)
        #result = RL.QLearningEpisode(1000)
        t2 = time.perf_counter() - t1
        RL.SelectAction.epsilon *= 0.9
        #RL.Q.alpha *= 0.995

        #RL.PlotLearningCurve(i,result[1],RL.SelectAction.epsilon)
        print('Episode', i, ' Steps:', result[1], 'Reward:', result[0], 'time',
              t2, 'alpha', RL.Q.alpha)
        #Q.Save(strfilename)
        y.append(result[1])

    return [x, y, nk]
Example #4
def AcrobotExperiment(Episodes=100, nk=1):
    print()
    print('===================================================================')
    print('           INIT EXPERIMENT', 'k=' + str(nk))

    # results of the experiment
    x = list(range(1, Episodes + 1))
    y = []

    # Build the Environment
    ACEnv = AcrobotEnvironment()

    # Build a function approximator
    Q = kNNQ(nactions=ACEnv.nactions,
             input_ranges=ACEnv.input_ranges,
             nelemns=[11, 11, 11, 11],
             npoints=False,
             k=2**4,
             alpha=5.0,
             lm=0.90)

    # Q.Q+=10000
    # Q = kNNQ(nactions=ACEnv.nactions,input_ranges=ACEnv.input_ranges,nelemns=False,npoints=300,k=5,alpha=0.3)
    # Q = NeuroQ(ACEnv.nactions, ACEnv.input_ranges, 30+nk, ACEnv.reward_ranges,ACEnv.deep_in,ACEnv.deep_out,alpha=0.3)
    # Q = RNeuroQ(MCEnv.nactions, MCEnv.input_ranges, 10, MCEnv.reward_ranges,alpha=0.3)
    # Q = SOMQ(nactions=MCEnv.nactions,size_x=20,size_y=20,input_ranges=MCEnv.input_ranges,alpha=0.3)
    # Q = lwprQ(nactions=ACEnv.nactions,input_ranges=ACEnv.input_ranges)
    # Get the Action Selector
    As = e_greedy_selection(epsilon=0.000)
    # As = e_softmax_selection(epsilon=0.1)

    # Build the Agent
    AC = FARLBase(Q, ACEnv, As, gamma=1.0)

    AC.Environment.graphs = True  # False
    # AC.Environment.PlotPopulation(MC.Q)

    for i in range(Episodes):
        t1 = time.perf_counter()  # time.clock() was removed in Python 3.8
        result = AC.SARSAEpisode(1000)
        # result = AC.QLearningEpisode(1000)
        t2 = time.perf_counter() - t1
        # AC.SelectAction.epsilon = AC.SelectAction.epsilon * 0.9
        AC.PlotLearningCurve(i, result[1], AC.SelectAction.epsilon)
        # AC.Environment.PlotPopulation(MC.Q)
        print('Episode', str(i), ' Steps:', str(result[1]), 'time', t2)
        y.append(result[1])

    return [x, y, nk]
Example #5
def Experiment(Episodes=100,nk=1):
    print
    print '==================================================================='
    print '           INIT EXPERIMENT','k='+str(nk+1)

    strfilename = "PDATA.npy"

    # results of the experiment
    x = range(1,Episodes+1)
    y =[]

    #Build the Environment
    Env = ProleEnvironment()

    # Build a function approximator
    #Q = kNNQ(nactions=Env.nactions,input_ranges=Env.input_ranges,nelemns=[8,8,8,8,8,8],npoints=False,k=1,alpha=0.3,lm=0.95)
    Q = kNNQ(nactions=Env.nactions,input_ranges=Env.input_ranges,nelemns=[8,8,8,8,8,8],npoints=False,k=2**6,alpha=10.5,lm=0.95)
    Q.Load(strfilename)

    #Experimental
    #Q = lwprQ(nactions=Env.nactions,input_ranges=Env.input_ranges)


    # Get the Action Selector
    As = e_greedy_selection(epsilon=0.1)
    #As = e_softmax_selection(epsilon=0.1)

    #Build the Agent
    RL = FARLBase(Q,Env,As,gamma=1.0)
    RL.Environment.graphs=True



    for i in range(Episodes):
        t1= time.clock()
        result = RL.SARSAEpisode(1000)
        #result = RL.QLearningEpisode(1000)
        t2 = time.clock()-t1
        RL.SelectAction.epsilon *= 0.9
        #RL.Q.alpha *= 0.995

        #RL.PlotLearningCurve(i,result[1],RL.SelectAction.epsilon)
        print 'Episode',i,' Steps:',result[1],'Reward:',result[0],'time',t2,'alpha',RL.Q.alpha
        #Q.Save(strfilename)
        y.append(result[1])


    return [x,y,nk]
def AcrobotExperiment(Episodes=100,nk=1):
    print
    print '==================================================================='
    print '           INIT EXPERIMENT','k='+str(nk)

    # results of the experiment
    x = range(1,Episodes+1)
    y =[]

    #Build the Environment
    ACEnv = AcrobotEnvironment()

    # Build a function approximator
    Q = kNNQ(nactions=ACEnv.nactions,input_ranges=ACEnv.input_ranges,nelemns=[11,11,11,11],npoints=False,k=2**4,alpha=5.0,lm=0.90)

    #Q.Q+=10000
    #Q = kNNQ(nactions=ACEnv.nactions,input_ranges=ACEnv.input_ranges,nelemns=False,npoints=300,k=5,alpha=0.3)
    #Q = NeuroQ(ACEnv.nactions, ACEnv.input_ranges, 30+nk, ACEnv.reward_ranges,ACEnv.deep_in,ACEnv.deep_out,alpha=0.3)
    #Q = RNeuroQ(MCEnv.nactions, MCEnv.input_ranges, 10, MCEnv.reward_ranges,alpha=0.3)
    #Q = SOMQ(nactions=MCEnv.nactions,size_x=20,size_y=20,input_ranges=MCEnv.input_ranges,alpha=0.3)
    #Q = lwprQ(nactions=ACEnv.nactions,input_ranges=ACEnv.input_ranges)
    # Get the Action Selector
    As = e_greedy_selection(epsilon=0.000)
    #As = e_softmax_selection(epsilon=0.1)

    #Build the Agent
    AC = FARLBase(Q,ACEnv,As,gamma=1.0)

    AC.Environment.graphs = True  # False
    #AC.Environment.PlotPopulation(MC.Q)




    for i in range(Episodes):
        t1= time.clock()
        result = AC.SARSAEpisode(1000)
        #result = AC.QLearningEpisode(1000)
        t2 = time.clock()-t1
        #AC.SelectAction.epsilon = AC.SelectAction.epsilon * 0.9
        AC.PlotLearningCurve(i,result[1],AC.SelectAction.epsilon)
        #AC.Environment.PlotPopulation(MC.Q)
        print 'Episode',str(i),' Steps:',str(result[1]),'time',t2
        y.append(result[1])

    return [x,y,nk]
Example #7
def MountainCarExperiment(Episodes=100, nk=1):
    print
    print '==================================================================='
    print '           INIT EXPERIMENT', 'k=' + str(nk + 1)

    # results of the experiment
    x = range(1, Episodes + 1)
    y = []

    #Build the Environment
    MCEnv = MCEnvironment()

    # Build a function approximator
    #best
    Q = kNNQ(nactions=MCEnv.nactions,
             input_ranges=MCEnv.input_ranges,
             nelemns=[10 + 10, 5 + 10],
             npoints=False,
             k=nk + 1,
             alpha=0.9,
             lm=0.95)
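    # Here the nk argument sets the approximator itself: the kNN uses
    # k = nk + 1 nearest neighbours, which is the 'k' printed in the header.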
    #Q = kNNQ(nactions=MCEnv.nactions,input_ranges=MCEnv.input_ranges,nelemns=False,npoints=100,k=3**2,alpha=3,lm=0.0)
    #Q = SN(nactions=MCEnv.nactions,input_ranges=MCEnv.input_ranges,nelems=[20,15],k=[2,2],alpha=0.3,lm=0.0)
    #Experimental but acceptable performance
    #Q = lwprQ(nactions=MCEnv.nactions,input_ranges=MCEnv.input_ranges)
    #Q = NeuroQ(MCEnv.nactions, MCEnv.input_ranges, 60, MCEnv.reward_ranges,MCEnv.deep_in,MCEnv.deep_out,alpha=0.3)

    # Experimental and not functional at the moment.
    #Q = DkNNQ(nactions=MCEnv.nactions,input_ranges=MCEnv.input_ranges,nelemns=[10+10,5+10],npoints=False,k=nk+1,alpha=0.5)
    #Q = RNeuroQ(MCEnv.nactions, MCEnv.input_ranges, 10, MCEnv.reward_ranges,alpha=0.3)
    #Q = kRNeuroQ(MCEnv.nactions, MCEnv.input_ranges, 10, MCEnv.reward_ranges,alpha=0.3)
    #Q = SOMQ(nactions=MCEnv.nactions,size_x=10,size_y=10,input_ranges=MCEnv.input_ranges,alpha=0.3)

    # Get the Action Selector
    As = e_greedy_selection(epsilon=0.001)
    #As = e_softmax_selection(epsilon=0.1)

    #Build the Agent
    MC = FARLBase(Q, MCEnv, As, gamma=1.0)
    MC.Environment.graphs = True
    MC.Environment.PlotPopulation(MC.Q)

    for i in range(Episodes):
        t1 = time.clock()
        result = MC.SARSAEpisode(1000)
        #MC.Q.PushtTaces()
        MC.Q.ResetTraces()
        #result = MC.QLearningEpisode(1000)
        t2 = time.clock() - t1
        MC.SelectAction.epsilon *= 0.9
        #MC.Q.alpha *= 0.995

        MC.PlotLearningCurve(i, result[1], MC.SelectAction.epsilon)
        MC.Environment.PlotPopulation(MC.Q)
        print 'Episode', i, ' Steps:', result[1], 'time', t2, 'alpha', MC.Q.alpha

        y.append(result[1])


##        if i==50:
##
##            figure(i)
##            plot(range(1,len(y)+1),y,'k')
##            title(r'$ \min=105,\quad k = 4, \quad  \lambda=0.95, \quad  \epsilon=0.0, \quad \alpha=0.9 $')
##            grid('on')
##            axis([1, i, 0, 800])
##            xlabel('Episodes')
##            ylabel('Steps')
##            savefig('mcresult.pdf')
##            print "salvado"
##            close(i)

    return [x, y, nk]
def MountainCarExperiment(Episodes=100,nk=1):
    print
    print '==================================================================='
    print '           INIT EXPERIMENT','k='+str(nk+1)

    # results of the experiment
    x = range(1,Episodes+1)
    y =[]

    #Build the Environment
    MCEnv = MCEnvironment()

    # Build a function approximator
    #best
    Q = kNNQ(nactions=MCEnv.nactions,input_ranges=MCEnv.input_ranges,nelemns=[10+10,5+10],npoints=False,k=nk+1,alpha=0.9,lm=0.95)
    #Q = kNNQ(nactions=MCEnv.nactions,input_ranges=MCEnv.input_ranges,nelemns=False,npoints=100,k=3**2,alpha=3,lm=0.0)
    #Q = SN(nactions=MCEnv.nactions,input_ranges=MCEnv.input_ranges,nelems=[20,15],k=[2,2],alpha=0.3,lm=0.0)
    #Experimental but acceptable performance
    #Q = lwprQ(nactions=MCEnv.nactions,input_ranges=MCEnv.input_ranges)
    #Q = NeuroQ(MCEnv.nactions, MCEnv.input_ranges, 60, MCEnv.reward_ranges,MCEnv.deep_in,MCEnv.deep_out,alpha=0.3)



    # Experimental and not functional at the moment.
    #Q = DkNNQ(nactions=MCEnv.nactions,input_ranges=MCEnv.input_ranges,nelemns=[10+10,5+10],npoints=False,k=nk+1,alpha=0.5)
    #Q = RNeuroQ(MCEnv.nactions, MCEnv.input_ranges, 10, MCEnv.reward_ranges,alpha=0.3)
    #Q = kRNeuroQ(MCEnv.nactions, MCEnv.input_ranges, 10, MCEnv.reward_ranges,alpha=0.3)
    #Q = SOMQ(nactions=MCEnv.nactions,size_x=10,size_y=10,input_ranges=MCEnv.input_ranges,alpha=0.3)

    # Get the Action Selector
    As = e_greedy_selection(epsilon=0.001)
    #As = e_softmax_selection(epsilon=0.1)

    #Build the Agent
    MC = FARLBase(Q,MCEnv,As,gamma=1.0)
    MC.Environment.graphs=True
    MC.Environment.PlotPopulation(MC.Q)




    for i in range(Episodes):
        t1= time.clock()
        result = MC.SARSAEpisode(1000)
        #MC.Q.PushtTaces()
        MC.Q.ResetTraces()
        #result = MC.QLearningEpisode(1000)
        t2 = time.clock()-t1
        MC.SelectAction.epsilon *= 0.9
        #MC.Q.alpha *= 0.995

        MC.PlotLearningCurve(i,result[1],MC.SelectAction.epsilon)
        MC.Environment.PlotPopulation(MC.Q)
        print 'Episode',i,' Steps:',result[1],'time',t2,'alpha',MC.Q.alpha

        y.append(result[1])
##        if i==50:
##
##            figure(i)
##            plot(range(1,len(y)+1),y,'k')
##            title(r'$ \min=105,\quad k = 4, \quad  \lambda=0.95, \quad  \epsilon=0.0, \quad \alpha=0.9 $')
##            grid('on')
##            axis([1, i, 0, 800])
##            xlabel('Episodes')
##            ylabel('Steps')
##            savefig('mcresult.pdf')
##            print "salvado"
##            close(i)


    return [x,y,nk]