コード例 #1
0
                    
                    #sum_diffs += sv_mc.mc_update( s_hash, alpha, G)
                
                    #sum_diffs += sv_td.td0_update( s_hash=s_hash, alpha=alpha, 
                    #                                     gamma=gamma, sn_hash=sn_hash, 
                    #                                     reward=reward)
            for s_hash in mc_updateD.keys():
                mc_updateD[s_hash] *= alpha
                td_updateD[s_hash] *= alpha
                
            sum_diffs = sum( [abs(v) for v in mc_updateD.values()] + \
                             [abs(v) for v in td_updateD.values()] )
            
            # update state values
            for s_hash in mc_updateD.keys():
                sv_mc.delta_update( s_hash, mc_updateD[s_hash] )
                sv_td.delta_update( s_hash, td_updateD[s_hash] )
                
            if sum_diffs  < 1e-3:
                break
                
        # inner_loop and LOOP_LIMIT are defined outside this excerpt.
        # NOTE(review): presumably this reports a run that hit the iteration cap
        # instead of converging; if inner_loop comes from `range(LOOP_LIMIT)`
        # the `>=` test can never be true -- confirm against the loop header.
        if inner_loop >= LOOP_LIMIT:
            print('LOOP EXIT')
        
        # Feed the result of calc_rms_error(true_valueD) for the MC and the TD
        # estimates into the accumulators stored at index i_loop.
        mc_rms_raveL[i_loop].add_val( sv_mc.calc_rms_error( true_valueD ) )
        td_rms_raveL[i_loop].add_val( sv_td.calc_rms_error( true_valueD ) )

# Reduce every accumulator to its get_ave() value: one averaged RMS error per
# entry of mc_rms_raveL / td_rms_raveL.
mc_rmsL = [rave.get_ave() for rave in mc_rms_raveL]
td_rmsL = [rave.get_ave() for rave in td_rms_raveL]
コード例 #2
0
                mc_avegD[s_hash] = RunningAve()
            # Create the accumulator for this (s_hash, sn_hash) pair the first
            # time the pair is seen.
            if (s_hash,sn_hash) not in td_averD:
                td_averD[(s_hash,sn_hash)] = RunningAve()
            
            # Feed G into the per-state accumulator and reward into the per-pair
            # accumulator.
            # NOTE(review): presumably G is the return observed from s_hash and
            # reward the one-step reward of the s_hash -> sn_hash transition;
            # both are assigned outside this excerpt -- confirm.
            mc_avegD[s_hash].add_val( G )
            td_averD[(s_hash,sn_hash)].add_val( reward )
    
        # Set the Monte Carlo V(s) values for this experiment: each state's value
        # becomes the get_ave() result of its accumulator in mc_avegD.
        # Note: the loop variable G is an mc_avegD entry here, so it rebinds the
        # name G that this script also passes to add_val().
        for s_hash, G in mc_avegD.items():
            sv_mc.set_Vs( s_hash, G.get_ave() )
    
        # Set the TD(0) values for this experiment with 20 passes: every pass
        # asks calc_td_error() for a per-state error dict and moves each state
        # value by err*alpha.  total_err is unpacked but not used in the lines
        # visible here.
        for update_loop in range(20):
            errD, total_err = calc_td_error( show_values=False )
            for s_hash, err in errD.items():
                sv_td.delta_update( s_hash=s_hash, delta=err*alpha)
        
        # Feed the result of calc_rms_error(true_valueD) for the MC and the TD
        # estimates into the accumulators stored at index i_loop.
        mc_rms_raveL[i_loop].add_val( sv_mc.calc_rms_error( true_valueD ) )
        td_rms_raveL[i_loop].add_val( sv_td.calc_rms_error( true_valueD ) )
    

# Reduce every accumulator to its get_ave() value: one averaged RMS error per
# entry of mc_rms_raveL / td_rms_raveL.
mc_rmsL = [rave.get_ave() for rave in mc_rms_raveL]
td_rmsL = [rave.get_ave() for rave in td_rms_raveL]

# Draw both curves on one set of axes: MC as a red line, TD(0) as a cyan line.
fig, ax = plt.subplots()
ax.plot(mc_rmsL, 'r-', label='MC')
ax.plot(td_rmsL, 'c-', label='TD(0)')
td_erros = [0.23570226, 0.23500565, 0.14095847, 0.13294523, 0.12816648,
       0.12551351, 0.12472649, 0.12393498, 0.1234716 , 0.12199879,