Exemple #1
0
 def s_hash_print(self, none_str='*'):
     """Print the layout's grid of state hashes as a text table.

     Every entry of self.s_hash_rowL becomes one table cell:
       * a legal state (per self.environment.is_legal_state) shows its
         name from self.named_s_hashD when it has one, else str(s_hash)
       * a non-legal entry for which is_literal_str is true shows
         s_hash[1:-1], i.e. the entry without its first and last character
       * any other non-legal entry shows none_str

     Returns the value produced by print_string_rows (described by the
     original author as the number of characters in each row), or an
     empty list when the layout has no rows.
     """
     def cell_text( s_hash ):
         # decide the text shown for a single grid entry
         if self.environment.is_legal_state( s_hash ):
             if s_hash in self.named_s_hashD:
                 return self.named_s_hashD[s_hash]
             return str(s_hash)
         if is_literal_str( s_hash ):
             return s_hash[1:-1]
         return none_str
     
     grid = [ [cell_text(s_hash) for s_hash in row] for row in self.s_hash_rowL ]
     
     # nothing to print for an empty layout
     if not grid:
         return []
     
     return print_string_rows( grid,
                               row_tickL=self.row_tickL, const_col_w=True,
                               col_tickL=self.col_tickL,
                               header=self.environment.name,
                               y_axis_label=self.y_axis_label,
                               x_axis_label='State-Hash')
Exemple #2
0
 def param_print(self, paramD, 
                 row_tickL=None, const_col_w=True,
                 col_tickL=None, 
                 header='', 
                 x_axis_label='', y_axis_label='',
                 none_str='*'):
     """Print per-state parameter values arranged like the layout grid.

     paramD is a dictionary with index=s_hash and a value that is shown
     via str().  A cell shows the paramD value only when the entry is in
     paramD AND is a legal state; otherwise it falls back, in order, to
     the literal text s_hash[1:-1] (when is_literal_str is true), the
     name in self.named_s_hashD, and finally none_str.

     row_tickL / col_tickL default to the layout's own ticks when None;
     empty axis labels default to the layout's own labels.

     Returns the value produced by print_string_rows (described by the
     original author as the number of characters in each row), or an
     empty list when the layout has no rows.
     """
     def cell_text( s_hash ):
         # text shown for a single grid entry
         if (s_hash in paramD) and self.environment.is_legal_state( s_hash ):
             return str( paramD[s_hash] )
         if is_literal_str( s_hash ):
             return s_hash[1:-1]
         if s_hash in self.named_s_hashD:
             return self.named_s_hashD[s_hash]
         return none_str
     
     grid = [ [cell_text(s_hash) for s_hash in row] for row in self.s_hash_rowL ]
     
     # anything the caller left unspecified comes from the layout itself
     row_tickL = self.row_tickL if row_tickL is None else row_tickL
     col_tickL = self.col_tickL if col_tickL is None else col_tickL
     x_axis_label = x_axis_label or self.x_axis_label
     y_axis_label = y_axis_label or self.y_axis_label
     
     if not grid:
         return []
     
     return print_string_rows( grid, row_tickL=row_tickL,
                               const_col_w=const_col_w,
                               col_tickL=col_tickL,
                               header=header,
                               x_axis_label=x_axis_label,
                               y_axis_label=y_axis_label)
Exemple #3
0
    def summ_print(self,
                   fmt_V='%g',
                   none_str='*',
                   show_states=True,
                   show_last_change=True,
                   show_policy=True):
        """Print a summary of the state values held in self.VsD.

        fmt_V            -- printf-style format applied to each value/delta
        none_str         -- text shown for grid entries that are neither a
                            legal state nor a literal string
        show_states      -- with a layout, first print the state-hash grid
        show_last_change -- also print the last change recorded in
                            self.last_delta_VsD for each state
        show_policy      -- with a layout, finish with the summary of the
                            policy returned by self.get_policy()

        When the environment has a layout, values are printed as tables
        shaped like the layout; otherwise one sorted line per state is
        printed.  Returns None.
        """
        print()
        print('___ "%s" Alpha-Based State-Value Summary ___' %
              self.environment.name)

        layout = self.environment.layout
        if layout is not None:
            # make summ_print using environment.layout
            if show_states:
                # honor the caller's none_str in the state grid as well
                layout.s_hash_print(none_str=none_str)

            rows_outL = []
            last_delta_rows_outL = []  # only printed if show_last_change
            for row in layout.s_hash_rowL:
                outL = []
                # exactly one cell per layout entry, so the last-change table
                # has the same columns as the value table
                ld_outL = []
                for s_hash in row:
                    if self.environment.is_legal_state(s_hash):
                        outL.append(fmt_V % self.VsD[s_hash])
                        delta = self.last_delta_VsD.get(s_hash, None)
                        if delta is None:
                            ld_outL.append('None')
                        else:
                            ld_outL.append(fmt_V % delta)
                    else:
                        # non-state entries show the same filler in both tables
                        if is_literal_str(s_hash):
                            filler = s_hash[1:-1]
                        else:
                            filler = none_str
                        outL.append(filler)
                        ld_outL.append(filler)

                rows_outL.append(outL)
                last_delta_rows_outL.append(ld_outL)

            # formatting options shared by both tables
            table_kwargs = dict(row_tickL=layout.row_tickL,
                                const_col_w=True,
                                line_chr='_',
                                left_pad='    ',
                                col_tickL=layout.col_tickL,
                                x_axis_label=layout.x_axis_label,
                                y_axis_label=layout.y_axis_label,
                                justify='right')

            print_string_rows(rows_outL,
                              header=self.environment.name +
                              ' State-Value Summary, V(s)',
                              **table_kwargs)
            if show_last_change:
                print_string_rows(last_delta_rows_outL,
                                  header=self.environment.name +
                                  ' Last Change to V(s) Summary',
                                  **table_kwargs)

            if show_policy:
                policy = self.get_policy()
                policy.summ_print(verbosity=0, environment=self.environment)

        # ------------------------- simple output w/o a layout ------------
        else:
            lmax_hash = 6  # minimum column widths
            lmax_V = 6

            outL = []  # list of tuples = (s_hash, V)
            for s_hash, V in self.VsD.items():
                outL.append((s_hash, V))

                lmax_hash = max(lmax_hash, len(str(s_hash)))
                lmax_V = max(lmax_V, len(fmt_V % V))

            # right-justifying formats, e.g. '%6s'
            fmt_hash = '%' + '%is' % lmax_hash
            fmt_strV = '%' + '%is' % lmax_V

            outL.sort()  # sort in-place
            for (s_hash, V) in outL:
                V = fmt_V % V
                print('    ', fmt_hash % str(s_hash), fmt_strV % V, end='')
                if show_last_change:
                    print(' Last Delta = %s' %
                          self.last_delta_VsD.get(s_hash, None))
                else:
                    print()
Exemple #4
0
 def summ_print(self, verbosity=2, environment=None, 
                show_env_states=True, none_str='*'): # pragma: no cover
     """Print a text summary of the policy.

     verbosity       -- 2: call summ_print on every state's entry in
                        self.state_actionsD and show 16 sampled actions;
                        1: one line per state with a single action (plus the
                        sorted alternatives when there are several);
                        any other value: skip the per-state listing
     environment     -- if given and it has a layout, also print the policy
                        as a table shaped like the layout
     show_env_states -- print the layout's state-hash grid before the table
     none_str        -- text shown for grid entries that are neither a legal
                        state nor a literal string

     States are listed in the order given by sorting (hash, state) pairs
     with NaturalOrStrKey.  Returns None.
     """
     print('___ Policy Summary ___' )
     print('    Nstate-actions=%i'%len(self.state_actionsD) )
     
     sL = sorted( [(S.hash,S) for S in self.state_actionsD.keys()], key=NaturalOrStrKey )
     if verbosity==2:
         for s_hash,S in sL:
             SA = self.state_actionsD[ S ]
             SA.summ_print()
             # 16 draws from get_single_action as example actions
             exL = [str(self.get_single_action(S.hash)) for _ in range(16) ]
             print('        ex. actions:', ' '.join(exL))
     elif verbosity==1:
         print('        State Action')
         for s_hash,S in sL:
             SA = self.state_actionsD[ S ]
             
             # force a single action
             a_desc = self.get_single_action(S.hash)
             
             print('%13s'%str(s_hash),' %s'%a_desc, end=' ')
             if len(SA)>1:
                 optL = sorted( [ A.desc for (A,prob) in SA.action_probD.items()], key=NaturalOrStrKey )
                 print('from:',', '.join(optL))
             else:
                 print()
     
     # the table form needs an environment that has a layout
     if (environment is None) or (environment.layout is None):
         return
     
     layout = environment.layout
     if show_env_states:
         # honor the caller's none_str in the state grid as well
         layout.s_hash_print( none_str=none_str )
     
     rows_outL = []
     for row in layout.s_hash_rowL:
         outL = []
         for s_hash in row:
             if not environment.is_legal_state( s_hash ):
                 if is_literal_str( s_hash ):
                     outL.append( s_hash[1:-1] )
                 else:
                     outL.append( none_str )
             else:
                 a_desc = self.get_single_action(s_hash)
                 if a_desc is None:
                     # legal state for which the policy gives no action
                     outL.append( '  *' )
                 else:
                     outL.append( self.get_state_summ_str( s_hash, verbosity=verbosity ) )
         
         rows_outL.append( outL )
     
     # an unlabeled x axis is labeled 'Actions'
     if not layout.x_axis_label:
         x_axis_label = 'Actions'
     else:
         x_axis_label = layout.x_axis_label
     
     print_string_rows( rows_outL, row_tickL=layout.row_tickL, const_col_w=True, 
                        line_chr='_', left_pad='    ', 
                        header=environment.name + ' Policy Summary', 
                        x_axis_label=x_axis_label, justify='right',
                        col_tickL=layout.col_tickL, y_axis_label=layout.y_axis_label)
Exemple #5
0
    def save_diagram(self, environment, inp_colorD=None, pad=0.1, save_name='', 
                     show_arrows=True, do_show=False, scale=1.0, h_over_w=1.0,
                     show_terminal_labels=True):
        """
        Use matplotlib to create a color-coded policy diagram.
        Requires an environment.layout to do it.
        
        environment          -- environment whose layout grid is drawn
        inp_colorD           -- optional dict of preset colors; it is looked up
                                with str(action description) as the key and a
                                color string as the value
        pad                  -- each state is drawn as a square of side 1-pad
        save_name            -- if not empty, the figure is saved as a PNG
                                ('.png' is appended when missing)
        show_arrows          -- draw an arrow for each (state, action) pair
        do_show              -- call plt.show() at the end
        scale, h_over_w      -- figure size multipliers
        show_terminal_labels -- label legal states that have no actions
        
        Prints a warning and returns None when there is no layout or when
        the got_matplotlob flag is false.  Returns None in all cases.
        """
        if (environment is None) or (environment.layout is None):
            print('WARNING... Need an environment with a layout to create a policy diagram.')
            return
        
        if not got_matplotlob:
            print('WARNING... Need matplotlib to create a policy diagram... it failed to import.')
            return
        
        # colors handed out, in this order, to actions without a preset color
        colorL = ['r','g','b','m','c','y',
                  'darkcyan','deepskyblue','darkorange','brown','deeppink',
                  'maroon','crimson','seagreen','fuchsia','darkviolet' ]

        colorD = {} # index=str(action), value=color string
        if inp_colorD is not None:
            colorD.update( inp_colorD )
        
        Ncols = len( environment.layout.s_hash_rowL[0] )
        Nrows = len( environment.layout.s_hash_rowL )
        
        w_lr = 1.0
        h_tb = 1.0
        fig = plt.figure( figsize=( scale*(Ncols+w_lr), h_over_w*scale*(Nrows+h_tb)) )
        
        # plt.axes() creates the axes used below.  (A bare fig.add_axes() call
        # is not made: without a rect it did nothing in older matplotlib and
        # raises TypeError in current releases.)
        plt.axes()
        
        alignment = {'horizontalalignment': 'center', 'verticalalignment': 'center'}
        font = FontProperties()
        font.set_size('large')
        font.set_family('fantasy')
        font.set_style('normal')
        
        d = 1.0 - pad
        d2 = d / 2.0
        
        arrowL = [] # list of (s_hash, a_desc, color) for all arrows
        
        def get_action_color( a_desc ):
            """Return the color of an action, assigning the next one if new."""
            s = str( a_desc )
            if s not in colorD:
                colorD[s] = colorL[ len(colorD) % len(colorL) ]
            return colorD[s]
        
        def get_rect_color( s_hash, action_list ):
            """Return the fill color for a state having the given actions."""
            if len(action_list)==0:
                return ''
                
            if len(action_list)==1:
                # single action: the rectangle takes the action's color
                return get_action_color( action_list[0] )
            
            # several actions: use the layout's color for the state, if any
            basic_color = 'skyblue'
            if environment.layout.colorD is not None:
                if environment.layout.basic_color:
                    basic_color = environment.layout.basic_color
                return environment.layout.colorD.get(s_hash, basic_color)
            return basic_color
        
        for irow,row in enumerate(environment.layout.s_hash_rowL):
            y = Nrows - irow - 1 # first layout row is drawn at the top
            for jcol,s_hash in enumerate(row):
                
                if environment.is_legal_state( s_hash ):
                    actionL = [a for (a,p) in self.get_list_of_all_action_desc_prob( s_hash, incl_zero_prob=False)]
                    
                    if not actionL:
                        # if no actions possible, simply put state label.
                        # (if a color is specified, put a colored rectangle as well)
                        try:
                            c_rect = environment.layout.colorD.get(s_hash, '')
                        except Exception:
                            # e.g. layout.colorD is None
                            c_rect = ''
                        if c_rect:
                            rect = Rectangle((jcol,y), d,   d, fc=c_rect, alpha=0.5, edgecolor=c_rect)
                            plt.gca().add_patch( rect )
                            
                        if show_terminal_labels:
                            plt.text(jcol+d2, y+d2, str( s_hash ), fontproperties=font, **alignment)
                    else:
                        # get here if the policy has one or more actions in this state.
                        c_rect = get_rect_color( s_hash, actionL )
                        
                        #      Rectangle(  (x,y),    width,   height)
                        rect = Rectangle((jcol,y), d,   d, fc=c_rect, alpha=0.5, edgecolor=c_rect)
                        plt.gca().add_patch( rect )
                        
                        sL = []
                        for a_desc in actionL:
                            sL.append( str( a_desc ) )
                            c = get_action_color( a_desc )
                            
                            # build a list of arrows to be placed on top after all rectangles are made.
                            if show_arrows:
                                arrowL.append( (s_hash, a_desc, c) )
                        
                        plt.text(jcol+d2, y+d2, ' '.join(sL), fontproperties=font, **alignment)
                        
                else:
                    # not a legal state: gray square, labeled if it is a literal string
                    rect = Rectangle((jcol,y), d,   d, fc='gray', alpha=0.5, edgecolor='gray')
                    plt.gca().add_patch( rect )
                    
                    if is_literal_str( s_hash ):
                        plt.text(jcol+d2, y+d2, s_hash[1:-1], fontproperties=font, **alignment)
    
    
        plt.xlim(0, Ncols)
        plt.ylim(0, Nrows)
        # if any arrows being shown, do it.
        for (s_hash, a_desc, c) in arrowL:
            try:
                # draw_arrow relies on environment.get_action_snext_reward to calc next state.
                #  (if it is stochastic, that will result in a random arrow)
                draw_arrow(plt, Nrows, environment, s_hash, a_desc, pad,
                           color=c, Rinner=0.25, frac_len=0.2)
            except Exception:
                # best effort: a failed arrow must not abort the diagram
                print('draw_arrow FAILED for:',s_hash, a_desc)
                
        plt.xticks([])
        plt.yticks([])
        plt.box(False)
        
        plt.title('Policy for ' + environment.name )
        
        try:
            plt.tight_layout()
        except Exception:
            print('WARNING... plt.tight_layout() FAILED.')

        if save_name:
            if save_name.lower().endswith('.png'):
                fig.savefig( save_name )
            else:
                fig.savefig( save_name + '.png' )
                
        if do_show:
            plt.show()
Exemple #6
0
    def summ_print(self, fmt_Q='%.3f', none_str='*', show_states=True, 
                   show_last_change=True, show_policy=True):
        """Print a summary of the action-value estimates Q(s,a).

        fmt_Q            -- printf-style format applied to each Q value
        none_str         -- text shown for grid entries that are neither a
                            legal state nor a literal string
        show_states      -- with a layout, first print the state-hash grid
        show_last_change -- also show the last change recorded in
                            self.last_delta_QsaD; in the layout tables it is
                            shown as an integer percent of
                            self.get_max_last_delta_overall()
        show_policy      -- with a layout, finish with the summary of the
                            policy returned by self.get_policy()

        When the environment has a layout, values are printed as tables
        shaped like the layout; otherwise one sorted line per
        (state, action) pair is printed.  Returns None.
        """
        print()
        print('___ "%s" Action-Value Summary ___'%self.environment.name  )

        if self.environment.layout is not None:
            # make summ_print using environment.layout
            if show_states:
                # honor the caller's none_str in the state grid as well
                self.environment.layout.s_hash_print( none_str=none_str )

            row_tickL = self.environment.layout.row_tickL
            col_tickL = self.environment.layout.col_tickL
            x_axis_label = self.environment.layout.x_axis_label
            y_axis_label = self.environment.layout.y_axis_label

            d_max = self.get_max_last_delta_overall()
            if d_max==0.0:
                d_max = 1.0E-10 # avoid dividing by zero below

            rows_outL = []
            last_delta_rows_outL = [] # only printed if show_last_change
            for row in self.environment.layout.s_hash_rowL:
                outL = []
                ld_outL = []
                for s_hash in row:
                    if not self.environment.is_legal_state( s_hash ):
                        if is_literal_str( s_hash ):
                            outL.append( s_hash[1:-1] )
                            ld_outL.append( s_hash[1:-1] )
                        else:
                            outL.append( none_str )
                            ld_outL.append( none_str )
                    else: # s_hash is a legal state hash
                        aL = self.environment.get_state_legal_action_list( s_hash )
                        # multi-line cell: state hash, then one line per action
                        sL = [str(s_hash)]
                        ld_sL = [str(s_hash)]
                        for a_desc in aL:
                            qsa = self.QsaEst( s_hash, a_desc )
                            s = fmt_Q%qsa
                            sL.append( '%s='%str(a_desc) + s.strip()  )
                            try:
                                d_val = int(100.0*self.last_delta_QsaD[s_hash].get( a_desc )/d_max)
                                if d_val > 0:
                                    lds = '%i%%'%d_val
                                    ld_sL.append( '%s='%str(a_desc) + lds.strip()  )
                                else:
                                    ld_sL.append( '%s~0'%str(a_desc) )
                            except Exception:
                                # no usable last change for this (state, action),
                                # e.g. a missing entry or a None value
                                ld_sL.append( '%s=None'%str(a_desc) )
                                
                        outL.append(  '\n'.join(sL).strip()  )
                        ld_outL.append(  '\n'.join(ld_sL).strip()  )
                rows_outL.append( outL )
                last_delta_rows_outL.append( ld_outL )

            print_string_rows( rows_outL, row_tickL=row_tickL, const_col_w=True,
                               line_chr='_', left_pad='    ', col_tickL=col_tickL,
                               header=self.environment.name + ' Action-Value Summary, Q(s,a)',
                               x_axis_label=x_axis_label, y_axis_label=y_axis_label,
                               justify='right')

            if show_last_change:
                print_string_rows( last_delta_rows_outL, row_tickL=row_tickL, const_col_w=True,
                                   line_chr='_', left_pad='    ', col_tickL=col_tickL,
                                   header=self.environment.name + ' Last %% of Max Change to Q(s,a) Summary, (max change=%g)'%d_max,
                                   x_axis_label=x_axis_label, y_axis_label=y_axis_label,
                                   justify='right')

            if show_policy:
                policy = self.get_policy()
                policy.summ_print(verbosity=0, environment=self.environment)

        # ------------------------- simple output w/o a layout ------------
        else:
            outL = [] # list of strings "(s_hash,a_desc)=Q"
            for s_hash in self.environment.iter_all_states():
                aL = self.environment.get_state_legal_action_list( s_hash )
                for a_desc in aL:
                    qsa = self.QsaEst( s_hash, a_desc )
                
                    q = fmt_Q%qsa
                    s = '(%s, %s)='%(str(s_hash),str(a_desc)) + q.strip()
                    if show_last_change:
                        # a state without any recorded change prints None
                        # instead of raising KeyError
                        s = s + ' Last Delta = %s'%self.last_delta_QsaD.get( s_hash, {} ).get( a_desc, None)
                    
                    outL.append( s )
            outL.sort() # sort in-place
            for s in outL:
                print('    ', s )
    def summ_print(self,
                   fmt_Q='%g',
                   none_str='*',
                   show_states=True,
                   showRunningAve=True):
        """Print a summary of the averaged action values in self.Qsa_RaveD.

        self.Qsa_RaveD is indexed by s_hash; each value is a dictionary
        indexed by a_desc whose values provide get_ave().

        fmt_Q          -- printf-style format applied to each average
        none_str       -- text shown for grid entries that are neither a
                          legal state nor a literal string
        show_states    -- with a layout, first print the state-hash grid
        showRunningAve -- accepted but not used by this method

        When the environment has a layout, values are printed as a table
        shaped like the layout; otherwise one sorted line per
        (state, action) pair is printed.  Returns None.
        """
        print()
        print('___ "%s" Action-Value Summary ___' % self.environment.name)

        if self.environment.layout is not None:
            # make summ_print using environment.layout
            if show_states:
                # honor the caller's none_str in the state grid as well
                self.environment.layout.s_hash_print(none_str=none_str)

            row_tickL = self.environment.layout.row_tickL
            col_tickL = self.environment.layout.col_tickL
            x_axis_label = self.environment.layout.x_axis_label
            y_axis_label = self.environment.layout.y_axis_label

            rows_outL = []
            for row in self.environment.layout.s_hash_rowL:
                outL = []
                for s_hash in row:
                    if not self.environment.is_legal_state(s_hash):
                        if is_literal_str(s_hash):
                            outL.append(s_hash[1:-1])
                        else:
                            outL.append(none_str)
                    else:
                        # multi-line cell: state hash, then one line per action
                        aD = self.Qsa_RaveD[s_hash]
                        sL = [str(s_hash)]
                        for a_desc, Q in aD.items():
                            s = fmt_Q % Q.get_ave()
                            sL.append('%s=' % str(a_desc) + s.strip())
                        outL.append('\n'.join(sL).strip())
                rows_outL.append(outL)

            print_string_rows(rows_outL,
                              row_tickL=row_tickL,
                              const_col_w=True,
                              line_chr='_',
                              left_pad='    ',
                              col_tickL=col_tickL,
                              header=self.environment.name +
                              ' Action-Value Summary, Q(s,a)',
                              x_axis_label=x_axis_label,
                              y_axis_label=y_axis_label,
                              justify='right')

        # ------------------------- simple output w/o a layout ------------
        else:
            outL = []  # list of strings "(s_hash,a_desc)=Q"
            for s_hash, aD in self.Qsa_RaveD.items():
                for a_desc, Q in aD.items():
                    q = fmt_Q % Q.get_ave()
                    outL.append('(%s, %s)=' % (str(s_hash), str(a_desc)) +
                                q.strip())
            # sort once, after every line has been collected
            outL.sort()
            for s in outL:
                print('    ', s)
    def summ_print(self,
                   fmt_V='%g',
                   none_str='*',
                   show_states=True,
                   showRunningAve=True):
        """Print a summary of the averaged state values in self.Vs_RaveD.

        self.Vs_RaveD has index=state_hash, value=RunningAve object; the
        value printed for a state is that object's get_ave().

        fmt_V          -- printf-style format applied to each average
        none_str       -- text shown for grid entries that are neither a
                          legal state nor a literal string
        show_states    -- with a layout, first print the state-hash grid
        showRunningAve -- finish by calling summ_print() on every
                          RunningAve object

        When the environment has a layout, values are printed as a table
        shaped like the layout; otherwise one sorted line per state is
        printed.  Returns None.
        """
        print()
        print('___ "%s" State-Value Summary ___' % self.environment.name)

        if self.environment.layout is not None:
            # make summ_print using environment.layout
            if show_states:
                # honor the caller's none_str in the state grid as well
                self.environment.layout.s_hash_print(none_str=none_str)

            row_tickL = self.environment.layout.row_tickL
            col_tickL = self.environment.layout.col_tickL
            x_axis_label = self.environment.layout.x_axis_label
            y_axis_label = self.environment.layout.y_axis_label

            rows_outL = []
            for row in self.environment.layout.s_hash_rowL:
                outL = []
                for s_hash in row:
                    if not self.environment.is_legal_state(s_hash):
                        if is_literal_str(s_hash):
                            outL.append(s_hash[1:-1])
                        else:
                            outL.append(none_str)
                    else:
                        outL.append(fmt_V % self.Vs_RaveD[s_hash].get_ave())
                rows_outL.append(outL)

            print_string_rows(rows_outL,
                              row_tickL=row_tickL,
                              const_col_w=True,
                              line_chr='_',
                              left_pad='    ',
                              col_tickL=col_tickL,
                              header=self.environment.name +
                              ' State-Value Summary, V(s)',
                              x_axis_label=x_axis_label,
                              y_axis_label=y_axis_label,
                              justify='right')

        # ------------------------- simple output w/o a layout ------------
        else:
            lmax_hash = 6  # minimum width of the state-hash column

            outL = []  # list of tuples = (s_hash, RunningAve object)
            for s_hash, RA in self.Vs_RaveD.items():
                outL.append((s_hash, RA))
                lmax_hash = max(lmax_hash, len(str(s_hash)))
            fmt_hash = '%' + '%is' % lmax_hash  # right-justifying, e.g. '%6s'

            outL.sort()  # sort in-place
            for (s_hash, RA) in outL:
                # format the average itself, not the RunningAve object
                print('    ', fmt_hash % str(s_hash), fmt_V % RA.get_ave())

        if showRunningAve:
            for s_hash, RA in self.Vs_RaveD.items(
            ):  # index=state_hash, value=RunningAve object
                RA.summ_print()
Exemple #9
0
    def s_hash_diagram(self, save_name='', basic_color='skyblue', do_show=False,
                       none_str='*', inp_colorD=None, pad=0.05, scale=1.0, h_over_w=1.0 ):
        """
        Create a PNG file of layout
        Use matplotlib to create a color-coded diagram.
        
        save_name       -- if not empty, the figure is saved as a PNG
                           ('.png' is appended when missing)
        basic_color     -- color of states that have no specific color; it is
                           overridden by self.basic_color when that is set
        do_show         -- call plt.show() at the end
        none_str        -- accepted but not used by this method
        inp_colorD      -- optional dict, index=s_hash, value=color string;
                           its entries take precedence over self.colorD
        pad             -- determines the amount of white space between
                           state rectangles (each has side 1-pad)
        scale, h_over_w -- figure size multipliers
        
        Prints a warning and returns None when the got_matplotlob flag is
        false.  Returns None in all cases.
        """
        
        if not got_matplotlob:
            print('WARNING... Need matplotlib to create a diagram... it failed to import.')
            return
        
        # per-state colors: layout defaults first, caller overrides second
        local_colorD = {}
        if self.colorD is not None:
            local_colorD.update( self.colorD )
        if inp_colorD is not None:
            local_colorD.update( inp_colorD )

        if self.basic_color:
            basic_color = self.basic_color
        
        Ncols = len( self.s_hash_rowL[0] )
        Nrows = len( self.s_hash_rowL )
        
        w_lr = 1.0
        h_tb = 1.0
        fig = plt.figure( figsize=( scale*(Ncols+w_lr), h_over_w*scale*(Nrows+h_tb)) )
        
        # plt.axes() creates the axes used below.  (A bare fig.add_axes() call
        # is not made: without a rect it did nothing in older matplotlib and
        # raises TypeError in current releases.)
        plt.axes()
        
        alignment = {'horizontalalignment': 'center', 'verticalalignment': 'center'}
        font = FontProperties()
        font.set_size('large')
        font.set_family('fantasy')
        font.set_style('normal')
        
        d = 1.0 - pad
        d2 = d / 2.0
        
        for irow,row in enumerate(self.s_hash_rowL):
            y = Nrows - irow - 1 # first layout row is drawn at the top
            for jcol,s_hash in enumerate(row):
                
                c = local_colorD.get( s_hash, basic_color )
                
                if self.environment.is_legal_state( s_hash ):
                    # legal state: colored square labeled with its name or hash
                    if s_hash in self.named_s_hashD:
                        s = self.named_s_hashD[s_hash]
                    else:
                        s = str( s_hash )
                    plt.text(jcol+d2, y+d2, s, fontproperties=font,**alignment)
                        
                    #      Rectangle(  (x,y),    width,   height)
                    rect = Rectangle((jcol,y), d,   d, fc=c, alpha=0.6, edgecolor=c)
                    plt.gca().add_patch( rect )
                        
                else:
                    if is_literal_str( s_hash ):
                        # literal string: text only, no rectangle
                        plt.text(jcol+d2, y+d2, s_hash[1:-1], fontproperties=font,**alignment)
                    else:
                        rect = Rectangle((jcol,y), d,   d, fc='lemonchiffon', alpha=0.5, edgecolor='gray')
                        plt.gca().add_patch( rect )
    
        plt.xlim(0, Ncols)
        plt.ylim(0, Nrows)
        
        if self.col_tickL is None:
            plt.xticks([])
        else:
            plt.xticks( [i+0.5 for i in range(len(self.col_tickL))], 
                        [str(ct) for ct in self.col_tickL] )

        if self.row_tickL is None:
            plt.yticks([])
        else:
            # row ticks are listed top-down but the y axis runs bottom-up;
            # pass a real list of labels rather than a one-shot iterator
            plt.yticks( [i+0.5 for i in range(len(self.row_tickL))], 
                        [str(rt) for rt in reversed(self.row_tickL)] )
        
        
        plt.box(False)
        
        plt.title( self.environment.name )

        if self.x_axis_label:
            plt.xlabel( self.x_axis_label )
            
        if self.y_axis_label:
            plt.ylabel( self.y_axis_label )
            
        plt.tight_layout()


        if save_name:
            if save_name.lower().endswith('.png'):
                fig.savefig( save_name )
            else:
                fig.savefig( save_name + '.png' )
                
        if do_show:
            plt.show()
Exemple #10
0
    def layout_print(self,
                     vname='reward',
                     fmt='',
                     show_env_states=True,
                     none_str='*'):
        """Print the value named vname as a table shaped like the layout.

        vname           -- only 'reward' is recognized (values come from
                           self.get_estimated_rewards()); any other name
                           leaves every state without a value
        fmt             -- printf-style format for each value; str() is used
                           when it is empty
        show_env_states -- print the layout's state-hash grid first
        none_str        -- text for entries without a value

        Prints an error message and returns when self.layout is None.
        Returns None.
        """
        layout = self.layout
        if layout is None:
            print('...ERROR... "%s" tried to layout_print w/o a defined layout'
                  % self.name)
            return

        if show_env_states:
            layout.s_hash_print(none_str=none_str)

        if vname == 'reward':
            # valD: index=s_hash, value=float reward estimate.
            # msgD: special messages, index=s_hash
            valD, msgD = self.get_estimated_rewards()
        else:
            valD, msgD = {}, {}  # empty if not recognized vname

        def cell_text(s_hash):
            # text shown for a single grid entry
            if s_hash in self.SC.stateD:
                val = valD.get(s_hash, None)
                if val is None:
                    text = none_str
                elif fmt:
                    text = fmt % val
                else:
                    text = str(val)
            elif is_literal_str(s_hash):
                text = s_hash[1:-1]
            else:
                text = none_str

            # a special message, if any, is appended to the cell
            msg = msgD.get(s_hash, '')
            if msg:
                text = text + msg
            return text

        rows_outL = [[cell_text(s_hash) for s_hash in row]
                     for row in layout.s_hash_rowL]

        if not rows_outL:
            return

        print_string_rows(rows_outL,
                          const_col_w=True,
                          line_chr='_',
                          left_pad='    ',
                          y_axis_label=layout.y_axis_label,
                          row_tickL=layout.row_tickL,
                          col_tickL=layout.col_tickL,
                          header=self.name + ' %s Summary' % vname.title(),
                          x_axis_label=layout.x_axis_label,
                          justify='right')