def s_hash_print(self, none_str='*'):
    """Print the grid of state hashes held in self.s_hash_rowL.

    Each cell is rendered as follows:
      * not a legal state, but is_literal_str( s_hash ) is true:
        s_hash with its first and last characters removed.
      * any other entry that is not a legal state: none_str.
      * legal state with an entry in self.named_s_hashD: that entry.
      * any other legal state: str( s_hash ).

    Returns the value handed back by print_string_rows (described in the
    original code as the number of characters in each row), or an empty
    list when there are no rows to print.
    """
    def cell_text( s_hash ):
        # legality is checked first; literal strings are only considered
        # for entries that are NOT legal states.
        if self.environment.is_legal_state( s_hash ):
            if s_hash in self.named_s_hashD:
                return self.named_s_hashD[s_hash]
            return str(s_hash)
        if is_literal_str( s_hash ):
            return s_hash[1:-1]
        return none_str

    grid = [ [cell_text( s_hash ) for s_hash in row]
             for row in self.s_hash_rowL ]

    if not grid:
        return []

    # the x axis is always labelled 'State-Hash' for this printout.
    return print_string_rows( grid, row_tickL=self.row_tickL, const_col_w=True,
                              col_tickL=self.col_tickL,
                              header=self.environment.name,
                              y_axis_label=self.y_axis_label,
                              x_axis_label='State-Hash')
def param_print(self, paramD, row_tickL=None, const_col_w=True, col_tickL=None,
                header='', x_axis_label='', y_axis_label='', none_str='*'):
    """Print the values of paramD arranged on the layout grid.

    paramD has index=s_hash, value=anything printable with str().

    A cell shows str( paramD[s_hash] ) when s_hash is in paramD AND is a
    legal state.  Otherwise the cell shows, in order of preference: the
    inside of a literal string, the entry of self.named_s_hashD, or none_str.

    row_tickL and col_tickL default to the layout's own ticks when None;
    x_axis_label and y_axis_label default to the layout's labels when empty.

    Returns the value handed back by print_string_rows (described in the
    original code as the number of characters in each row), or an empty
    list when there are no rows to print.
    """
    def cell_text( s_hash ):
        if (s_hash in paramD) and self.environment.is_legal_state( s_hash ):
            return str( paramD[s_hash] )
        if is_literal_str( s_hash ):
            return s_hash[1:-1]
        if s_hash in self.named_s_hashD:
            return self.named_s_hashD[s_hash]
        return none_str

    grid = [ [cell_text( s_hash ) for s_hash in row]
             for row in self.s_hash_rowL ]

    # fall back on the layout's own decorations for anything not supplied.
    row_tickL = self.row_tickL if row_tickL is None else row_tickL
    col_tickL = self.col_tickL if col_tickL is None else col_tickL
    x_axis_label = x_axis_label or self.x_axis_label
    y_axis_label = y_axis_label or self.y_axis_label

    if not grid:
        return []

    return print_string_rows( grid, row_tickL=row_tickL, const_col_w=const_col_w,
                              col_tickL=col_tickL, header=header,
                              x_axis_label=x_axis_label,
                              y_axis_label=y_axis_label)
def summ_print(self, fmt_V='%g', none_str='*', show_states=True,
               show_last_change=True, show_policy=True):
    """Print a summary of the state values, V(s), stored in self.VsD.

    fmt_V            : %-style format applied to each value.
    none_str         : text shown for grid cells that are neither a legal
                       state nor a literal string.
    show_states      : (layout only) first print the layout's state hashes.
    show_last_change : layout: print a second table holding the entries of
                       self.last_delta_VsD; plain output: append
                       "Last Delta" to every line.
    show_policy      : (layout only) also print the summary of
                       self.get_policy().

    When the environment has a layout, the values are printed on that grid.
    Otherwise one line per state is printed, sorted by state hash.
    """
    print()
    print('___ "%s" Alpha-Based State-Value Summary ___' % self.environment.name)

    layout = self.environment.layout
    if layout is not None:
        # make summ_print using environment.layout
        if show_states:
            layout.s_hash_print(none_str='*')

        row_tickL = layout.row_tickL
        col_tickL = layout.col_tickL
        x_axis_label = layout.x_axis_label
        y_axis_label = layout.y_axis_label

        rows_outL = []
        last_delta_rows_outL = []  # only printed if show_last_change == True
        for row in layout.s_hash_rowL:
            outL = []
            # ld_outL must stay cell-for-cell parallel to outL, otherwise the
            # last-change table is shifted relative to col_tickL.
            ld_outL = []
            for s_hash in row:
                if not self.environment.is_legal_state(s_hash):
                    if is_literal_str(s_hash):
                        cell = s_hash[1:-1]
                    else:
                        cell = none_str
                    outL.append(cell)
                    ld_outL.append(cell)
                else:
                    outL.append(fmt_V % self.VsD[s_hash])
                    delta = self.last_delta_VsD.get(s_hash, None)
                    if delta is None:
                        ld_outL.append('None')
                    else:
                        ld_outL.append(fmt_V % delta)
            rows_outL.append(outL)
            last_delta_rows_outL.append(ld_outL)

        print_string_rows(rows_outL, row_tickL=row_tickL, const_col_w=True,
                          line_chr='_', left_pad=' ', col_tickL=col_tickL,
                          header=self.environment.name + ' State-Value Summary, V(s)',
                          x_axis_label=x_axis_label, y_axis_label=y_axis_label,
                          justify='right')

        if show_last_change:
            print_string_rows(last_delta_rows_outL, row_tickL=row_tickL,
                              const_col_w=True, line_chr='_', left_pad=' ',
                              col_tickL=col_tickL,
                              header=self.environment.name + ' Last Change to V(s) Summary',
                              x_axis_label=x_axis_label, y_axis_label=y_axis_label,
                              justify='right')

        if show_policy:
            policy = self.get_policy()
            policy.summ_print(verbosity=0, environment=self.environment)

    # ------------------------- simple output w/o a layout ------------
    else:
        # both columns are at least 6 characters wide.
        lmax_hash = 6
        lmax_V = 6

        outL = []  # list of tuples = (s_hash, V)
        for s_hash, V in self.VsD.items():
            outL.append((s_hash, V))
            lmax_hash = max(lmax_hash, len(str(s_hash)))
            lmax_V = max(lmax_V, len(fmt_V % V))

        # right-justified column formats, e.g. '%6s'
        fmt_hash = '%' + '%is' % lmax_hash
        fmt_strV = '%' + '%is' % lmax_V

        outL.sort()  # sort in-place
        for (s_hash, V) in outL:
            print(' ', fmt_hash % str(s_hash), fmt_strV % (fmt_V % V), end='')
            if show_last_change:
                print(' Last Delta = %s' % self.last_delta_VsD.get(s_hash, None))
            else:
                print()
def summ_print(self, verbosity=2, environment=None, show_env_states=True, none_str='*'): # pragma: no cover
    """Print a summary of the policy, in sorted state_hash order.

    verbosity==2 : call summ_print on every state's entry of
                   self.state_actionsD and list 16 calls of
                   get_single_action for that state.
    verbosity==1 : one line per state with a single action, followed by the
                   candidate actions when the state has more than one.
    other values : no per-state listing.

    If environment is given and has a layout, the policy is also printed on
    the layout grid (preceded by the state hashes when show_env_states).
    none_str is shown for grid cells that are neither a legal state nor a
    literal string.
    """
    print('___ Policy Summary ___' )
    print(' Nstate-actions=%i'%len(self.state_actionsD) )

    ordered_states = sorted( ((S.hash, S) for S in self.state_actionsD.keys()),
                             key=NaturalOrStrKey )

    if verbosity==2:
        for _, S in ordered_states:
            self.state_actionsD[ S ].summ_print()
            samples = [str(self.get_single_action(S.hash)) for _ in range(16)]
            print(' ex. actions:', ' '.join(samples))

    elif verbosity==1:
        print(' State Action')
        for s_hash, S in ordered_states:
            SA = self.state_actionsD[ S ]
            # force a single action
            a_desc = self.get_single_action(S.hash)
            print('%13s'%str(s_hash),' %s'%a_desc, end=' ')
            if len(SA)>1:
                choices = sorted( [A.desc for A in SA.action_probD.keys()],
                                  key=NaturalOrStrKey )
                print('from:',', '.join(choices))
            else:
                print()

    # the grid output needs an environment that owns a layout.
    if (environment is None) or (environment.layout is None):
        return

    layout = environment.layout
    if show_env_states:
        layout.s_hash_print( none_str='*' )

    def cell_text( s_hash ):
        if environment.is_legal_state( s_hash ):
            if self.get_single_action(s_hash) is None:
                return ' *'
            return self.get_state_summ_str( s_hash, verbosity=verbosity )
        if is_literal_str( s_hash ):
            return s_hash[1:-1]
        return none_str

    grid = [ [cell_text( s_hash ) for s_hash in row]
             for row in layout.s_hash_rowL ]

    # an unlabeled x axis is labeled 'Actions' for the policy printout.
    x_axis_label = layout.x_axis_label or 'Actions'

    print_string_rows( grid, row_tickL=layout.row_tickL, const_col_w=True,
                       line_chr='_', left_pad=' ',
                       header=environment.name + ' Policy Summary',
                       x_axis_label=x_axis_label, justify='right',
                       col_tickL=layout.col_tickL,
                       y_axis_label=layout.y_axis_label)
def save_diagram(self, environment, inp_colorD=None, pad=0.1, save_name='',
                 show_arrows=True, do_show=False, scale=1.0, h_over_w=1.0,
                 show_terminal_labels=True):
    """
    Use matplotlib to create a color-coded policy diagram.
    Requires an environment.layout to do it.

    inp_colorD           : optional dict, index=str(action), value=color string.
                           Actions not in it are assigned the next color of an
                           internal color list.
    pad                  : white space left between state rectangles.
    save_name            : if not empty, the figure is saved to this file
                           ('.png' is appended when missing).
    show_arrows          : draw an arrow for every (state, action) pair.
    do_show              : call plt.show() at the end.
    scale, h_over_w      : figure size multipliers.
    show_terminal_labels : label legal states that have no actions.

    Prints a warning and returns None if there is no layout or if
    matplotlib failed to import.
    """
    if (environment is None) or (environment.layout is None):
        print('WARNING... Need an environment with a layout to create a policy diagram.')
        return

    if not got_matplotlob:
        print('WARNING... Need matplotlib to create a policy diagram... it failed to import.')
        return

    layout = environment.layout

    colorL = ['r','g','b','m','c','y',
              'darkcyan','deepskyblue','darkorange','brown','deeppink',
              'maroon','crimson','seagreen','fuchsia','darkviolet' ]
    colorD = {} # index=action, value=color string
    if inp_colorD is not None:
        colorD.update( inp_colorD )

    Ncols = len( layout.s_hash_rowL[0] )
    Nrows = len( layout.s_hash_rowL )

    w_lr = 1.0
    h_tb = 1.0
    fig = plt.figure( figsize=( scale*(Ncols+w_lr), h_over_w*scale*(Nrows+h_tb)) )
    # NOTE: a bare fig.add_axes() call used to precede this line; it needs a
    # rect argument and its result was never used, so only plt.axes() remains.
    plt.axes()

    alignment = {'horizontalalignment': 'center', 'verticalalignment': 'center'}
    font = FontProperties()
    font.set_size('large')
    font.set_family('fantasy')
    font.set_style('normal')

    d = 1.0 - pad
    d2 = d / 2.0

    arrowL = [] # list of (s_hash, a_desc, color) for all arrows

    def get_action_color( a_desc ):
        """Return the color of an action, assigning the next free one if new."""
        s = str( a_desc )
        if s not in colorD:
            colorD[s] = colorL[ len(colorD) % len(colorL) ]
        return colorD[s]

    def get_rect_color( s_hash, action_list ):
        """Return the rectangle color for a legal state."""
        if len(action_list)==0:
            return ''
        if len(action_list)==1:
            # a single action: the rectangle takes the action's color.
            return get_action_color( action_list[0] )

        # several actions: use the layout's color for the state, if any.
        basic_color = 'skyblue'
        if layout.colorD is not None:
            if layout.basic_color:
                basic_color = layout.basic_color
            return layout.colorD.get(s_hash, basic_color)
        return basic_color

    for irow,row in enumerate(layout.s_hash_rowL):
        y = Nrows - irow - 1 # row 0 of the layout is drawn at the top
        for jcol,s_hash in enumerate(row):

            if environment.is_legal_state( s_hash ):
                actionL = [a for (a,p) in self.get_list_of_all_action_desc_prob(
                                               s_hash, incl_zero_prob=False)]
                c_rect = get_rect_color( s_hash, actionL )

                if not actionL:
                    # if no actions possible, simply put state label.
                    # (if a color is specified, put a colored rectangle as well)
                    try:
                        c_rect = layout.colorD.get(s_hash, '')
                    except (AttributeError, TypeError):
                        # e.g. layout.colorD is None
                        c_rect = ''

                    if c_rect:
                        rect = Rectangle((jcol,y), d, d, fc=c_rect, alpha=0.5, edgecolor=c_rect)
                        plt.gca().add_patch( rect )

                    if show_terminal_labels:
                        plt.text(jcol+d2, y+d2, str( s_hash ),
                                 fontproperties=font, **alignment)
                else:
                    # get here if the policy has one or more actions in this state.
                    # Rectangle( (x,y), width, height)
                    rect = Rectangle((jcol,y), d, d, fc=c_rect, alpha=0.5, edgecolor=c_rect)
                    plt.gca().add_patch( rect )

                    sL = []
                    for a_desc in actionL:
                        sL.append( str( a_desc ) )
                        c = get_action_color( a_desc )

                        # build a list of arrows to be placed on top after all rectangles are made.
                        if show_arrows:
                            arrowL.append( (s_hash, a_desc, c) )

                    if sL:
                        plt.text(jcol+d2, y+d2, ' '.join(sL),
                                 fontproperties=font, **alignment)
            else:
                rect = Rectangle((jcol,y), d, d, fc='gray', alpha=0.5, edgecolor='gray')
                plt.gca().add_patch( rect )
                if is_literal_str( s_hash ):
                    plt.text(jcol+d2, y+d2, s_hash[1:-1],
                             fontproperties=font, **alignment)

    plt.xlim(0, Ncols)
    plt.ylim(0, Nrows)

    # if any arrows being shown, do it.
    for (s_hash, a_desc, c) in arrowL:
        try:
            # draw_arrow relies on environment.get_action_snext_reward to calc next state.
            # (if it is stochastic, that will result in a random arrow)
            draw_arrow(plt, Nrows, environment, s_hash, a_desc, pad,
                       color=c, Rinner=0.25, frac_len=0.2)
        except Exception:
            # best effort: a failed arrow must not abort the diagram.
            print('draw_arrow FAILED for:',s_hash, a_desc)

    plt.xticks([])
    plt.yticks([])
    plt.box(False)
    plt.title('Policy for ' + environment.name )

    try:
        plt.tight_layout()
    except Exception:
        print('WARNING... plt.tight_layout() FAILED.')

    if save_name:
        if save_name.lower().endswith('.png'):
            fig.savefig( save_name )
        else:
            fig.savefig( save_name + '.png' )

    if do_show:
        plt.show()
def summ_print(self, fmt_Q='%.3f', none_str='*', show_states=True,
               show_last_change=True, show_policy=True):
    """Print a summary of the action values, Q(s,a).

    fmt_Q            : %-style format applied to each value of self.QsaEst.
    none_str         : text shown for grid cells that are neither a legal
                       state nor a literal string.
    show_states      : (layout only) first print the layout's state hashes.
    show_last_change : layout: print a second table holding each last delta
                       as an integer percent of the overall max last delta;
                       plain output: append "Last Delta" to every line.
    show_policy      : (layout only) also print the summary of
                       self.get_policy().

    When the environment has a layout, the values are printed on that grid.
    Otherwise one line per (state, action) pair is printed, sorted as text.
    A state or action without a recorded last delta is shown as None.
    """
    print()
    print('___ "%s" Action-Value Summary ___'%self.environment.name )

    layout = self.environment.layout
    if layout is not None:
        # make summ_print using environment.layout
        if show_states:
            layout.s_hash_print( none_str='*' )

        row_tickL = layout.row_tickL
        col_tickL = layout.col_tickL
        x_axis_label = layout.x_axis_label
        y_axis_label = layout.y_axis_label

        d_max = self.get_max_last_delta_overall()
        if d_max==0.0:
            d_max = 1.0E-10 # avoid dividing by zero below

        rows_outL = []
        last_delta_rows_outL = [] # only printed if show_last_change == True
        for row in layout.s_hash_rowL:
            outL = []
            ld_outL = []
            for s_hash in row:
                if not self.environment.is_legal_state( s_hash ):
                    if is_literal_str( s_hash ):
                        cell = s_hash[1:-1]
                    else:
                        cell = none_str
                    outL.append( cell )
                    ld_outL.append( cell )
                else: # s_hash is a legal state hash
                    aL = self.environment.get_state_legal_action_list( s_hash )
                    sL = [str(s_hash)]
                    ld_sL = [str(s_hash)]
                    for a_desc in aL:
                        qsa = self.QsaEst( s_hash, a_desc )
                        s = fmt_Q%qsa
                        sL.append( '%s='%str(a_desc) + s.strip() )

                        try:
                            d_val = int(100.0*self.last_delta_QsaD[s_hash].get( a_desc )/d_max)
                        except Exception:
                            # no usable last delta for this (state, action)
                            ld_sL.append( '%s=None'%str(a_desc) )
                        else:
                            if d_val > 0:
                                lds = '%i%%'%d_val
                                ld_sL.append( '%s='%str(a_desc) + lds.strip() )
                            else:
                                ld_sL.append( '%s~0'%str(a_desc) )

                    outL.append( '\n'.join(sL).strip() )
                    ld_outL.append( '\n'.join(ld_sL).strip() )
            rows_outL.append( outL )
            last_delta_rows_outL.append( ld_outL )

        print_string_rows( rows_outL, row_tickL=row_tickL, const_col_w=True,
                           line_chr='_', left_pad=' ', col_tickL=col_tickL,
                           header=self.environment.name + ' Action-Value Summary, Q(s,a)',
                           x_axis_label=x_axis_label, y_axis_label=y_axis_label,
                           justify='right')

        if show_last_change:
            print_string_rows( last_delta_rows_outL, row_tickL=row_tickL,
                               const_col_w=True, line_chr='_', left_pad=' ',
                               col_tickL=col_tickL,
                               header=self.environment.name + ' Last %% of Max Change to Q(s,a) Summary, (max change=%g)'%d_max,
                               x_axis_label=x_axis_label, y_axis_label=y_axis_label,
                               justify='right')

        if show_policy:
            policy = self.get_policy()
            policy.summ_print(verbosity=0, environment=self.environment)

    # ------------------------- simple output w/o a layout ------------
    else:
        outL = [] # list of strings "(s_hash,a_desc)=Q"
        for s_hash in self.environment.iter_all_states():
            aL = self.environment.get_state_legal_action_list( s_hash )
            for a_desc in aL:
                qsa = self.QsaEst( s_hash, a_desc )
                q = fmt_Q%qsa
                s = '(%s, %s)='%(str(s_hash),str(a_desc)) + q.strip()
                if show_last_change:
                    try:
                        last_delta = self.last_delta_QsaD[s_hash].get( a_desc, None)
                    except KeyError:
                        # a state without any recorded delta prints None,
                        # matching the layout output above.
                        last_delta = None
                    s = s + ' Last Delta = %s'%last_delta
                outL.append( s )

        outL.sort() # sort in-place
        for s in outL:
            print(' ', s )
def summ_print(self, fmt_Q='%g', none_str='*', show_states=True, showRunningAve=True):
    """Print a summary of the running-average action values, Q(s,a).

    Values come from the get_ave() of every entry of self.Qsa_RaveD
    (index=s_hash, value=dict with index=a_desc) and are formatted with the
    %-style format fmt_Q.

    With an environment layout the values are printed on the layout grid
    (preceded by the state hashes when show_states); none_str is shown for
    cells that are neither a legal state nor a literal string.
    Without a layout, one "(s_hash, a_desc)=Q" line is printed per pair,
    sorted as text.

    showRunningAve is accepted but not used by this method.
    """
    print()
    print('___ "%s" Action-Value Summary ___' % self.environment.name)

    def action_value_text(a_desc, Q):
        return '%s=' % str(a_desc) + (fmt_Q % Q.get_ave()).strip()

    layout = self.environment.layout

    # ------------------------- simple output w/o a layout ------------
    if layout is None:
        lines = []  # list of strings "(s_hash,a_desc)=Q"
        for s_hash, aD in self.Qsa_RaveD.items():
            for a_desc, Q in aD.items():
                value_str = fmt_Q % Q.get_ave()
                lines.append('(%s, %s)=' % (str(s_hash), str(a_desc)) + value_str.strip())

        for line in sorted(lines):
            print(' ', line)
        return

    # ------------------------- output arranged on the layout ---------
    if show_states:
        layout.s_hash_print(none_str='*')

    def cell_text(s_hash):
        if not self.environment.is_legal_state(s_hash):
            return s_hash[1:-1] if is_literal_str(s_hash) else none_str
        # first line is the state hash, then one line per action.
        cell_lines = [str(s_hash)]
        cell_lines.extend(action_value_text(a_desc, Q)
                          for a_desc, Q in self.Qsa_RaveD[s_hash].items())
        return '\n'.join(cell_lines).strip()

    grid = [[cell_text(s_hash) for s_hash in row] for row in layout.s_hash_rowL]

    print_string_rows(grid, row_tickL=layout.row_tickL, const_col_w=True,
                      line_chr='_', left_pad=' ', col_tickL=layout.col_tickL,
                      header=self.environment.name + ' Action-Value Summary, Q(s,a)',
                      x_axis_label=layout.x_axis_label,
                      y_axis_label=layout.y_axis_label, justify='right')
def summ_print(self, fmt_V='%g', none_str='*', show_states=True, showRunningAve=True):
    """Print a summary of the running-average state values, V(s).

    Values come from the get_ave() of every entry of self.Vs_RaveD
    (index=state_hash, value=RunningAve object) and are formatted with the
    %-style format fmt_V.

    With an environment layout the values are printed on the layout grid
    (preceded by the state hashes when show_states); none_str is shown for
    cells that are neither a legal state nor a literal string.
    Without a layout, one line per state is printed, sorted by state hash.

    If showRunningAve is true, summ_print() of every RunningAve object is
    called afterwards.
    """
    print()
    print('___ "%s" State-Value Summary ___' % self.environment.name)

    layout = self.environment.layout
    if layout is not None:
        # make summ_print using environment.layout
        if show_states:
            layout.s_hash_print(none_str='*')

        row_tickL = layout.row_tickL
        col_tickL = layout.col_tickL
        x_axis_label = layout.x_axis_label
        y_axis_label = layout.y_axis_label

        rows_outL = []
        for row in layout.s_hash_rowL:
            outL = []
            for s_hash in row:
                if not self.environment.is_legal_state(s_hash):
                    if is_literal_str(s_hash):
                        outL.append(s_hash[1:-1])
                    else:
                        outL.append(none_str)
                else:
                    outL.append(fmt_V % self.Vs_RaveD[s_hash].get_ave())
            rows_outL.append(outL)

        print_string_rows(rows_outL, row_tickL=row_tickL, const_col_w=True,
                          line_chr='_', left_pad=' ', col_tickL=col_tickL,
                          header=self.environment.name + ' State-Value Summary, V(s)',
                          x_axis_label=x_axis_label, y_axis_label=y_axis_label,
                          justify='right')

    # ------------------------- simple output w/o a layout ------------
    else:
        # sort on the state hash only, so RunningAve objects are never compared.
        outL = sorted(self.Vs_RaveD.items(), key=lambda item: item[0])

        # the hash column is at least 6 characters wide.
        lmax_hash = 6
        for s_hash, RA in outL:
            lmax_hash = max(lmax_hash, len(str(s_hash)))
        fmt_hash = '%' + '%is' % lmax_hash

        for (s_hash, RA) in outL:
            # format the average held by the RunningAve object, exactly as
            # the layout output does, not the object itself.
            print(' ', fmt_hash % str(s_hash), fmt_V % RA.get_ave())

    if showRunningAve:
        for s_hash, RA in self.Vs_RaveD.items():  # index=state_hash, value=RunningAve object
            RA.summ_print()
def s_hash_diagram(self, save_name='', basic_color='skyblue', do_show=False, none_str='*',
                   inp_colorD=None, pad=0.05, scale=1.0, h_over_w=1.0 ):
    """
    Create a PNG file of layout
    Use matplotlib to create a color-coded diagram.

    save_name       : if not empty, the figure is saved to this file
                      ('.png' is appended when missing).
    basic_color     : color of states that have no specific color; it is
                      overridden by self.basic_color when that is set.
    do_show         : call plt.show() at the end.
    none_str        : accepted but not used by this method.
    inp_colorD      : optional dict, index=s_hash, value=color string. Its
                      entries take precedence over those of self.colorD.
    pad             : amount of white space between state rectangles.
    scale, h_over_w : figure size multipliers.

    Prints a warning and returns None if matplotlib failed to import.
    """
    if not got_matplotlob:
        print('WARNING... Need matplotlib to create a diagram... it failed to import.')
        return

    local_colorD = {} # index=s_hash, value=color string
    if self.colorD is not None:
        local_colorD.update( self.colorD )

    if inp_colorD is not None:
        local_colorD.update( inp_colorD )

    if self.basic_color:
        basic_color = self.basic_color

    Ncols = len( self.s_hash_rowL[0] )
    Nrows = len( self.s_hash_rowL )

    w_lr = 1.0
    h_tb = 1.0
    fig = plt.figure( figsize=( scale*(Ncols+w_lr), h_over_w*scale*(Nrows+h_tb)) )
    # NOTE: a bare fig.add_axes() call used to precede this line; it needs a
    # rect argument and its result was never used, so only plt.axes() remains.
    plt.axes()

    alignment = {'horizontalalignment': 'center', 'verticalalignment': 'center'}
    font = FontProperties()
    font.set_size('large')
    font.set_family('fantasy')
    font.set_style('normal')

    d = 1.0 - pad
    d2 = d / 2.0

    for irow,row in enumerate(self.s_hash_rowL):
        y = Nrows - irow - 1 # row 0 of the layout is drawn at the top
        for jcol,s_hash in enumerate(row):
            c = local_colorD.get( s_hash, basic_color )

            if self.environment.is_legal_state( s_hash ):
                if s_hash in self.named_s_hashD:
                    s = self.named_s_hashD[s_hash]
                else:
                    s = str( s_hash )
                plt.text(jcol+d2, y+d2, s, fontproperties=font, **alignment)

                # Rectangle( (x,y), width, height)
                rect = Rectangle((jcol,y), d, d, fc=c, alpha=0.6, edgecolor=c)
                plt.gca().add_patch( rect )
            else:
                if is_literal_str( s_hash ):
                    # literal strings are shown as plain text, without a rectangle.
                    plt.text(jcol+d2, y+d2, s_hash[1:-1],
                             fontproperties=font, **alignment)
                else:
                    rect = Rectangle((jcol,y), d, d, fc='lemonchiffon', alpha=0.5,
                                     edgecolor='gray')
                    plt.gca().add_patch( rect )

    plt.xlim(0, Ncols)
    plt.ylim(0, Nrows)

    if self.col_tickL is None:
        plt.xticks([])
    else:
        plt.xticks( [i+0.5 for i in range(len(self.col_tickL))],
                    [str(ct) for ct in self.col_tickL] )

    if self.row_tickL is None:
        plt.yticks([])
    else:
        # rows are drawn top-down, so the labels are listed bottom-up.
        plt.yticks( [i+0.5 for i in range(len(self.row_tickL))],
                    [str(rt) for rt in reversed(self.row_tickL)] )

    plt.box(False)
    plt.title( self.environment.name )

    if self.x_axis_label:
        plt.xlabel( self.x_axis_label )
    if self.y_axis_label:
        plt.ylabel( self.y_axis_label )

    plt.tight_layout()

    if save_name:
        if save_name.lower().endswith('.png'):
            fig.savefig( save_name )
        else:
            fig.savefig( save_name + '.png' )

    if do_show:
        plt.show()
def layout_print(self, vname='reward', fmt='', show_env_states=True, none_str='*'):
    """Print the value "vname" arranged on the environment layout.

    vname           : name of the value to print. Only 'reward' is
                      recognized (values and messages then come from
                      self.get_estimated_rewards); any other name gives a
                      grid without values.
    fmt             : optional %-style format for each value; str() is used
                      when it is empty.
    show_env_states : first print the layout's state hashes.
    none_str        : text shown for cells without a value.

    Prints an error message and returns if there is no layout.
    """
    layout = self.layout
    if layout is None:
        print('...ERROR... "%s" tried to layout_print w/o a defined layout' % self.name)
        return

    if show_env_states:
        layout.s_hash_print(none_str=none_str)

    if vname == 'reward':
        # valD: index=s_hash, value=float reward estimate.
        # msgD: special message dictionary, index=s_hash.
        valD, msgD = self.get_estimated_rewards()
    else:
        valD, msgD = {}, {}  # empty if not recognized vname

    def cell_text(s_hash):
        if s_hash not in self.SC.stateD:
            return s_hash[1:-1] if is_literal_str(s_hash) else none_str

        val = valD.get(s_hash, None)
        if val is None:
            text = none_str
        elif fmt:
            text = fmt % val
        else:
            text = str(val)

        # any special message is appended to the value.
        msg = msgD.get(s_hash, '')
        return text + msg if msg else text

    grid = [[cell_text(s_hash) for s_hash in row] for row in layout.s_hash_rowL]

    if grid:
        print_string_rows(grid, const_col_w=True, line_chr='_', left_pad=' ',
                          y_axis_label=layout.y_axis_label,
                          row_tickL=layout.row_tickL, col_tickL=layout.col_tickL,
                          header=self.name + ' %s Summary' % vname.title(),
                          x_axis_label=layout.x_axis_label, justify='right')