def plot(self, y="return", x="learning_steps", save=False):
    """Draw one recorded result series against another and show the figure.

    Only basic plotting is offered here; for more advanced plotting of
    results consider :py:class:`Tools.Merger.Merger`.

    :param y: key into ``self.result`` plotted on the vertical axis.
    :param x: key into ``self.result`` plotted on the horizontal axis.
    :param save: when true, the figure is also written as a PDF inside
        ``self.full_path``.
    """
    label_map = rlpy.Tools.results.default_labels
    fig = plt.figure("Performance")
    data = self.result
    plt.plot(data[x], data[y], '-bo', lw=3, markersize=10)
    plt.xlim(0, data[x][-1] * 1.01)

    values = np.array(data[y])
    lowest = values.min()
    highest = values.max()
    spread = highest - lowest
    if spread > 0:
        # Pad the y-range by 10% of the spread plus a fixed 0.1 margin.
        margin = .1 * spread
        plt.ylim(lowest - margin - .1, highest + margin + .1)

    # Fall back to the raw key when no pretty label is registered.
    x_text, y_text = [
        label_map[key] if key in label_map else key for key in (x, y)]
    plt.xlabel(x_text, fontsize=16)
    plt.ylabel(y_text, fontsize=16)

    if save:
        target = os.path.join(
            self.full_path,
            "{:3}-performance.pdf".format(self.exp_id))
        fig.savefig(target, transparent=True, pad_inches=.1)
    plt.ioff()
    plt.show()
def plot(self, y="return", x="learning_steps", save=False):
    """Show a simple line plot of ``self.result[y]`` over ``self.result[x]``.

    This function has only limited capabilities. For more advanced plotting
    of results consider :py:class:`Tools.Merger.Merger`.

    :param y: name of the result series used for the y-axis.
    :param x: name of the result series used for the x-axis.
    :param save: if true, save the figure as a PDF under ``self.full_path``
        before showing it.
    """
    pretty = rlpy.Tools.results.default_labels
    figure = plt.figure("Performance")
    results = self.result
    plt.plot(results[x], results[y], '-bo', lw=3, markersize=10)
    plt.xlim(0, results[x][-1] * 1.01)

    series = np.array(results[y])
    low, high = series.min(), series.max()
    extent = high - low
    if extent > 0:
        # A flat series keeps matplotlib's automatic y-limits.
        plt.ylim(low - .1 * extent - .1, high + .1 * extent + .1)

    if x in pretty:
        x_caption = pretty[x]
    else:
        x_caption = x
    if y in pretty:
        y_caption = pretty[y]
    else:
        y_caption = y
    plt.xlabel(x_caption, fontsize=16)
    plt.ylabel(y_caption, fontsize=16)

    if save:
        pdf_name = "{:3}-performance.pdf".format(self.exp_id)
        figure.savefig(
            os.path.join(self.full_path, pdf_name),
            transparent=True,
            pad_inches=.1)
    plt.ioff()
    plt.show()
def showDomain(self, a=None):
    """Redraw the segment endpoints and the current action in the figure.

    :param a: index into ``self.actions``; ``None`` is displayed as-is.
    """
    if a is not None:
        a = self.actions[a]

    # Unit direction of every segment, built from the angles in self.theta.
    direction = np.empty((self.d, 2))
    direction[:, 0] = np.cos(self.theta)
    direction[:, 1] = np.sin(self.theta)

    centers = np.dot(self.P, direction)
    half_span = .5 * self.lengths[:, None] * direction
    tails = centers - half_span
    heads = centers + half_span
    Rx = np.hstack([tails[:, 0], heads[:, 0]]) + self.pos_cm[0]
    Ry = np.hstack([tails[:, 1], heads[:, 1]]) + self.pos_cm[1]
    print(Rx)
    print(Ry)

    fig = plt.figure("Swimmer Domain")
    if hasattr(self, "swimmer_lines"):
        # Artists already exist: only refresh their data.
        self.swimmer_lines.set_data(Rx, Ry)
        self.action_text.set_text(str(a))
    else:
        plt.plot(0., 0., "ro")
        self.swimmer_lines = plt.plot(Rx, Ry)[0]
        self.action_text = plt.text(-2, -8, str(a))
        plt.xlim(-5, 15)
        plt.ylim(-10, 10)
    fig.canvas.draw()
    fig.canvas.flush_events()
def showDomain(self, a):
    """Draw the map once, then re-plot the ally and intruder markers.

    :param a: unused by the drawing code.
    """
    state = self.state

    # One-time setup of the static background image and tick labels.
    if self.domain_fig is None:
        self.domain_fig = plt.imshow(
            self.map,
            cmap='IntruderMonitoring',
            interpolation='nearest',
            vmin=0,
            vmax=3)
        plt.xticks(np.arange(self.COLS), fontsize=FONTSIZE)
        plt.yticks(np.arange(self.ROWS), fontsize=FONTSIZE)
        plt.show()

    # Drop the markers drawn on the previous call.
    if self.ally_fig is not None:
        self.ally_fig.pop(0).remove()
        self.intruder_fig.pop(0).remove()

    # The first NUMBER_OF_AGENTS * 2 entries are the allies; the rest are
    # the intruders. Each pair is plotted as (column 1 -> x, column 0 -> y).
    split = self.NUMBER_OF_AGENTS * 2
    allies = state[0:split].reshape((-1, 2))
    intruders = state[split:].reshape((-1, 2))

    marker_style = dict(
        markersize=30.0, alpha=.7, markeredgecolor='k', markeredgewidth=2)
    self.ally_fig = plt.plot(
        allies[:, 1], allies[:, 0], 'bo', **marker_style)
    self.intruder_fig = plt.plot(
        intruders[:, 1], intruders[:, 0], 'g>', color='gray', **marker_style)
    plt.draw()
def show_one_variable(variable, vrange, map1="9x9-2PathR1.txt"):
    """Evaluate ``param_ranges`` twice and plot both curves against ``vrange``.

    ``param_ranges`` is called once without ``cur_map`` (the control curve)
    and once with ``cur_map=map1``. Plotting is best-effort: an ordinary
    plotting error is logged and the computed values are still returned.
    Unlike a ``return`` inside ``finally``, this no longer swallows
    ``KeyboardInterrupt`` / ``SystemExit`` raised while the window is open.

    :param variable: parameter name, forwarded to ``param_ranges`` and used
        in the plot title.
    :param vrange: values of the parameter; also used as the x-axis.
    :param map1: value passed as ``cur_map`` for the second curve.
    :return: tuple ``(ctrl, y)`` with the two ``param_ranges`` results.
    """
    import logging

    x = vrange
    ctrl = param_ranges(variable, vrange)
    y = param_ranges(variable, vrange, cur_map=map1)
    try:
        plt.title(variable + " vs AUC")
        plt.ioff()
        # Label both curves so that plt.legend() has something to show.
        plt.plot(x, ctrl, label="control")
        plt.plot(x, y, label=str(map1))
        plt.legend()
        plt.show()
    except Exception:
        # Keep the original best-effort contract for plotting failures.
        logging.getLogger(__name__).exception(
            "Plotting %s failed; returning computed values", variable)
    return ctrl, y
def showDomain(self, a):
    """Draw the goal, the recorded slip points and the car at its pose.

    Reads ``self.state`` as ``(x, y, speed, heading)`` and redraws the car
    rectangle rotated by ``heading`` (radians) around ``(x, y)``.

    Changes from the previous version:

    * the old car patch is removed with ``Artist.remove()`` rather than by
      mutating ``Axes.patches``, which is not a mutable list in current
      matplotlib releases;
    * the slip line is looked up on ``plt.gca()``; ``plt.axes()`` creates a
      new axes in current matplotlib instead of returning the current one.

    :param a: unused by the drawing code.
    """
    if self.gcf is None:
        self.gcf = plt.gcf()
    s = self.state
    # Plot the car
    x, y, speed, heading = s
    car_xmin = x - self.REAR_WHEEL_RELATIVE_LOC
    car_ymin = y - self.CAR_WIDTH / 2.
    if self.domain_fig is None:  # Need to initialize the figure
        self.domain_fig = plt.figure()
        # Goal
        plt.gca().add_patch(
            plt.Circle(
                self.GOAL,
                radius=self.GOAL_RADIUS,
                color='g',
                alpha=.4))
        plt.xlim([self.XMIN, self.XMAX])
        plt.ylim([self.YMIN, self.YMAX])
        plt.gca().set_aspect('1')

    # Car: drop the rectangle drawn on the previous call.
    if self.car_fig is not None:
        self.car_fig.remove()

    if self.slips:
        slip_x, slip_y = zip(*self.slips)
        try:
            line = plt.gca().lines[0]
            if len(line.get_xdata()) != len(slip_x):
                # Plot is out of date with the recorded slips.
                line.set_xdata(slip_x)
                line.set_ydata(slip_y)
        except IndexError:
            # No line on the axes yet: create the slip markers.
            plt.plot(slip_x, slip_y, 'x', color='b')

    self.car_fig = mpatches.Rectangle(
        [car_xmin, car_ymin],
        self.CAR_LENGTH,
        self.CAR_WIDTH,
        alpha=.4)
    # rotate_deg_around expects degrees; heading is converted from radians.
    rotation = mpl.transforms.Affine2D().rotate_deg_around(
        x, y, heading * 180 / np.pi) + plt.gca().transData
    self.car_fig.set_transform(rotation)
    plt.gca().add_patch(self.car_fig)

    plt.draw()
    # self.gcf.canvas.draw()
    plt.pause(0.001)
def showDomain(self, a=0):
    """Show the board as an image and mark the cell addressed by ``a``.

    :param a: flat action id, converted to (row, column) with ``id2vec``.
    """
    board = self.state
    size = self.BOARD_SIZE

    # First call: create the image and an initial move marker.
    if self.domain_fig is None:
        self.move_fig = plt.subplot(111)
        board = board.reshape((size, size))
        self.domain_fig = plt.imshow(
            board,
            cmap='FlipBoard',
            interpolation='nearest',
            vmin=0,
            vmax=1)
        plt.xticks(np.arange(size), fontsize=FONTSIZE)
        plt.yticks(np.arange(size), fontsize=FONTSIZE)
        # pl.tight_layout()
        row, col = id2vec(a, [size, size])
        self.move_fig = self.move_fig.plot(col, row, 'kx', markersize=30.0)
        plt.show()

    row, col = id2vec(a, [size, size])
    # Replace the previous marker with one at the new cell.
    self.move_fig.pop(0).remove()
    # Instead of 'x' other marker codes such as 'D' or 'o' can be used.
    self.move_fig = plt.plot(col, row, 'kx', markersize=30.0)
    board = board.reshape((size, size))
    self.domain_fig.set_data(board)
    plt.draw()
def showDomain(self, a=0):
    """Render ``self.state`` as a square image with a cross on the move.

    :param a: flat action id; ``id2vec`` maps it to a (row, column) pair.
    """
    grid = self.state
    dims = [self.BOARD_SIZE, self.BOARD_SIZE]
    first_call = self.domain_fig is None

    if first_call:
        # Build the image artist and a placeholder marker once.
        self.move_fig = plt.subplot(111)
        grid = grid.reshape((self.BOARD_SIZE, self.BOARD_SIZE))
        self.domain_fig = plt.imshow(
            grid, cmap='FlipBoard', interpolation='nearest', vmin=0, vmax=1)
        ticks = np.arange(self.BOARD_SIZE)
        plt.xticks(ticks, fontsize=FONTSIZE)
        plt.yticks(np.arange(self.BOARD_SIZE), fontsize=FONTSIZE)
        # pl.tight_layout()
        move_row, move_col = id2vec(a, dims)
        self.move_fig = self.move_fig.plot(
            move_col, move_row, 'kx', markersize=30.0)
        plt.show()

    move_row, move_col = id2vec(a, dims)
    # Remove the old marker before drawing the new one.
    old_marker = self.move_fig.pop(0)
    old_marker.remove()
    # Instead of 'x' other marker codes such as 'D' or 'o' can be used.
    self.move_fig = plt.plot(move_col, move_row, 'kx', markersize=30.0)
    grid = grid.reshape((self.BOARD_SIZE, self.BOARD_SIZE))
    self.domain_fig.set_data(grid)
    plt.draw()
def showDomain(self, a):
    """Draw the map in the "Domain" figure and refresh the agent markers.

    :param a: unused by the drawing code.
    """
    state = self.state

    # One-time creation of the figure, background image and tick labels.
    if self.domain_fig is None:
        plt.figure("Domain")
        self.domain_fig = plt.imshow(
            self.map,
            cmap='IntruderMonitoring',
            interpolation='nearest',
            vmin=0,
            vmax=3)
        plt.xticks(np.arange(self.COLS), fontsize=FONTSIZE)
        plt.yticks(np.arange(self.ROWS), fontsize=FONTSIZE)
        plt.show()

    # Remove the markers left over from the previous call.
    if self.ally_fig is not None:
        self.ally_fig.pop(0).remove()
        self.intruder_fig.pop(0).remove()

    # State layout: ally pairs first, intruder pairs after them.
    boundary = self.NUMBER_OF_AGENTS * 2
    ally_pos = state[0:boundary].reshape((-1, 2))
    intruder_pos = state[boundary:].reshape((-1, 2))

    shared = dict(
        markersize=30.0, alpha=.7, markeredgecolor='k', markeredgewidth=2)
    self.ally_fig = plt.plot(
        ally_pos[:, 1], ally_pos[:, 0], 'bo', **shared)
    self.intruder_fig = plt.plot(
        intruder_pos[:, 1], intruder_pos[:, 0], 'g>', color='gray', **shared)

    canvas = plt.figure("Domain").canvas
    canvas.draw()
    canvas.flush_events()
def plot_trials(self, y="eps_return", x="learning_steps", average=10,
                save=False):
    """Plot a (optionally smoothed) per-trial series and show the figure.

    This function has only limited capabilities. For more advanced plotting
    of results consider :py:class:`Tools.Merger.Merger`.

    :param y: key into ``self.trials`` plotted on the vertical axis.
    :param x: key into ``self.trials`` plotted on the horizontal axis.
    :param average: length of the moving-average window applied to the
        y-series; a falsy value disables smoothing.
    :param save: when true, also write the figure as a PDF inside
        ``self.full_path``.
    """
    label_map = rlpy.Tools.results.default_labels
    fig = plt.figure("Performance")
    records = self.trials
    series = np.array(records[y])

    if average:
        assert type(average) is int, "Filter length is not an integer!"
        # Box filter: uniform weights, output has the input's length.
        kernel = np.ones(int(average)) / float(average)
        series = np.convolve(series, kernel, 'same')

    plt.plot(records[x], series, '-bo', lw=3, markersize=10)
    plt.xlim(0, records[x][-1] * 1.01)

    lowest = series.min()
    highest = series.max()
    spread = highest - lowest
    if spread > 0:
        plt.ylim(lowest - .1 * spread - .1, highest + .1 * spread + .1)

    # Fall back to the raw key when no pretty label is registered.
    x_text, y_text = [
        label_map[key] if key in label_map else key for key in (x, y)]
    plt.xlabel(x_text, fontsize=16)
    plt.ylabel(y_text, fontsize=16)

    if save:
        target = os.path.join(
            self.full_path,
            "{:3}-trials.pdf".format(self.exp_id))
        fig.savefig(target, transparent=True, pad_inches=.1)
    plt.ioff()
    plt.show()
def batchDiscover(self, td_errors, phi, states):
    """Discover features using iFDD in the batch setting.

    Every pair of features that is active together in at least one sample
    gets a relevance score; the highest-scoring pairs are offered to
    ``self.inspectPair`` until ``self.maxBatchDiscovery`` pairs have been
    accepted or the score drops to ``self.batchThreshold`` or below.

    :param td_errors: sequence of length p, error observed for each sample.
    :param phi: p-by-n array; row ``i`` holds the features of sample ``i``.
    :param states: not used by this method.
    :return: True if at least one call to ``inspectPair`` reported success.
    """
    SHOW_PLOT = 0  # Set to 1 to plot the sorted relevances (debugging aid)
    maxDiscovery = self.maxBatchDiscovery
    n = self.features_num  # number of features
    p = len(td_errors)  # number of samples
    counts = np.zeros((n, n))
    relevances = np.zeros((n, n))
    # range works on both Python 2 and 3 (xrange exists only on Python 2).
    for i in range(p):
        phiphiT = np.outer(phi[i, :], phi[i, :])
        if self.iFDDPlus:
            relevances += phiphiT * td_errors[i]
        else:
            relevances += phiphiT * abs(td_errors[i])
        counts += phiphiT

    # Keep only the strictly upper triangle: the diagonal and the lower
    # part are dropped so that each pair is considered once.
    relevances = np.triu(relevances, 1)
    if self.iFDDPlus:
        # Absolute accumulated error divided by sqrt of the co-activation
        # count (original note: based on theoretical results of an
        # ICML 2013 potential submission).
        non_zero_index = np.nonzero(relevances)
        relevances[non_zero_index] = np.divide(
            np.abs(relevances[non_zero_index]),
            np.sqrt(counts[non_zero_index]))
    # Otherwise the accumulated absolute errors are used as they are
    # (original note: based on the Geramifard11_ICML paper).

    # F1 and F2 are the parents of the candidate features.
    (F1, F2) = relevances.nonzero()
    relevances = relevances[F1, F2]
    if len(relevances) == 0:
        # No feature to add
        self.logger.debug("iFDD Batch: Max Relevance = 0")
        return False

    if SHOW_PLOT:
        e_vec = relevances.flatten()
        e_vec = e_vec[e_vec != 0]
        e_vec = np.sort(e_vec)
        plt.ioff()
        plt.plot(e_vec, linewidth=3)
        plt.show()

    # Sort from high to low relevance, hence the reverse: [::-1]
    sortedIndices = np.argsort(relevances)[::-1]
    max_relevance = relevances[sortedIndices[0]]
    self.logger.debug(
        "iFDD Batch: Max Relevance = {0:g}".format(max_relevance))

    added_feature = False
    new_features = 0
    for max_index in sortedIndices:
        if new_features >= maxDiscovery:
            break
        f1 = F1[max_index]
        f2 = F2[max_index]
        relevance = relevances[max_index]
        if relevance > self.batchThreshold:
            if self.inspectPair(f1, f2, np.inf):
                self.logger.debug(
                    'New Feature %d: %s, Relevance = %0.3f' %
                    (self.features_num - 1,
                     self.getStrFeatureSet(self.features_num - 1),
                     relevances[max_index]))
                new_features += 1
                added_feature = True
        else:
            # The list is sorted, so no later pair can pass the threshold.
            break
    # Signals whether the representation has been expanded or not.
    return added_feature
def showDomain(self, a):
    """Draw the hill, the car at its current position and an action arrow.

    The state is read as ``(pos, vel)``; the car is drawn as a short line
    tangent to ``sin(3 * pos)``. A black arrow at the front is shown when
    ``self.actions[a]`` is positive, a red arrow at the back when negative.

    Parts of this code were adopted from online source code by
    Jose Antonio Martin H. <*****@*****.**>.

    :param a: index into ``self.actions``.
    """
    figure_name = "Mountain Car Domain"
    pos, vel = self.state

    if self.domain_fig is None:
        # First call: build the static scenery once.
        self.domain_fig = plt.figure(figure_name)
        # Hill profile
        hill_x = np.linspace(self.XMIN, self.XMAX, 1000)
        hill_y = np.sin(3 * hill_x)
        axes = plt.gca()
        axes.fill_between(
            hill_x, min(hill_y) - self.CAR_HEIGHT * 2, hill_y, color='g')
        plt.xlim([self.XMIN - .2, self.XMAX])
        plt.ylim([min(hill_y) - self.CAR_HEIGHT * 2,
                  max(hill_y) + self.CAR_HEIGHT * 2])
        # Car (its data is filled in further down)
        self.car = lines.Line2D([], [], linewidth=20, color='b', alpha=.8)
        axes.add_line(self.car)
        # Goal marker
        plt.plot(self.GOAL, np.sin(3 * self.GOAL), 'yd', markersize=10.0)
        plt.axis('off')
        axes.set_aspect('1')
    self.domain_fig = plt.figure(figure_name)

    # Car endpoints: half a car width either side of the centre, along the
    # local slope of the hill.
    mid_x = pos
    mid_y = np.sin(3 * pos)
    slope = np.arctan(3 * np.cos(3 * pos))
    half_dx = self.CAR_WIDTH * np.cos(slope) / 2.
    half_dy = self.CAR_WIDTH * np.sin(slope) / 2.
    back_x, front_x = mid_x - half_dx, mid_x + half_dx
    back_y, front_y = mid_y - half_dy, mid_y + half_dy
    self.car.set_data([back_x, front_x], [back_y, front_y])

    # Arrows: discard the previous one before (maybe) drawing a new one.
    if self.actionArrow is not None:
        self.actionArrow.remove()
        self.actionArrow = None

    force = self.actions[a]
    if force > 0:
        self.actionArrow = fromAtoB(
            front_x, front_y,
            front_x + self.ARROW_LENGTH * np.cos(slope),
            front_y + self.ARROW_LENGTH * np.sin(slope),
            'k', "arc3,rad=0", 0, 0, 'simple'
        )
    elif force < 0:
        self.actionArrow = fromAtoB(
            back_x, back_y,
            back_x - self.ARROW_LENGTH * np.cos(slope),
            back_y - self.ARROW_LENGTH * np.sin(slope),
            'r', "arc3,rad=0", 0, 0, 'simple'
        )
    plt.draw()
def batchDiscover(self, td_errors, phi, states):
    """Discover features using OMPTD.

    1. Find the index of remaining features in the bag \n
    2. Calculate the inner product of each feature with the TD_Error
       vector \n
    3. Add the top maxBatchDiscovery features to the selected features \n

    :param td_errors: p-by-1 vector, error associated with each state.
    :param phi: p-by-n matrix, vector-valued feature function evaluated at
        each state. Not read here; the columns of ``self.fullphi`` (filled
        by ``calculateFullPhiNormalized``) are used instead.
    :param states: p-by-(statedimension) matrix, each state under test.
    :return: Boolean indicating expansion of features.
    """
    # len() rather than truthiness: remainingFeatures is rebound to the
    # numpy array returned by np.delete at the end of this method.
    if len(self.remainingFeatures) == 0:
        # No more features to expand
        return False

    SHOW_RELEVANCES = 0  # Set to 1 to plot the relevances (debugging aid)
    self.calculateFullPhiNormalized(states)

    relevances = np.zeros(len(self.remainingFeatures))
    for i, f in enumerate(self.remainingFeatures):
        phi_f = self.fullphi[:, f]
        relevances[i] = np.abs(np.dot(phi_f, td_errors))

    if SHOW_RELEVANCES:
        e_vec = relevances.flatten()
        e_vec = e_vec[e_vec != 0]
        e_vec = np.sort(e_vec)
        plt.plot(e_vec, linewidth=3)
        plt.ioff()
        plt.show()
        plt.ion()

    # Sort from high to low relevance, hence the reverse: [::-1]
    sortedIndices = np.argsort(relevances)[::-1]
    max_relevance = relevances[sortedIndices[0]]
    self.logger.debug("OMPTD Batch: Max Relevance = %0.3f" % max_relevance)

    added_feature = False
    to_be_deleted = []  # Positions in remainingFeatures to be removed
    # range works on both Python 2 and 3 (xrange exists only on Python 2).
    for j in range(min(self.maxBatchDiscovery, len(relevances))):
        max_index = sortedIndices[j]
        f = self.remainingFeatures[max_index]
        relevance = relevances[max_index]
        if relevance >= self.batchThreshold:
            self.logger.debug(
                'New Feature %d: %s, Relevance = %0.3f' %
                (self.features_num,
                 str(np.sort(list(self.iFDD.getFeature(f).f_set))),
                 relevances[max_index]))
            to_be_deleted.append(max_index)
            self.selectedFeatures.append(f)
            self.features_num += 1
            added_feature = True
        else:
            # The list is sorted, so no later feature can pass the
            # threshold.
            break
    self.remainingFeatures = np.delete(self.remainingFeatures, to_be_deleted)
    return added_feature
kernel_args=[kernel_width], active_threshold=active_threshold, discover_threshold=discover_threshold, normalization=False, max_active_base_feat=100, max_base_feat_sim=max_base_feat_sim) policy = SwimmerPolicy(representation) #policy = eGreedy(representation, epsilon=0.1) stat_bins_per_state_dim = 20 # agent = SARSA(representation,policy,domain,initial_learn_rate=initial_learn_rate, # lambda_=.0, learn_rate_decay_mode="boyan", boyan_N0=boyan_N0) opt["agent"] = SARSA( policy, representation, discount_factor=domain.discount_factor, lambda_=lambda_, initial_learn_rate=initial_learn_rate, learn_rate_decay_mode="boyan", boyan_N0=boyan_N0) experiment = Experiment(**opt) return experiment if __name__ == '__main__': from rlpy.Tools.run import run_profiled # run_profiled(make_experiment) experiment = make_experiment(1) experiment.run(visualize_performance=1, visualize_learning=True) # experiment.plot() # experiment.save() from rlpy.Tools import plt plt.figure() for i in range(9): plt.plot(experiment.state_counts_learn[i], label="Dim " + str(i)) plt.legend()
def batchDiscover(self, td_errors, phi, states):
    """Discover features using iFDD in the batch setting.

    Pairs of features that are active together in at least one sample are
    scored; candidates are passed to ``self.inspectPair`` from the highest
    score downwards. The search stops once ``self.maxBatchDiscovery`` pairs
    were accepted or a score is not above ``self.batchThreshold``.

    :param td_errors: sequence of length p, error observed for each sample.
    :param phi: p-by-n array; row ``i`` holds the features of sample ``i``.
    :param states: not used by this method.
    :return: True if at least one call to ``inspectPair`` reported success.
    """
    SHOW_PLOT = 0  # Set to 1 to plot the sorted relevances (debugging aid)
    maxDiscovery = self.maxBatchDiscovery
    n = self.features_num  # number of features
    p = len(td_errors)  # number of samples
    counts = np.zeros((n, n))
    relevances = np.zeros((n, n))
    # range works on both Python 2 and 3 (xrange exists only on Python 2).
    for i in range(p):
        phiphiT = np.outer(phi[i, :], phi[i, :])
        # iFDD+ accumulates signed errors; plain iFDD uses absolute errors.
        weight = td_errors[i] if self.iFDDPlus else abs(td_errors[i])
        relevances += phiphiT * weight
        counts += phiphiT

    # Keep only the strictly upper triangle: the diagonal and the lower
    # part are dropped so that each pair is considered once.
    relevances = np.triu(relevances, 1)
    if self.iFDDPlus:
        # Absolute accumulated error divided by sqrt of the co-activation
        # count (original note: based on theoretical results of an
        # ICML 2013 potential submission).
        non_zero_index = np.nonzero(relevances)
        relevances[non_zero_index] = np.divide(
            np.abs(relevances[non_zero_index]),
            np.sqrt(counts[non_zero_index]))
    # Otherwise the accumulated absolute errors are used as they are
    # (original note: based on the Geramifard11_ICML paper).

    # F1 and F2 are the parents of the candidate features.
    (F1, F2) = relevances.nonzero()
    relevances = relevances[F1, F2]
    if len(relevances) == 0:
        # No feature to add
        self.logger.debug("iFDD Batch: Max Relevance = 0")
        return False

    if SHOW_PLOT:
        e_vec = relevances.flatten()
        e_vec = e_vec[e_vec != 0]
        e_vec = np.sort(e_vec)
        plt.ioff()
        plt.plot(e_vec, linewidth=3)
        plt.show()

    # Sort from high to low relevance, hence the reverse: [::-1]
    sortedIndices = np.argsort(relevances)[::-1]
    max_relevance = relevances[sortedIndices[0]]
    self.logger.debug(
        "iFDD Batch: Max Relevance = {0:g}".format(max_relevance))

    added_feature = False
    new_features = 0
    for max_index in sortedIndices:
        if new_features >= maxDiscovery:
            break
        f1 = F1[max_index]
        f2 = F2[max_index]
        relevance = relevances[max_index]
        if relevance <= self.batchThreshold:
            # The list is sorted, so no later pair can pass the threshold.
            break
        if self.inspectPair(f1, f2, np.inf):
            self.logger.debug(
                'New Feature %d: %s, Relevance = %0.3f' %
                (self.features_num - 1,
                 self.getStrFeatureSet(self.features_num - 1),
                 relevances[max_index]))
            new_features += 1
            added_feature = True
    # Signals whether the representation has been expanded or not.
    return added_feature