def plot(self, y="return", x="learning_steps", save=False):
     """Plots the performance of the experiment
     This function has only limited capabilities.
     For more advanced plotting of results consider
     :py:class:`Tools.Merger.Merger`.
     """
     labels = rlpy.Tools.results.default_labels
     performance_fig = plt.figure("Performance")
     res = self.result
     plt.plot(res[x], res[y], '-bo', lw=3, markersize=10)
     plt.xlim(0, res[x][-1] * 1.01)
     y_arr = np.array(res[y])
     m = y_arr.min()
     M = y_arr.max()
     delta = M - m
     if delta > 0:
         plt.ylim(m - .1 * delta - .1, M + .1 * delta + .1)
     xlabel = labels[x] if x in labels else x
     ylabel = labels[y] if y in labels else y
     plt.xlabel(xlabel, fontsize=16)
     plt.ylabel(ylabel, fontsize=16)
     if save:
         path = os.path.join(self.full_path,
                             "{:3}-performance.pdf".format(self.exp_id))
         performance_fig.savefig(path, transparent=True, pad_inches=.1)
     plt.ioff()
     plt.show()
Exemple #2
0
 def plot(self, y="return", x="learning_steps", save=False):
     """Plots the performance of the experiment
     This function has only limited capabilities.
     For more advanced plotting of results consider
     :py:class:`Tools.Merger.Merger`.
     """
     labels = rlpy.Tools.results.default_labels
     performance_fig = plt.figure("Performance")
     res = self.result
     plt.plot(res[x], res[y], '-bo', lw=3, markersize=10)
     plt.xlim(0, res[x][-1] * 1.01)
     y_arr = np.array(res[y])
     m = y_arr.min()
     M = y_arr.max()
     delta = M - m
     if delta > 0:
         plt.ylim(m - .1 * delta - .1, M + .1 * delta + .1)
     xlabel = labels[x] if x in labels else x
     ylabel = labels[y] if y in labels else y
     plt.xlabel(xlabel, fontsize=16)
     plt.ylabel(ylabel, fontsize=16)
     if save:
         path = os.path.join(
             self.full_path,
             "{:3}-performance.pdf".format(self.exp_id))
         performance_fig.savefig(path, transparent=True, pad_inches=.1)
     plt.ioff()
     plt.show()
def show_one_variable(variable, vrange, map1="9x9-2PathR1.txt"):
	x = vrange
	ctrl = param_ranges(variable, vrange)
	y = param_ranges(variable, vrange, cur_map=map1)
	try:
		plt.title(variable + " vs AUC")
		plt.ioff()
		plt.plot(x, ctrl)
		plt.plot(x, y)
		plt.legend()
		plt.show()
	finally:
		return ctrl, y
Exemple #4
0
    def plot_trials(self,
                    y="eps_return",
                    x="learning_steps",
                    average=10,
                    save=False):
        """Plots the performance of the experiment
        This function has only limited capabilities.
        For more advanced plotting of results consider
        :py:class:`Tools.Merger.Merger`.
        """
        def movingaverage(interval, window_size):
            window = np.ones(int(window_size)) / float(window_size)
            return np.convolve(interval, window, 'same')

        labels = rlpy.Tools.results.default_labels
        performance_fig = plt.figure("Performance")
        trials = self.trials
        y_arr = np.array(trials[y])
        if average:
            assert type(average) is int, "Filter length is not an integer!"
            y_arr = movingaverage(y_arr, average)
        plt.plot(trials[x], y_arr, '-bo', lw=3, markersize=10)
        plt.xlim(0, trials[x][-1] * 1.01)
        m = y_arr.min()
        M = y_arr.max()
        delta = M - m
        if delta > 0:
            plt.ylim(m - .1 * delta - .1, M + .1 * delta + .1)
        xlabel = labels[x] if x in labels else x
        ylabel = labels[y] if y in labels else y
        plt.xlabel(xlabel, fontsize=16)
        plt.ylabel(ylabel, fontsize=16)
        if save:
            path = os.path.join(self.full_path,
                                "{:3}-trials.pdf".format(self.exp_id))
            performance_fig.savefig(path, transparent=True, pad_inches=.1)
        plt.ioff()
        plt.show()
Exemple #5
0
    def batchDiscover(self, td_errors, phi, states):
        # Discovers features using iFDD in batch setting.
        # TD_Error: p-by-1 (How much error observed for each sample)
        # phi: n-by-p features corresponding to all samples (each column corresponds to one sample)
        # self.batchThreshold is the minimum relevance value for the feature to
        # be expanded
        SHOW_PLOT = 0  # Shows the histogram of relevances
        maxDiscovery = self.maxBatchDiscovery
        n = self.features_num  # number of features
        p = len(td_errors)  # Number of samples
        counts = np.zeros((n, n))
        relevances = np.zeros((n, n))
        for i in xrange(p):
            phiphiT = np.outer(phi[i, :], phi[i, :])
            if self.iFDDPlus:
                relevances += phiphiT * td_errors[i]
            else:
                relevances += phiphiT * abs(td_errors[i])
            counts += phiphiT
        # Remove Diagonal and upper part of the relevances as they are useless
        relevances = np.triu(relevances, 1)
        non_zero_index = np.nonzero(relevances)
        if self.iFDDPlus:
            # Calculate relevances based on theoretical results of ICML 2013
            # potential submission
            relevances[non_zero_index] = np.divide(
                np.abs(relevances[non_zero_index]),
                np.sqrt(counts[non_zero_index]))
        else:
            # Based on Geramifard11_ICML Paper
            relevances[non_zero_index] = relevances[non_zero_index]

        # Find indexes to non-zero excited pairs
        # F1 and F2 are the parents of the potentials
        (F1, F2) = relevances.nonzero()
        relevances = relevances[F1, F2]
        if len(relevances) == 0:
            # No feature to add
            self.logger.debug("iFDD Batch: Max Relevance = 0")
            return False

        if SHOW_PLOT:
            e_vec = relevances.flatten()
            e_vec = e_vec[e_vec != 0]
            e_vec = np.sort(e_vec)
            plt.ioff()
            plt.plot(e_vec, linewidth=3)
            plt.show()

        # Sort based on relevances
        # We want high to low hence the reverse: [::-1]
        sortedIndices = np.argsort(relevances)[::-1]
        max_relevance = relevances[sortedIndices[0]]
        # Add top <maxDiscovery> features
        self.logger.debug(
            "iFDD Batch: Max Relevance = {0:g}".format(max_relevance))
        added_feature = False
        new_features = 0
        for j in xrange(len(relevances)):
            if new_features >= maxDiscovery:
                break
            max_index = sortedIndices[j]
            f1 = F1[max_index]
            f2 = F2[max_index]
            relevance = relevances[max_index]
            if relevance > self.batchThreshold:
                # print "Inspecting",
                # f1,f2,'=>',self.getStrFeatureSet(f1),self.getStrFeatureSet(f2)
                if self.inspectPair(f1, f2, np.inf):
                    self.logger.debug(
                        'New Feature %d: %s, Relevance = %0.3f' %
                        (self.features_num - 1,
                         self.getStrFeatureSet(self.features_num - 1),
                         relevances[max_index]))
                    new_features += 1
                    added_feature = True
            else:
                # Because the list is sorted, there is no use to look at the
                # others
                break
        return (
            # A signal to see if the representation has been expanded or not
            added_feature)
Exemple #6
0
    def batchDiscover(self, td_errors, phi, states):
        """
        :param td_errors: p-by-1 vector, error associated with each state
        :param phi: p-by-n matrix, vector-valued feature function evaluated at 
            each state.
        :param states: p-by-(statedimension) matrix, each state under test.
        
        Discovers features using OMPTD
        1. Find the index of remaining features in the bag \n
        2. Calculate the inner product of each feature with the TD_Error vector \n
        3. Add the top maxBatchDiscovery features to the selected features \n
        
        OUTPUT: Boolean indicating expansion of features
        
        """
        if len(self.remainingFeatures) == 0:
            # No More features to Expand
            return False

        SHOW_RELEVANCES = 0      # Plot the relevances
        self.calculateFullPhiNormalized(states)

        relevances = np.zeros(len(self.remainingFeatures))
        for i, f in enumerate(self.remainingFeatures):
            phi_f = self.fullphi[:, f]
            relevances[i] = np.abs(np.dot(phi_f, td_errors))

        if SHOW_RELEVANCES:
            e_vec = relevances.flatten()
            e_vec = e_vec[e_vec != 0]
            e_vec = np.sort(e_vec)
            plt.plot(e_vec, linewidth=3)
            plt.ioff()
            plt.show()
            plt.ion()

        # Sort based on relevances
        # We want high to low hence the reverse: [::-1]
        sortedIndices = np.argsort(relevances)[::-1]
        max_relevance = relevances[sortedIndices[0]]

        # Add top <maxDiscovery> features
        self.logger.debug("OMPTD Batch: Max Relevance = %0.3f" % max_relevance)
        added_feature = False
        to_be_deleted = []  # Record the indices of items to be removed
        for j in xrange(min(self.maxBatchDiscovery, len(relevances))):
            max_index = sortedIndices[j]
            f = self.remainingFeatures[max_index]
            relevance = relevances[max_index]
            # print "Inspecting %s" % str(list(self.iFDD.getFeature(f).f_set))
            if relevance >= self.batchThreshold:
                self.logger.debug(
                    'New Feature %d: %s, Relevance = %0.3f' %
                    (self.features_num, str(np.sort(list(self.iFDD.getFeature(f).f_set))), relevances[max_index]))
                to_be_deleted.append(max_index)
                self.selectedFeatures.append(f)
                self.features_num += 1
                added_feature = True
            else:
                # Because the list is sorted, there is no use to look at the
                # others
                break
        self.remainingFeatures = np.delete(self.remainingFeatures, to_be_deleted)
        return added_feature
Exemple #7
0
    def batchDiscover(self, td_errors, phi, states):
        # Discovers features using iFDD in batch setting.
        # TD_Error: p-by-1 (How much error observed for each sample)
        # phi: n-by-p features corresponding to all samples (each column corresponds to one sample)
        # self.batchThreshold is the minimum relevance value for the feature to
        # be expanded
        SHOW_PLOT = 0  # Shows the histogram of relevances
        maxDiscovery = self.maxBatchDiscovery
        n = self.features_num  # number of features
        p = len(td_errors)  # Number of samples
        counts = np.zeros((n, n))
        relevances = np.zeros((n, n))
        for i in xrange(p):
            phiphiT = np.outer(phi[i, :], phi[i,:])
            if self.iFDDPlus:
                relevances += phiphiT * td_errors[i]
            else:
                relevances += phiphiT * abs(td_errors[i])
            counts += phiphiT
        # Remove Diagonal and upper part of the relevances as they are useless
        relevances = np.triu(relevances, 1)
        non_zero_index = np.nonzero(relevances)
        if self.iFDDPlus:
            # Calculate relevances based on theoretical results of ICML 2013
            # potential submission
            relevances[non_zero_index] = np.divide(
                np.abs(relevances[non_zero_index]),
                np.sqrt(counts[non_zero_index]))
        else:
            # Based on Geramifard11_ICML Paper
            relevances[non_zero_index] = relevances[non_zero_index]

        # Find indexes to non-zero excited pairs
        # F1 and F2 are the parents of the potentials
        (F1, F2) = relevances.nonzero()
        relevances = relevances[F1, F2]
        if len(relevances) == 0:
            # No feature to add
            self.logger.debug("iFDD Batch: Max Relevance = 0")
            return False

        if SHOW_PLOT:
            e_vec = relevances.flatten()
            e_vec = e_vec[e_vec != 0]
            e_vec = np.sort(e_vec)
            plt.ioff()
            plt.plot(e_vec, linewidth=3)
            plt.show()

        # Sort based on relevances
        # We want high to low hence the reverse: [::-1]
        sortedIndices = np.argsort(relevances)[::-1]
        max_relevance = relevances[sortedIndices[0]]
        # Add top <maxDiscovery> features
        self.logger.debug(
            "iFDD Batch: Max Relevance = {0:g}".format(max_relevance))
        added_feature = False
        new_features = 0
        for j in xrange(len(relevances)):
            if new_features >= maxDiscovery:
                break
            max_index = sortedIndices[j]
            f1 = F1[max_index]
            f2 = F2[max_index]
            relevance = relevances[max_index]
            if relevance > self.batchThreshold:
                # print "Inspecting",
                # f1,f2,'=>',self.getStrFeatureSet(f1),self.getStrFeatureSet(f2)
                if self.inspectPair(f1, f2, np.inf):
                    self.logger.debug(
                        'New Feature %d: %s, Relevance = %0.3f' %
                        (self.features_num - 1, self.getStrFeatureSet(self.features_num - 1), relevances[max_index]))
                    new_features += 1
                    added_feature = True
            else:
                # Because the list is sorted, there is no use to look at the
                # others
                break
        return (
            # A signal to see if the representation has been expanded or not
            added_feature
        )