Example #1
def oneUpdate(directory, plot_results=False):

    inputs = np.linspace(-1.5, 1.5, 21)
    a = 2.0
    c = -1.0
    n_params = 2

    regularization = 0.01
    task = DemoTaskApproximateQuadraticFunction(a, c, inputs, regularization)

    mean_init = np.full(n_params, 0.5)
    covar_init = 0.25 * np.eye(n_params)
    initial_distribution = DistributionGaussian(mean_init, covar_init)

    eliteness = 10
    weighting_method = 'PI-BB'
    covar_decay_factor = 0.8
    updater = UpdaterCovarDecay(eliteness, weighting_method, covar_decay_factor)

    n_samples_per_update = 10

    i_update = runOptimizationTaskOneUpdate(directory, task,
                                            initial_distribution, updater,
                                            n_samples_per_update)

    i_update -= 1
    if plot_results and i_update > 1:
        # Plot the optimization results (from the files saved to disk)
        fig = plt.figure(1, figsize=(15, 5))
        plotOptimizationRollouts(directory, fig, task.plotRollout)
        plt.show()
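
Since runOptimizationTaskOneUpdate keeps its state on disk, the function can be driven by calling it repeatedly. A minimal sketch of such a driver (the directory path and update count are illustrative, and the demo's imports are assumed):

# Hypothetical driver: each call performs a single distribution update,
# reading from and writing to the given directory.
if __name__ == "__main__":
    for _ in range(19):
        oneUpdate('/tmp/demoOneUpdate')
    oneUpdate('/tmp/demoOneUpdate', plot_results=True)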
Example #2
    def updateDistribution(self, distribution, samples, costs):
        """ Update a distribution with reward-weighted averaging.
        \param[in] distribution Distribution before the update
        \param[in] samples Samples in parameter space.
        \param[in] costs The cost of each sample.
        \return The updated distribution.
        """

        mean_cur = distribution.mean
        covar_cur = distribution.covar
        n_samples = samples.shape[0]
        n_dims = samples.shape[1]

        weights = costsToWeights(costs, self.weighting_method, self.eliteness)

        # Compute the new mean with reward-weighted averaging
        # mean    = 1 x n_dims
        # weights = 1 x n_samples
        # samples = n_samples x n_dims
        mean_new = np.average(samples, 0, weights)

        eps = samples - np.tile(mean_cur, (n_samples, 1))
        weights_tiled = np.tile(np.asarray([weights]).transpose(), (1, n_dims))
        weighted_eps = np.multiply(weights_tiled, eps)
        covar_new = np.dot(weighted_eps.transpose(), eps)

        # Remove non-diagonal values
        if self.diag_only:
            diag_vec = np.diag(covar_new)
            covar_new = np.diag(diag_vec)

        # Low-pass filter for covariance matrix, i.e. weight between current
        # and new covariance matrix.

        if self.learning_rate < 1.0:
            lr = self.learning_rate  # For legibility
            covar_new = (1 - lr) * covar_cur + lr * covar_new

        # Add a base level to avoid premature convergence
        if self.base_level_diagonal is not None:
            for ii in range(n_dims):
                if covar_new[ii, ii] < self.base_level_diagonal[ii]:
                    covar_new[ii, ii] = self.base_level_diagonal[ii]

        # Update the covariance matrix
        distribution_new = DistributionGaussian(mean_new, covar_new)

        return distribution_new, weights
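
To see the reward-weighted averaging in isolation: with weights normalized to sum to one, the new mean is the weighted sum of the samples, and the new covariance is the weighted sum of outer products of the deviations from the current mean. A self-contained numeric sketch (the values are illustrative):

import numpy as np

samples = np.array([[1.0, 2.0], [3.0, 0.0], [2.0, 2.0]])  # 3 samples, 2 dims
weights = np.array([0.6, 0.3, 0.1])                       # normalized weights
mean_cur = np.array([2.0, 1.0])

# New mean: weighted average of the samples
mean_new = weights @ samples  # same as np.average(samples, 0, weights)

# New covariance: weighted outer products of deviations from the current mean
eps = samples - mean_cur
covar_new = (weights[:, np.newaxis] * eps).T @ eps

print(mean_new)   # [1.7 1.4]
print(covar_new)  # [[ 0.9 -0.9], [-0.9  1. ]]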
Example #3
    def updateDistribution(self, distribution, samples, costs):
        """ Update a distribution with reward-weighted averaging.
        \param[in] distribution Distribution before the update
        \param[in] samples Samples in parameter space.
        \param[in] costs The cost of each sample.
        \return The updated distribution.
        """

        weights = costsToWeights(costs, self.weighting_method, self.eliteness)

        # Compute the new mean with reward-weighted averaging
        # mean    = 1 x n_dims
        # weights = 1 x n_samples
        # samples = n_samples x n_dims
        mean_new = np.average(samples, 0, weights)

        # Update the covariance matrix
        distribution_new = DistributionGaussian(mean_new, distribution.covar)

        return distribution_new, weights

if __name__ == "__main__":

    directory = None
    if len(sys.argv) > 1:
        directory = sys.argv[1]

    n_dims = 2
    minimum = np.full(n_dims, 2.0)
    regul_weight = 1.0
    cost_function = DemoCostFunctionDistanceToPoint(minimum, regul_weight)

    mean_init = np.full(n_dims, 5.0)
    covar_init = 4.0 * np.eye(n_dims)
    distribution = DistributionGaussian(mean_init, covar_init)

    eliteness = 10
    weighting_method = 'PI-BB'
    covar_decay_factor = 0.8
    updater = UpdaterCovarDecay(eliteness, weighting_method,
                                covar_decay_factor)

    n_samples_per_update = 20
    n_updates = 40

    import matplotlib.pyplot as plt
    fig = plt.figure(1, figsize=(15, 5))

    learning_curve = runOptimization(cost_function, distribution, updater,
                                     n_updates, n_samples_per_update, fig,
                                     directory)
    plt.show()
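
For reference, UpdaterCovarDecay does not estimate the covariance from the samples at all; it simply shrinks the current one on every update. A minimal sketch of such a decay step (whether the library squares the factor is an assumption here):

import numpy as np

covar = 4.0 * np.eye(2)
covar_decay_factor = 0.8

# Shrink exploration geometrically; squaring the factor means the
# standard deviation (not the variance) decays by 0.8 per update.
covar = covar_decay_factor**2 * covar
print(covar)  # 2.56 * identity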
Example #5
    # Normalize weights
    weights = weights / sum(weights)

    return weights


if __name__ == "__main__":
    eliteness = 10
    weighting_method = 'PI-BB'
    covar_decay_factor = 0.8
    updater = UpdaterCovarDecay(eliteness, weighting_method, covar_decay_factor)

    # Note: this second assignment overrides the UpdaterCovarDecay above;
    # the covariance-adaptation updater is the one actually used below.
    diagonal_min = 0.1
    diagonal_max = 1.0
    diag_only = False
    learning_rate = 1.0
    updater = UpdaterCovarAdaptation(eliteness, weighting_method, diagonal_max,
                                     diagonal_min, diag_only, learning_rate)

    mu = np.array([2, 4])
    cov = np.array([[0.3, 0.0], [0.0, 0.5]])
    distribution = DistributionGaussian(mu, cov)

    n_samples = 10
    samples = distribution.generateSamples(n_samples)
    costs = abs(samples[:, 0]) + abs(samples[:, 1])  # Manhattan distance

    new_distribution, w = updater.updateDistribution(distribution, samples,
                                                     costs)
    print(distribution.covar)
    print(new_distribution.covar)
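
For the 'PI-BB' weighting method used throughout these examples, the excerpt above shows only the final normalization inside costsToWeights. Before that step, the costs are typically mapped through an exponential of the range-normalized costs, so that low-cost samples receive exponentially larger weights. A self-contained sketch of that scheme (modeled on the PI-BB literature; the library's exact implementation may differ):

import numpy as np

def costs_to_weights_pibb(costs, eliteness=10):
    """Map costs to normalized weights: lower cost => exponentially higher weight."""
    costs = np.asarray(costs, dtype=float)
    c_min, c_max = costs.min(), costs.max()
    if c_max == c_min:
        return np.full(costs.shape, 1.0 / costs.size)  # all costs equal: uniform
    weights = np.exp(-eliteness * (costs - c_min) / (c_max - c_min))
    return weights / weights.sum()  # the normalization shown in Example #5

print(costs_to_weights_pibb([3.0, 1.0, 2.0]))  # best sample (cost 1.0) dominates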
Example #6
def plotOptimizationRollouts(directory,
                             fig,
                             plotRollout=None,
                             plot_all_rollouts=False):
    """Plot the per-update rollouts, the distribution updates in parameter
    space, and the exploration and learning curves of an optimization run
    saved to 'directory' (one update%05d subdirectory per update)."""

    if not fig:
        fig = plt.figure(1, figsize=(9, 4))

    # Determine number of updates
    n_updates = 1
    update_dir = '%s/update%05d' % (directory, n_updates)
    while containsNewDistribution(update_dir):
        n_updates += 1
        update_dir = '%s/update%05d' % (directory, n_updates)
    n_updates -= 1

    if n_updates < 2:
        return None

    all_costs = []
    learning_curve = []
    exploration_curve = []

    i_samples = 0
    for i_update in range(n_updates):

        update_dir = '%s/update%05d' % (directory, i_update)

        # Read data
        mean = np.loadtxt(update_dir + "/distribution_mean.txt")
        covar = np.loadtxt(update_dir + "/distribution_covar.txt")
        distribution = DistributionGaussian(mean, covar)

        try:
            mean = np.loadtxt(update_dir + "/distribution_new_mean.txt")
            covar = np.loadtxt(update_dir + "/distribution_new_covar.txt")
            distribution_new = DistributionGaussian(mean, covar)
        except IOError:
            distribution_new = None

        try:
            covar_block_sizes = np.loadtxt(update_dir +
                                           "/covar_block_sizes.txt")
        except IOError:
            covar_block_sizes = len(distribution.mean)

        try:
            samples = np.loadtxt(update_dir + "/samples.txt")
        except IOError:
            samples = None
        costs = np.loadtxt(update_dir + "/costs.txt")
        weights = np.loadtxt(update_dir + "/weights.txt")

        try:
            cost_eval = np.loadtxt(update_dir + "/cost_eval.txt")
        except IOError:
            cost_eval = None

        n_rollouts = len(weights)
        rollouts = []
        for i_rollout in range(n_rollouts):
            rollout_dir = '%s/rollout%03d' % (update_dir, i_rollout + 1)
            rollouts.append(loadRolloutFromDirectory(rollout_dir))

        rollout_eval = loadRolloutFromDirectory(update_dir + '/rollout_eval/')

        # Update learning curve
        # Bookkeeping and plotting
        # All the costs so far
        all_costs.extend(costs)
        # Update exploration curve
        i_samples = i_samples + n_rollouts
        cur_exploration = np.sqrt(distribution.maxEigenValue())
        exploration_curve.append([i_samples, cur_exploration])
        # Update learning curve
        learning_curve.append([i_samples])
        learning_curve[-1].extend(np.atleast_1d(cost_eval))

        n_subplots = 3
        i_subplot = 1
        if plotRollout:
            n_subplots = 4
            ax_rollout = fig.add_subplot(1, n_subplots, i_subplot)
            i_subplot += 1
            h = plotRollout(rollout_eval.cost_vars, ax_rollout)
            setColor(h, i_update, n_updates)

        ax_space = fig.add_subplot(1, n_subplots, i_subplot)
        i_subplot += 1
        highlight = (i_update == 0)
        plotUpdate(distribution, cost_eval, samples, costs, weights,
                   distribution_new, ax_space, highlight)

    plotExplorationCurve(exploration_curve,
                         fig.add_subplot(1, n_subplots, i_subplot))
    plotLearningCurve(learning_curve,
                      fig.add_subplot(1, n_subplots, i_subplot + 1))
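
Usage is straightforward once an optimization run has written its updates to disk; the directory below is hypothetical and must contain the update%05d subdirectories read above:

import matplotlib.pyplot as plt

fig = plt.figure(1, figsize=(15, 5))
plotOptimizationRollouts('/tmp/demoOptimization', fig)
plt.show()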
Example #7
    sigma = 1.0  # default, used below if not passed on the command line
    if len(sys.argv) > 4:
        sigma = float(sys.argv[4])

    n_samples = 10
    if len(sys.argv) > 5:
        n_samples = int(sys.argv[5])

    print("Python | calling " + " ".join(sys.argv))

    print('Python |     Loading mean from "' + input_parameters_file + '"')
    parameter_vector = np.loadtxt(input_parameters_file)

    print('Python |     Generating ' + str(n_samples) + ' samples with sigma=' +
          str(sigma))
    covar_init = sigma * sigma * np.eye(parameter_vector.size)
    distribution = DistributionGaussian(parameter_vector, covar_init)
    samples = distribution.generateSamples(n_samples)

    print('Python |     Saving covar to "' + output_covar_file + '"')
    np.savetxt(output_covar_file, covar_init)

    print('Python |     Saving samples to ' + output_directory)
    for i_sample in range(n_samples):

        rollout_directory = '%s/rollout%03d/' % (output_directory, i_sample + 1)
        sample_filename = rollout_directory + 'policy_parameters.txt'

        print('Python |         Saving sample ' + str(i_sample) + ' to ' +
              sample_filename)
        if not os.path.exists(rollout_directory):
            os.makedirs(rollout_directory)
        np.savetxt(sample_filename, samples[i_sample, :])
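
The counterpart step, not shown in the excerpt, would read each saved parameter vector back before executing it, e.g. on a robot. A hypothetical sketch (the directory name is illustrative):

import numpy as np

output_directory = '/tmp/explore'  # hypothetical, matches the script above
n_samples = 10

# Hypothetical read-back: load each rollout's parameter vector again
for i_sample in range(n_samples):
    rollout_directory = '%s/rollout%03d/' % (output_directory, i_sample + 1)
    parameters = np.loadtxt(rollout_directory + 'policy_parameters.txt')
    # ... execute the policy parameterized by `parameters` ...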
Example #9
def plotOptimizationRollouts(directory,
                             fig,
                             plotRollout=None,
                             plot_all_rollouts=False,
                             dimensionality_reduction='SVD'):
    """As Example #6, but with optional projection of the n-dimensional
    distributions to 2-D (via truncated SVD or t-SNE) before plotting."""

    if not fig:
        fig = plt.figure(1, figsize=(9, 4))

    # Determine number of updates
    n_updates = 1
    update_dir = '%s/update%05d' % (directory, n_updates)
    while containsNewDistribution(update_dir):
        n_updates += 1
        update_dir = '%s/update%05d' % (directory, n_updates)
    n_updates -= 1

    if n_updates < 2:
        return None

    all_costs = []
    learning_curve = []
    exploration_curve = []

    i_samples = 0

    if dimensionality_reduction is not None:
        # Reduce dimensionality by finding the two principal directions of the
        # n-dimensional data (the means of the Gaussian distributions):
        # 1) Fetch the means from the directory and stack them into the array
        #    time_series_distribution
        # 2) Reduce dimensionality using SVD or t-SNE

        # 1)
        time_series_distribution = []
        for i_update in range(n_updates + 1):
            update_dir = '%s/update%05d' % (directory, i_update)

            # Read data
            try:
                mean = np.loadtxt(update_dir + "/distribution_mean.txt")
                time_series_distribution.append(mean)

            except IOError:
                print("IOError while reading " + update_dir)
                return -1

        # 2)
        if dimensionality_reduction == 'SVD':
            U, S, V = np.linalg.svd(time_series_distribution,
                                    full_matrices=True)
            # Truncated SVD: keep the first two singular directions
            time_series_distribution = np.matmul(U[:, 0:2], np.diag(S[0:2]))

        elif dimensionality_reduction == 'TSNE':
            time_series_distribution = TSNE(
                n_components=2,
                perplexity=5).fit_transform(time_series_distribution)

    for i_update in range(n_updates):

        update_dir = '%s/update%05d' % (directory, i_update)

        if dimensionality_reduction is not None:
            try:
                # Read data
                mean = time_series_distribution[i_update, :]
                covar = np.loadtxt(update_dir + "/distribution_covar.txt")
                if dimensionality_reduction == 'SVD':
                    # Rotate the covariance matrix into the principal
                    # directions using V
                    covar = np.matmul(np.matmul(np.transpose(V), covar), V)
                    distribution = DistributionGaussian(mean, covar[0:2, 0:2])
                elif dimensionality_reduction == 'TSNE':
                    # For t-SNE there is no projection matrix: take the SVD of
                    # the covariance matrix and keep its principal singular
                    # values
                    U_tSNE, S_tSNE, V_tSNE = np.linalg.svd(covar,
                                                           full_matrices=True)
                    distribution = DistributionGaussian(mean,
                                                        np.diag(S_tSNE[0:2]))
            except IOError:
                distribution = None

            try:
                mean = time_series_distribution[i_update + 1, :]
                covar = np.loadtxt(update_dir + "/distribution_new_covar.txt")
                if dimensionality_reduction == 'SVD':
                    covar = np.matmul(np.matmul(np.transpose(V), covar), V)
                    distribution_new = DistributionGaussian(
                        mean, covar[0:2, 0:2])
                elif dimensionality_reduction == 'TSNE':
                    U_tSNE, S_tSNE, V_tSNE = np.linalg.svd(covar,
                                                           full_matrices=True)
                    distribution_new = DistributionGaussian(
                        mean, np.diag(S_tSNE[0:2]))
            except IOError:
                distribution_new = None

        else:
            # Read data
            mean = np.loadtxt(update_dir + "/distribution_mean.txt")
            covar = np.loadtxt(update_dir + "/distribution_covar.txt")
            distribution = DistributionGaussian(mean, covar)

            try:
                mean = np.loadtxt(update_dir + "/distribution_new_mean.txt")
                covar = np.loadtxt(update_dir + "/distribution_new_covar.txt")
                distribution_new = DistributionGaussian(mean, covar)
            except IOError:
                distribution_new = None

        try:
            covar_block_sizes = np.loadtxt(update_dir +
                                           "/covar_block_sizes.txt")
        except IOError:
            covar_block_sizes = len(distribution.mean)

        try:
            samples = np.loadtxt(update_dir + "/samples.txt")
        except IOError:
            samples = None
        costs = np.loadtxt(update_dir + "/costs.txt")
        weights = np.loadtxt(update_dir + "/weights.txt")

        try:
            cost_eval = np.loadtxt(update_dir + "/cost_eval.txt")
        except IOError:
            cost_eval = None

        n_rollouts = len(weights)
        rollouts = []
        for i_rollout in range(n_rollouts):
            rollout_dir = '%s/rollout%03d' % (update_dir, i_rollout + 1)
            rollouts.append(loadRolloutFromDirectory(rollout_dir))

        rollout_eval = loadRolloutFromDirectory(update_dir + '/rollout_eval/')

        # Update learning curve
        # Bookkeeping and plotting
        # All the costs so far
        all_costs.extend(costs)
        # Update exploration curve
        i_samples = i_samples + n_rollouts
        cur_exploration = np.sqrt(distribution.maxEigenValue())
        exploration_curve.append([i_samples, cur_exploration])
        # Update learning curve
        learning_curve.append([i_samples])
        learning_curve[-1].extend(np.atleast_1d(cost_eval))

        n_subplots = 3
        i_subplot = 1
        if plotRollout:
            n_subplots = 4
            ax_rollout = fig.add_subplot(1, n_subplots, i_subplot)
            i_subplot += 1
            h = plotRollout(rollout_eval.cost_vars, ax_rollout)
            setColor(h, i_update, n_updates)

        ax_space = fig.add_subplot(1, n_subplots, i_subplot)
        i_subplot += 1
        highlight = (i_update == 0)
        plotUpdate(distribution,
                   cost_eval,
                   samples,
                   costs,
                   weights,
                   distribution_new,
                   ax_space,
                   highlight,
                   plot_samples=False)

    plotExplorationCurve(exploration_curve,
                         fig.add_subplot(1, n_subplots, i_subplot))
    plotLearningCurve(learning_curve,
                      fig.add_subplot(1, n_subplots, i_subplot + 1))
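
The SVD projection used above can be checked in isolation: the stacked means are projected onto their first two right-singular directions, and a covariance matrix is rotated into the same basis before keeping the leading 2x2 block. A self-contained sketch with synthetic data (note that np.linalg.svd returns V already transposed, with the right singular vectors as rows, so the rotation below is written accordingly):

import numpy as np

rng = np.random.default_rng(0)
means = rng.normal(size=(8, 5))  # e.g. 8 updates in a 5-D parameter space
covar = np.diag([1.0, 0.5, 0.2, 0.1, 0.1])

U, S, V = np.linalg.svd(means, full_matrices=True)
means_2d = U[:, 0:2] @ np.diag(S[0:2])  # truncated SVD of the means

# Rotate the covariance into the singular basis, keep the top-left 2x2 block
covar_2d = (V @ covar @ V.T)[0:2, 0:2]
print(means_2d.shape, covar_2d.shape)  # (8, 2) (2, 2)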