Example #1
 def setUp(self):
     self.R = np.asarray([[ 1.0,  0.4, -0.4],
                          [ 0.4,  1.0,  0.6],
                          [-0.4,  0.6,  1.0]])
     self.mu = [100.0] * 3
     self.sigma = [10.0] * 3
     self.cov = rewards.cor2cov(self.R, self.sigma)
     self.maze = grid.make_multimaze(4, 4, 3)
     self.goals = grid.maze_goal_states(self.maze, 3, self.mu, self.cov)
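This setup assumes `rewards.cor2cov` builds a covariance matrix from a correlation matrix and per-task standard deviations. The standard conversion is cov[i][j] = sigma[i] * R[i][j] * sigma[j]; a minimal sketch of that conversion (the actual `cor2cov` in deong/merlin may differ in details):

    import numpy as np

    def cor2cov_sketch(R, sigma):
        # scale each entry of the correlation matrix by the two
        # standard deviations: cov[i, j] = sigma[i] * R[i, j] * sigma[j]
        D = np.diag(sigma)
        return D @ R @ D

    # with unit standard deviations, the covariance equals the correlation matrix
    R = np.asarray([[ 1.0,  0.4, -0.4],
                    [ 0.4,  1.0,  0.6],
                    [-0.4,  0.6,  1.0]])
    assert np.allclose(cor2cov_sketch(R, [1.0, 1.0, 1.0]), R)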
Example #2
 def test_invalidR(self):
     numStates = 1000
     numActions = 10
     mu = [0.0] * 3
     sigma = [1.0] * 3 
     R = np.asarray([[ 1.0, -0.7,  0.8],
                     [-0.7,  1.0,  0.9],
                     [ 0.8,  0.9,  1.0]])
     cov = rewards.cor2cov(R, sigma)
     self.assertRaises(sla.LinAlgError, rewards.mvnrewards, numStates, numActions, mu, cov)
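The correlation matrix here is deliberately invalid: tasks 1 and 3 correlate at 0.8 and tasks 2 and 3 at 0.9, which forces tasks 1 and 2 to be positively correlated as well, contradicting the -0.7 entry. Concretely, the matrix has a negative eigenvalue, so it is not positive definite and the Cholesky factorization behind the sampler fails. A quick standalone check:

    import numpy as np
    import scipy.linalg as sla

    R = np.asarray([[ 1.0, -0.7,  0.8],
                    [-0.7,  1.0,  0.9],
                    [ 0.8,  0.9,  1.0]])
    print(np.linalg.eigvalsh(R))  # smallest eigenvalue is negative
    try:
        sla.cholesky(R)           # fails for non-positive-definite input
    except sla.LinAlgError as e:
        print('Cholesky failed:', e)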
Example #3
 def test_rewards2(self):
     numStates = 5000
     numActions = 20
     mu = [10, 10, 10]
     sigma = [1, 1, 1]
     R = np.asarray([[ 1.0, -0.7, -0.5],
                     [-0.7,  1.0,  0.8],
                     [-0.5,  0.8,  1.0]])
     cov = rewards.cor2cov(R, sigma)
     D = rewards.mvnrewards(numStates, numActions, mu, cov)
     self.checkCorrelations(R, D)
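test_rewards1 through test_rewards3 all follow the same pattern: build a covariance from a target correlation matrix, sample a reward structure, and verify the sampled correlations. `rewards.mvnrewards(numStates, numActions, mu, cov)` presumably draws one reward vector per (state, action) pair from a multivariate normal. A plausible sketch, consistent with test_invalidR expecting a `LinAlgError` (the real implementation may differ):

    import numpy as np

    def mvnrewards_sketch(numStates, numActions, mu, cov):
        # Cholesky-factor the covariance; this raises LinAlgError when cov
        # is not positive definite, which is what test_invalidR exercises
        L = np.linalg.cholesky(cov)
        # transform i.i.d. standard normals into correlated reward vectors,
        # one per (state, action) pair
        z = np.random.standard_normal((numStates, numActions, len(mu)))
        return np.asarray(mu) + z @ L.T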
Example #4
 def test_rewards1(self):
     numStates = 1000
     numActions = 5
     mu = [20, 0, 50]
     sigma = [5, 5, 10]
     R = np.asarray([[ 1.0,  0.4, -0.4],
                     [ 0.4,  1.0,  0.6],
                     [-0.4,  0.6,  1.0]])
     cov = rewards.cor2cov(R, sigma)
     D = rewards.mvnrewards(numStates, numActions, mu, cov)
     self.checkCorrelations(R, D)
Example #5
 def test_rewards3(self):
     numStates = 200
     numActions = 8
     mu = [0, -10, 30, 0]
     sigma = [5, 0.5, 10, 2.0]
     R = np.asarray([[ 1.0,  0.2, -0.5,  0.0],
                     [ 0.2,  1.0,  0.4,  0.0],
                     [-0.5,  0.4,  1.0,  0.6],
                     [ 0.0,  0.0,  0.6,  1.0]])
     cov = rewards.cor2cov(R, sigma)
     D = rewards.mvnrewards(numStates, numActions, mu, cov)
     self.checkCorrelations(R, D)
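The `checkCorrelations(R, D)` helper is not shown in these excerpts. Given how it is called, it presumably compares the empirical correlations of the sampled rewards against the target matrix within a loose tolerance, since the estimate only converges as numStates * numActions grows. A hypothetical version, assuming the task dimension is the last axis of D:

    def checkCorrelations(self, R, D):
        # flatten the per-(state, action) samples into one row per draw
        samples = D.reshape(-1, D.shape[-1])
        # empirical correlation matrix across tasks (columns as variables)
        empirical = np.corrcoef(samples, rowvar=False)
        # loose tolerance to absorb sampling noise
        self.assertTrue(np.allclose(R, empirical, atol=0.05))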
Example #6
File: merlin.py Project: deong/merlin
        
    # get the target mean vector for rewards. If not given, assume zero means
    if not args.rmeans:
        args.rmeans = np.zeros(args.tasks)
    else:
        args.rmeans = np.asarray(ast.literal_eval(args.rmeans))
        
    # read standard deviation for the rewards for each task. If not given, assume
    # unit standard deviations
    if not args.stdev:
        args.stdev = np.ones(args.tasks)
    else:
        args.stdev = np.asarray(ast.literal_eval(args.stdev))
        
    # compute a covariance matrix from the correlation matrix and standard deviations
    cov = rwd.cor2cov(args.correlation, args.stdev)
    if not rwd.is_pos_def(cov):
        print('Error: covariance matrix must be positive definite', file=sys.stderr)
        sys.exit(1)


    # maze type instances
    if args.type == 'maze':
        # TODO: incorporate correlated rewards somehow
        maze = grd.make_multimaze(args.rows, args.cols, args.tasks)
        goals = grd.maze_goal_states(maze, args.tasks, args.rmeans, cov)
        io.write_maze_instance(maze, goals)
        print('# type={}, rows={}, cols={}, correlation={}, stdev={}'.
              format(args.type, args.rows, args.cols, args.correlation.tolist(), args.stdev.tolist()))
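`rwd.is_pos_def(cov)` guards against exactly the failure mode exercised by test_invalidR above: a user-supplied correlation matrix that no joint distribution can realize. Failing fast with a readable message beats letting a `LinAlgError` surface from deep inside the sampler. A common way to implement such a check, assumed here rather than taken from the merlin source, is to attempt a Cholesky factorization:

    import numpy as np

    def is_pos_def_sketch(m):
        # Cholesky factorization succeeds exactly when a symmetric
        # matrix is positive definite
        try:
            np.linalg.cholesky(m)
            return True
        except np.linalg.LinAlgError:
            return False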