def setUp(self):
    """Build the shared fixture: a 3-task correlation setup and a 4x4 multimaze.

    Stores the correlation matrix, means, standard deviations, the derived
    covariance matrix, the maze, and its goal states on the test instance.
    """
    num_tasks = 3
    correlation = np.asarray([
        [1.0, 0.4, -0.4],
        [0.4, 1.0, 0.6],
        [-0.4, 0.6, 1.0],
    ])
    means = [100.0] * num_tasks
    stdevs = [10.0] * num_tasks

    self.R = correlation
    self.mu = means
    self.sigma = stdevs
    # Covariance is derived from the correlation matrix and the per-task sigmas.
    self.cov = rewards.cor2cov(self.R, self.sigma)
    self.maze = grid.make_multimaze(4, 4, num_tasks)
    self.goals = grid.maze_goal_states(self.maze, num_tasks, self.mu, self.cov)
def test_invalidR(self):
    """Check that sampling rewards from an invalid matrix raises LinAlgError.

    The correlation matrix below is symmetric with a unit diagonal, but its
    determinant is negative (about -1.95), so it is not positive definite.
    """
    numStates = 1000
    numActions = 10
    mu = [0.0] * 3
    sigma = [1.0] * 3
    R = np.asarray([
        [1.0, -0.7, 0.8],
        [-0.7, 1.0, 0.9],
        [0.8, 0.9, 1.0],
    ])
    # Pass the covariance matrix (not the raw correlation matrix), matching
    # how the other reward tests call mvnrewards.
    cov = rewards.cor2cov(R, sigma)
    self.assertRaises(sla.LinAlgError, rewards.mvnrewards,
                      numStates, numActions, mu, cov)
def test_rewards2(self):
    """Sample rewards for 5000 states x 20 actions over three tasks.

    Uses equal means (10) and unit standard deviations, then hands the
    target correlation matrix and the sampled rewards to checkCorrelations.
    """
    n_states, n_actions = 5000, 20
    means = [10] * 3
    stdevs = [1] * 3
    target_corr = np.asarray([
        [1.0, -0.7, -0.5],
        [-0.7, 1.0, 0.8],
        [-0.5, 0.8, 1.0],
    ])
    covariance = rewards.cor2cov(target_corr, stdevs)
    sampled = rewards.mvnrewards(n_states, n_actions, means, covariance)
    self.checkCorrelations(target_corr, sampled)
def test_rewards1(self):
    """Sample rewards for 1000 states x 5 actions over three tasks.

    Uses distinct means and standard deviations per task, then hands the
    target correlation matrix and the sampled rewards to checkCorrelations.
    """
    n_states, n_actions = 1000, 5
    means = [20, 0, 50]
    stdevs = [5, 5, 10]
    target_corr = np.asarray([
        [1.0, 0.4, -0.4],
        [0.4, 1.0, 0.6],
        [-0.4, 0.6, 1.0],
    ])
    covariance = rewards.cor2cov(target_corr, stdevs)
    sampled = rewards.mvnrewards(n_states, n_actions, means, covariance)
    self.checkCorrelations(target_corr, sampled)
def test_rewards3(self):
    """Sample rewards for 200 states x 8 actions over four tasks.

    Uses a 4x4 target correlation matrix with mixed means and standard
    deviations, then hands the matrix and the sampled rewards to
    checkCorrelations.
    """
    n_states, n_actions = 200, 8
    means = [0, -10, 30, 0]
    stdevs = [5, 0.5, 10, 2.0]
    target_corr = np.asarray([
        [1.0, 0.2, -0.5, 0.0],
        [0.2, 1.0, 0.4, 0.0],
        [-0.5, 0.4, 1.0, 0.6],
        [0.0, 0.0, 0.6, 1.0],
    ])
    covariance = rewards.cor2cov(target_corr, stdevs)
    sampled = rewards.mvnrewards(n_states, n_actions, means, covariance)
    self.checkCorrelations(target_corr, sampled)
# Target mean vector for the rewards: parse the given literal, or fall back
# to zero means (one per task) when none was supplied.
if args.rmeans:
    args.rmeans = np.asarray(ast.literal_eval(args.rmeans))
else:
    args.rmeans = np.zeros(args.tasks)

# Per-task reward standard deviations: parse the given literal, or fall back
# to unit standard deviations when none was supplied.
if args.stdev:
    args.stdev = np.asarray(ast.literal_eval(args.stdev))
else:
    args.stdev = np.ones(args.tasks)

# Derive the covariance matrix from the correlation matrix and the standard
# deviations; abort if it is not positive definite.
cov = rwd.cor2cov(args.correlation, args.stdev)
if not rwd.is_pos_def(cov):
    print('Error: covariance matrix must be positive definite', file=sys.stderr)
    sys.exit(1)

# maze type instances
if args.type == 'maze':
    # TODO: incorporate correlated rewards somehow
    maze = grd.make_multimaze(args.rows, args.cols, args.tasks)
    goals = grd.maze_goal_states(maze, args.tasks, args.rmeans, cov)
    io.write_maze_instance(maze, goals)

    # NOTE(review): the summary line is assumed to belong to the maze branch
    # (it reports rows/cols) -- confirm against the original layout.
    summary_template = '# type={}, rows={}, cols={}, correlation={}, stdev={}'
    print(summary_template.format(args.type, args.rows, args.cols,
                                  args.correlation.tolist(), args.stdev.tolist()))