def _checkRewardsListLike(reward, n_actions, n_states):
    """Check that a list-like reward input is valid.

    Arguments
    ---------
    reward : sequence
        The reward input. Its length decides how it is interpreted: a length
        equal to ``n_actions`` is treated as one matrix per action and every
        element must expose the same two-element ``shape``; otherwise a
        length equal to ``n_states`` is accepted without inspecting the
        elements.
    n_actions : int
        The number of actions the reward must agree with.
    n_states : int
        The number of states the reward must agree with.

    Returns
    -------
    tuple
        ``(dim1, dim2, dim3)``. For the per-action form this is whatever
        ``_checkDimensionsListLike`` reports; for the per-state form it is
        ``(n_actions, n_states, n_states)``.

    Raises
    ------
    InvalidError
        If ``reward`` has no length, if its length matches neither
        ``n_actions`` nor ``n_states``, or if its elements have no ``shape``
        attribute in the per-action form.

    """
    try:
        lenR = len(reward)
    except (AttributeError, TypeError):
        # len() reports an unsized object (None, a scalar, ...) with
        # TypeError, so it has to be caught explicitly to surface the
        # toolbox's own error instead of a raw TypeError.
        raise _error.InvalidError(_MDPERR["R_shape"])
    # The per-action interpretation wins when n_actions == n_states.
    if lenR == n_actions:
        try:
            dim1, dim2, dim3 = _checkDimensionsListLike(reward)
        except AttributeError:
            # An element without ``shape`` is not a matrix.
            raise _error.InvalidError(_MDPERR["R_shape"])
    elif lenR == n_states:
        dim1 = n_actions
        dim2 = dim3 = lenR
    else:
        raise _error.InvalidError(_MDPERR["R_shape"])
    return dim1, dim2, dim3
def _checkDimensionsListLike(arrays):
    """Check that each array in a list of arrays has the same size.

    Arguments
    ---------
    arrays : sequence
        A sized, integer-indexable collection whose elements each expose a
        two-element ``shape`` attribute.

    Returns
    -------
    tuple
        ``(dim1, dim2, dim3)`` where ``dim1`` is ``len(arrays)`` and
        ``(dim2, dim3)`` is the shape shared by every element. An empty
        collection yields ``(0, 0, 0)``.

    Raises
    ------
    InvalidError
        If any element's shape differs from the shape of the first element.
    AttributeError
        If an element has no ``shape`` attribute; this is not caught here.

    """
    dim1 = len(arrays)
    if dim1 == 0:
        # There is no first element to take the reference shape from.
        # Report zero sizes instead of failing with IndexError so that the
        # caller's own size validation can reject the input.
        return 0, 0, 0
    dim2, dim3 = arrays[0].shape
    for aa in range(1, dim1):
        dim2_aa, dim3_aa = arrays[aa].shape
        if (dim2_aa, dim3_aa) != (dim2, dim3):
            raise _error.InvalidError(_MDPERR["obj_square"])
    return dim1, dim2, dim3
def check(P, R):
    """Check if ``P`` and ``R`` define a valid Markov Decision Process (MDP).

    Let ``S`` = number of states, ``A`` = number of actions.

    Arguments
    ---------
    P : array
        The transition matrices. It can be a three dimensional array with
        a shape of (A, S, S). It can also be a one dimensional array with
        a shape of (A, ), where each element contains a matrix of shape (S, S)
        which can possibly be sparse.
    R : array
        The reward matrix. It can be a three dimensional array with a
        shape of (A, S, S). It can also be a one dimensional array with a
        shape of (A, ), where each element contains a matrix with a shape of
        (S, S) which can possibly be sparse. It can also be an array with
        a shape of (S, A) which can possibly be sparse.

    Notes
    -----
    Raises an error if ``P`` and ``R`` do not define a MDP.

    Inputs that have no ``ndim`` attribute (for example plain lists or
    tuples) are handled as list-like collections of matrices. A ``P`` that
    has neither ``ndim`` nor a length is rejected with ``InvalidError``.

    Examples
    --------
    >>> import mdptoolbox, mdptoolbox.example
    >>> P_valid, R_valid = mdptoolbox.example.rand(100, 5)
    >>> mdptoolbox.util.check(P_valid, R_valid) # Nothing should happen
    >>>
    >>> import numpy as np
    >>> P_invalid = np.random.rand(5, 100, 100)
    >>> mdptoolbox.util.check(P_invalid, R_valid) # Raises an exception
    Traceback (most recent call last):
    ...
    StochasticError:...

    """
    # Checking P
    try:
        if P.ndim == 3:
            aP, sP0, sP1 = P.shape
        elif P.ndim == 1:
            aP, sP0, sP1 = _checkDimensionsListLike(P)
        else:
            raise _error.InvalidError(_MDPERR["P_shape"])
    except AttributeError:
        # Either P has no ``ndim`` (list-like input) or one of its elements
        # has no ``shape``; retry as a plain collection of matrices.
        try:
            aP, sP0, sP1 = _checkDimensionsListLike(P)
        except (AttributeError, TypeError):
            # TypeError covers objects without a length (None, scalars),
            # which len() does not report as AttributeError.
            raise _error.InvalidError(_MDPERR["P_shape"])
    msg = ""
    if aP <= 0:
        msg = "The number of actions in P must be greater than 0."
    elif sP0 <= 0:
        msg = "The number of states in P must be greater than 0."
    if msg:
        raise _error.InvalidError(msg)
    # Checking R
    try:
        ndimR = R.ndim
        if ndimR == 1:
            aR, sR0, sR1 = _checkRewardsListLike(R, aP, sP0)
        elif ndimR == 2:
            # An (S, A) reward array has no second state axis; mirror the
            # first one so the squareness test below passes trivially.
            sR0, aR = R.shape
            sR1 = sR0
        elif ndimR == 3:
            aR, sR0, sR1 = R.shape
        else:
            raise _error.InvalidError(_MDPERR["R_shape"])
    except AttributeError:
        # R has no ``ndim``: treat it as list-like.
        aR, sR0, sR1 = _checkRewardsListLike(R, aP, sP0)
    msg = ""
    if sR0 <= 0:
        msg = "The number of states in R must be greater than 0."
    elif aR <= 0:
        msg = "The number of actions in R must be greater than 0."
    elif sR0 != sR1:
        msg = "The matrix R must be square with respect to states."
    elif sP0 != sR0:
        msg = "The number of states must agree in P and R."
    elif aP != aR:
        msg = "The number of actions must agree in P and R."
    if msg:
        raise _error.InvalidError(msg)
    # Check that the P's are square, stochastic and non-negative
    for aa in range(aP):
        checkSquareStochastic(P[aa])