def score_train_test(
    X,
    Y,
    train,
    test,
    X_treat=None,
    Y_treat=None,
    FoldNumber=None,  # pylint: disable=unused-argument
    grad_splits=None,
    progress=None,  # pylint: disable=unused-argument
    **kwargs
):
    """
    Presents a unified API for ct_v_matrix and loo_v_matrix and returns the
    v_mat, w_pen (possibly calculated, possibly a parameter), and the score

    :param X: Matrix of covariates for untreated units
    :type X: coercible to :class:`numpy.matrix`

    :param Y: Matrix of outcomes for untreated units
    :type Y: coercible to :class:`numpy.matrix`

    :param train: List of rows in the current training set
    :type train: int[]

    :param test: List of rows in the current test set
    :type test: int[]

    :param X_treat: Optional matrix of covariates for treated units
    :type X_treat: coercible to :class:`numpy.matrix`

    :param Y_treat: Optional matrix of outcomes for treated units
    :type Y_treat: ``None`` or coercible to :class:`numpy.matrix`

    :param FoldNumber: Unused, for API compatibility only.
    :type FoldNumber: ``None``

    :param grad_splits: Splits for fitted vs. control units in each gradient
        descent step. An integer, or a list/generator of train and test units
        in each fold of the gradient descent.
    :type grad_splits: int or int[][], optional

    :param progress: Should progress messages be printed to the console?
    :type progress: boolean

    :param kwargs: additional arguments passed to the underlying matrix method

    :raises ValueError: when X, Y, X_treat, or Y_treat are not coercible to a
        :class:`numpy.matrix` or have incompatible dimensions

    :raises RuntimeError: when a MemoryError is raised and grad_splits (which
        reduces memory requirements) is not used.

    :returns: tuple containing the matrix of covariate weights, the unit
        weights penalty, and the out-of-sample score
    :rtype: tuple
    """
    if (X_treat is None) != (Y_treat is None):
        raise ValueError(
            "parameters `X_treat` and `Y_treat` must both be Matrices or None")

    if X_treat is not None:
        # >> K-fold validation on the Treated units; assuming that Y and
        # Y_treat are pre-intervention outcomes

        # PARAMETER QC
        try:
            X = np.asmatrix(X)
        except ValueError:
            raise ValueError("X is not coercible to a matrix")
        try:
            Y = np.asmatrix(Y)
        except ValueError:
            raise ValueError("Y is not coercible to a matrix")
        if X_treat.shape[1] == 0:
            raise ValueError("X_treat.shape[1] == 0")
        if Y_treat.shape[1] == 0:
            raise ValueError("Y_treat.shape[1] == 0")
        if X_treat.shape[0] != Y_treat.shape[0]:
            raise ValueError(
                "X_treat and Y_treat have different number of rows (%s and %s)"
                % (X_treat.shape[0], Y_treat.shape[0]))

        # FIT THE V-MATRIX AND POSSIBLY CALCULATE THE w_pen
        # note that the weights, score, and loss function value returned here
        # are for the in-sample predictions
        _, v_mat, _, _, w_pen, _ = ct_v_matrix(
            X=np.vstack((X, X_treat[train, :])),
            Y=np.vstack((Y, Y_treat[train, :])),
            treated_units=[X.shape[0] + i for i in range(len(train))],
            **kwargs)

        # GET THE OUT-OF-SAMPLE PREDICTION ERROR
        s = ct_score(
            X=np.vstack((X, X_treat[test, :])),
            Y=np.vstack((Y, Y_treat[test, :])),
            treated_units=[X.shape[0] + i for i in range(len(test))],
            V=v_mat,
            w_pen=w_pen,
        )

    else:  # X_treat *is* None
        # >> K-fold validation on the control units only; assuming that Y
        # contains post-intervention outcomes

        if grad_splits is not None:

            try:
                iter(grad_splits)
            except TypeError:
                # not iterable
                pass
            else:
                # TRIM THE GRAD SPLITS DOWN TO THE TRAINING SET
                # inspired by R's match() function
                match = lambda a, b: np.concatenate(
                    [np.where(a == x)[0] for x in b])
                grad_splits = [(match(train, _X), match(train, _Y))
                               for _X, _Y in grad_splits]

            # FIT THE V-MATRIX AND POSSIBLY CALCULATE THE w_pen
            # note that the weights, score, and loss function value returned
            # here are for the in-sample predictions
            _, v_mat, _, _, w_pen, _ = fold_v_matrix(
                X=X[train, :],
                Y=Y[train, :],
                # treated_units = [X.shape[0] + i for i in range(len(train))],
                grad_splits=grad_splits,
                **kwargs)

            # GET THE OUT-OF-SAMPLE PREDICTION ERROR
            # (could also use loo_score, actually...)
            s = ct_score(
                X=X,
                Y=Y,  # formerly: fold_score
                treated_units=test,
                V=v_mat,
                w_pen=w_pen,
            )

        else:

            # FIT THE V-MATRIX AND POSSIBLY CALCULATE THE w_pen
            # note that the weights, score, and loss function value returned
            # here are for the in-sample predictions
            try:
                _, v_mat, _, _, w_pen, _ = loo_v_matrix(
                    X=X[train, :],
                    Y=Y[train, :],
                    # treated_units = [X.shape[0] + i for i in range(len(train))],
                    **kwargs)
            except MemoryError:
                raise RuntimeError(
                    "MemoryError encountered. Try setting `grad_splits` "
                    "parameter to reduce memory requirements.")

            # GET THE OUT-OF-SAMPLE PREDICTION ERROR
            s = ct_score(X=X, Y=Y, treated_units=test, V=v_mat, w_pen=w_pen)

    return v_mat, w_pen, s
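
# A minimal, hypothetical usage sketch for score_train_test(), not part of the
# library API. The data are random placeholders; any additional keyword
# arguments required by the underlying *_v_matrix solvers (e.g. penalty
# parameters) are simply forwarded via **kwargs, so consult those functions'
# signatures before running this.
def _example_score_train_test():
    """Hypothetical sketch: control-only K-fold scoring with random data."""
    import numpy as np  # numpy is assumed available, as elsewhere in this module

    rng = np.random.RandomState(0)
    X = rng.normal(size=(20, 4))  # covariates for 20 control units
    Y = rng.normal(size=(20, 3))  # post-intervention outcomes

    # a single train/test split over the control units
    train = list(range(15))
    test = list(range(15, 20))

    # An integer grad_splits batches the gradient calculation (and routes the
    # fit through fold_v_matrix); pass solver-specific penalties via **kwargs.
    v_mat, w_pen, score = score_train_test(X, Y, train=train, test=test,
                                            grad_splits=5)
    return v_mat, w_pen, score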
def tensor(X, Y, X_treat=None, Y_treat=None, grad_splits=None, **kwargs):
    """
    Presents a unified API for ct_v_matrix and loo_v_matrix
    """
    # PARAMETER QC
    try:
        X = np.float64(X)
    except ValueError:
        raise ValueError("X is not coercible to float64")
    try:
        Y = np.float64(Y)
    except ValueError:
        raise ValueError("Y is not coercible to float64")
    # this needs to be deprecated properly -- because
    # Array.dot(Array) != matrix(Array).dot(matrix(Array)) -- not even close!
    Y = np.asmatrix(Y)
    X = np.asmatrix(X)

    if X.shape[1] == 0:
        raise ValueError("X.shape[1] == 0")
    if Y.shape[1] == 0:
        raise ValueError("Y.shape[1] == 0")
    if X.shape[0] != Y.shape[0]:
        raise ValueError(
            "X and Y have different number of rows (%s and %s)"
            % (X.shape[0], Y.shape[0]))

    if (X_treat is None) != (Y_treat is None):
        raise ValueError(
            "parameters `X_treat` and `Y_treat` must both be Matrices or None")

    if X_treat is not None:
        # Fit the treated units to the control units; assuming that Y contains
        # pre-intervention outcomes:

        # PARAMETER QC
        try:
            X_treat = np.float64(X_treat)
        except ValueError:
            raise ValueError("X_treat is not coercible to float64")
        try:
            Y_treat = np.float64(Y_treat)
        except ValueError:
            raise ValueError("Y_treat is not coercible to float64")
        # this needs to be deprecated properly -- because
        # Array.dot(Array) != matrix(Array).dot(matrix(Array)) -- not even close!
        Y_treat = np.asmatrix(Y_treat)
        X_treat = np.asmatrix(X_treat)

        if X_treat.shape[1] == 0:
            raise ValueError("X_treat.shape[1] == 0")
        if Y_treat.shape[1] == 0:
            raise ValueError("Y_treat.shape[1] == 0")
        if X_treat.shape[0] != Y_treat.shape[0]:
            raise ValueError(
                "X_treat and Y_treat have different number of rows (%s and %s)"
                % (X_treat.shape[0], Y_treat.shape[0]))

        # FIT THE V-MATRIX AND POSSIBLY CALCULATE THE w_pen
        # note that the weights, score, and loss function value returned here
        # are for the in-sample predictions
        _, v_mat, _, _, _, _ = ct_v_matrix(
            X=np.vstack((X, X_treat)),
            Y=np.vstack((Y, Y_treat)),
            control_units=np.arange(X.shape[0]),
            treated_units=np.arange(X_treat.shape[0]) + X.shape[0],
            **kwargs
        )

    else:
        # Fit the control units to themselves; Y may contain post-intervention
        # outcomes:
        adjusted = False  # currently unused debugging flag
        if kwargs["w_pen"] < 1:
            adjusted = True

        if grad_splits is not None:
            _, v_mat, _, _, _, _ = fold_v_matrix(
                X=X,
                Y=Y,
                control_units=np.arange(X.shape[0]),
                treated_units=np.arange(X.shape[0]),
                grad_splits=grad_splits,
                **kwargs
            )
            # if adjusted:
            #     print("vmat: %s" % (np.diag(v_mat)))
        else:
            _, v_mat, _, _, _, _ = loo_v_matrix(
                X=X,
                Y=Y,
                control_units=np.arange(X.shape[0]),
                treated_units=np.arange(X.shape[0]),
                **kwargs
            )
    return v_mat
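
# A hypothetical sketch of calling tensor() in the control-only setting (also
# not part of the library API). Because this code path reads kwargs["w_pen"]
# directly, a unit-weight penalty must be supplied; the value below is an
# arbitrary placeholder, and the underlying fold_v_matrix may require further
# keyword arguments, which tensor() forwards unchanged.
def _example_tensor():
    """Hypothetical sketch: fit V with the control units fit to themselves."""
    import numpy as np  # numpy is assumed available, as elsewhere in this module

    rng = np.random.RandomState(1)
    X = rng.normal(size=(30, 5))  # covariates / pre-period outcomes
    Y = rng.normal(size=(30, 4))  # post-period outcomes

    # grad_splits selects fold_v_matrix over the more memory-hungry loo_v_matrix
    v_mat = tensor(X, Y, grad_splits=5, w_pen=0.01)
    return np.diag(v_mat)  # per-covariate weights on the diagonal of V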
def get_max_v_pen(X, Y, w_pen=None, X_treat=None, Y_treat=None, **kwargs):
    """
    Calculates the maximum value of v_pen for which the elements of the tensor
    matrix (V) are not all zero, conditional on the provided w_pen. If w_pen is
    not provided, a guesstimate is used.

    Provides a unified wrapper to the various ``*_v_matrix`` functions, passing
    the parameter ``return_max_v_pen = True`` in order to obtain the gradient
    instead of the matrix.
    """
    # PARAMETER QC
    try:
        X = np.float64(X)
    except ValueError:
        raise ValueError("X is not coercible to float64")
    try:
        Y = np.float64(Y)
    except ValueError:
        raise ValueError("Y is not coercible to float64")
    # this needs to be deprecated properly -- because
    # Array.dot(Array) != matrix(Array).dot(matrix(Array)) -- not even close!
    Y = np.asmatrix(Y)
    X = np.asmatrix(X)

    if (X_treat is None) != (Y_treat is None):
        raise ValueError(
            "parameters `X_treat` and `Y_treat` must both be Matrices or None")
    if X.shape[1] == 0:
        raise ValueError("X.shape[1] == 0")
    if Y.shape[1] == 0:
        raise ValueError("Y.shape[1] == 0")
    if X.shape[0] != Y.shape[0]:
        raise ValueError(
            "X and Y have different number of rows (%s and %s)"
            % (X.shape[0], Y.shape[0]))

    if w_pen is None:
        w_pen = np.mean(np.var(X, axis=0))

    if X_treat is not None:

        # PARAMETER QC
        if not isinstance(X_treat, np.matrix):
            raise TypeError("X_treat is not a matrix")
        if not isinstance(Y_treat, np.matrix):
            raise TypeError("Y_treat is not a matrix")
        if X_treat.shape[1] == 0:
            raise ValueError("X_treat.shape[1] == 0")
        if Y_treat.shape[1] == 0:
            raise ValueError("Y_treat.shape[1] == 0")
        if X_treat.shape[0] != Y_treat.shape[0]:
            raise ValueError(
                "X_treat and Y_treat have different number of rows (%s and %s)"
                % (X_treat.shape[0], Y_treat.shape[0]))

        control_units = np.arange(X.shape[0])
        treated_units = np.arange(X.shape[0], X.shape[0] + X_treat.shape[0])

        try:
            iter(w_pen)
        except TypeError:
            # w_pen is a single value
            return ct_v_matrix(
                X=np.vstack((X, X_treat)),
                Y=np.vstack((Y, Y_treat)),
                w_pen=w_pen,
                control_units=control_units,
                treated_units=treated_units,
                return_max_v_pen=True,
                gradient_message=_GRADIENT_MESSAGE,
                **kwargs)
        else:
            # w_pen is an iterable of values
            return [
                ct_v_matrix(
                    X=np.vstack((X, X_treat)),
                    Y=np.vstack((Y, Y_treat)),
                    control_units=control_units,
                    treated_units=treated_units,
                    return_max_v_pen=True,
                    gradient_message=_GRADIENT_MESSAGE,
                    w_pen=_w_pen,
                    **kwargs)
                for _w_pen in w_pen
            ]

    else:

        try:
            iter(w_pen)
        except TypeError:
            # w_pen is a single value
            if "grad_splits" in kwargs:
                return fold_v_matrix(
                    X=X,
                    Y=Y,
                    w_pen=w_pen,
                    return_max_v_pen=True,
                    gradient_message=_GRADIENT_MESSAGE,
                    **kwargs)
            try:
                return loo_v_matrix(
                    X=X,
                    Y=Y,
                    w_pen=w_pen,
                    return_max_v_pen=True,
                    gradient_message=_GRADIENT_MESSAGE,
                    **kwargs)
            except MemoryError:
                raise RuntimeError(
                    "MemoryError encountered. Try setting `grad_splits` "
                    "parameter to reduce memory requirements.")
        else:
            # w_pen is an iterable of values
            if "grad_splits" in kwargs:
                return [
                    fold_v_matrix(
                        X=X,
                        Y=Y,
                        w_pen=_w_pen,
                        return_max_v_pen=True,
                        gradient_message=_GRADIENT_MESSAGE,
                        **kwargs)
                    for _w_pen in w_pen
                ]
            try:
                return [
                    loo_v_matrix(
                        X=X,
                        Y=Y,
                        w_pen=_w_pen,
                        return_max_v_pen=True,
                        gradient_message=_GRADIENT_MESSAGE,
                        **kwargs)
                    for _w_pen in w_pen
                ]
            except MemoryError:
                raise RuntimeError(
                    "MemoryError encountered. Try setting `grad_splits` "
                    "parameter to reduce memory requirements.")
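
# A hypothetical sketch of using get_max_v_pen() to anchor a grid of candidate
# covariate penalties (again, not part of the library API). The random data and
# the log-spaced grid are placeholders shown only for illustration; the call
# itself follows the wrapper above, which substitutes np.mean(np.var(X, axis=0))
# when w_pen is None.
def _example_get_max_v_pen():
    """Hypothetical sketch: build a v_pen grid from the estimated maximum."""
    import numpy as np  # numpy is assumed available, as elsewhere in this module

    rng = np.random.RandomState(2)
    X = rng.normal(size=(25, 6))
    Y = rng.normal(size=(25, 3))

    # grad_splits routes the call through fold_v_matrix rather than loo_v_matrix
    max_v_pen = get_max_v_pen(X, Y, grad_splits=5)

    # arbitrary log-spaced grid of penalties at and below the estimated maximum
    v_pen_grid = max_v_pen * np.logspace(-3, 0, 10)
    return v_pen_grid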