def python_tokenize(code):
    """Generate ``(pytype, string, origin)`` triples for the tokens in `code`.

    Newlines in `code` are replaced with spaces and surrounding whitespace is
    stripped before the text is handed to :mod:`tokenize`.

    :arg code: A string to tokenize.
    :raises PatsyError: If the tokenizer reports an ERRORTOKEN or a COMMENT.
    """
    # Formulas hold Python expressions only, and those cannot meaningfully
    # contain newlines, so flatten everything onto one line up front.
    flattened = code.replace("\n", " ").strip()
    token_stream = tokenize.generate_tokens(StringIO(flattened).readline)
    try:
        for (pytype, string, (_, start), (_, end), line) in token_stream:
            if pytype == tokenize.ENDMARKER:
                break
            # The origin is built from the line text reported by the
            # tokenizer for this token.
            origin = Origin(line, start, end)
            assert pytype != tokenize.NL
            if pytype == tokenize.NEWLINE:
                assert string == ""
                continue
            if pytype == tokenize.ERRORTOKEN:
                raise PatsyError("error tokenizing input "
                                 "(maybe an unclosed string?)",
                                 origin)
            if pytype == tokenize.COMMENT:
                raise PatsyError("comments are not allowed", origin)
            yield (pytype, string, origin)
        else:  # pragma: no cover
            raise ValueError("stream ended without ENDMARKER?!?")
    except tokenize.TokenError as e:
        # TokenError means the tokenizer believed a multi-line construct
        # (e.g. an unclosed parenthesis) was still open when the text ran
        # out. Such cases get their own error handling elsewhere, so this is
        # treated as a plain end-of-stream.
        #
        # Guard against some other error case being added later:
        assert e.args[0].startswith("EOF in multi-line")
        return
def _eval_factor(factor_info, data, NA_action):
    """Evaluate one factor against `data` and compute its missing-value mask.

    :arg factor_info: Object providing ``factor``, ``state``, ``type`` and,
      depending on the type, ``num_columns`` or ``categories``.
    :arg data: The data passed through to ``factor.eval``.
    :arg NA_action: Object used to detect missing values.
    :returns: A ``(values, is_NA)`` pair. For numerical factors `values` is
      the output of ``atleast_2d_column_default(..., preserve_pandas=True)``;
      for categorical factors it is the 1-d output of
      :func:`categorical_to_int`, where -1 marks a missing value.
    :raises PatsyError: If a numerical factor yields the wrong number of
      columns or non-numeric data.
    """
    factor = factor_info.factor
    result = factor.eval(factor_info.state, data)
    if factor_info.type == "numerical":
        result = atleast_2d_column_default(result, preserve_pandas=True)
        _max_allowed_dim(2, result, factor)
        if result.shape[1] != factor_info.num_columns:
            # Message order is "got <actual> ... instead of <expected>".
            raise PatsyError("when evaluating factor %s, I got %s columns "
                             "instead of the %s I was expecting"
                             % (factor.name(),
                                result.shape[1],
                                factor_info.num_columns),
                             factor)
        # Go through np.asarray so that the dtype lookup also works for
        # objects (such as DataFrames) that lack a single ``.dtype``.
        dtype = np.asarray(result).dtype
        if not safe_issubdtype(dtype, np.number):
            raise PatsyError("when evaluating numeric factor %s, "
                             "I got non-numeric data of type '%s'"
                             % (factor.name(), dtype),
                             factor)
        return result, NA_action.is_numerical_NA(result)
    else:
        assert factor_info.type == "categorical"
        result = categorical_to_int(result, factor_info.categories,
                                    NA_action, origin=factor_info.factor)
        assert result.ndim == 1
        return result, np.asarray(result == -1)
def _read_op_context(token, c):
    """Handle `token` at a point where the parser expects an operator.

    :arg token: The token just read.
    :arg c: The parse context (operator stack, noun stack, trace flag).
    :returns: True after pushing a binary operator, False after consuming a
      close-paren.
    :raises PatsyError: On an unbalanced close-paren or a token that is
      neither a close-paren nor a known binary operator.
    """
    if token.type == Token.RPAREN:
        if c.trace:
            print("Found close-paren")
        # Reduce everything stacked above the nearest open-paren.
        while c.op_stack and c.op_stack[-1].op.token_type != Token.LPAREN:
            _run_op(c)
        if not c.op_stack:
            raise PatsyError("missing '(' or extra ')'", token)
        open_paren = c.op_stack[-1]
        assert open_paren.op.token_type == Token.LPAREN
        # Widen the origin of the noun on top of the stack so that it spans
        # both parentheses.
        enclosed = c.noun_stack[-1]
        enclosed.origin = Origin.combine(
            [open_paren.token, enclosed.token, token])
        # Discard the open-paren.
        c.op_stack.pop()
        return False

    if token.type in c.binary_ops:
        if c.trace:
            print("Found binary operator %r" % (token.type))
        incoming = _StackOperator(c.binary_ops[token.type], token)
        # Run every stacked operator that binds at least as tightly.
        while (c.op_stack
               and incoming.op.precedence <= c.op_stack[-1].op.precedence):
            _run_op(c)
        if c.trace:
            print("Pushing binary operator %r" % (token.type))
        c.op_stack.append(incoming)
        return True

    raise PatsyError(
        "expected an operator, not '%s'"
        % (token.origin.relevant_code(),),
        token)
def _get_level(levels, level_ref): if level_ref in levels: return levels.index(level_ref) if isinstance(level_ref, six.integer_types): if level_ref < 0: level_ref += len(levels) if not (0 <= level_ref < len(levels)): raise PatsyError("specified level %r is out of range" % (level_ref,)) return level_ref raise PatsyError("specified level %r not found" % (level_ref,))
def dmatrices(formula_like, data={}, eval_env=0, NA_action="drop",
              return_type="matrix"):
    """Construct two design matrices given a formula_like and data.

    This function is identical to :func:`dmatrix`, except that it requires
    (and returns) two matrices instead of one: the formula must specify both
    a left-hand side and a right-hand side. By convention, the first matrix
    is the "outcome" or "y" data, and the second is the "predictor" or "x"
    data. They are returned as a ``(lhs, rhs)`` tuple.

    See :func:`dmatrix` for details.

    :raises PatsyError: If the left-hand side matrix has no columns.
    """
    # Must be called directly from this frame: `reference=1` is relative to
    # the current call stack.
    eval_env = EvalEnvironment.capture(eval_env, reference=1)
    outcome, predictors = _do_highlevel_design(formula_like, data, eval_env,
                                               NA_action, return_type)
    if outcome.shape[1] == 0:
        raise PatsyError("model is missing required outcome variables")
    return (outcome, predictors)
def _code_either(self, intercept, levels):
    """Build a polynomial contrast matrix for `levels`.

    :arg intercept: If true, the constant column is included in the result;
      otherwise it is dropped.
    :arg levels: The factor levels; only their count is used here.
    :returns: A :class:`ContrastMatrix`.
    :raises PatsyError: If ``self.scores`` is given and its length differs
      from the number of levels.
    """
    n = len(levels)
    scores = self.scores
    if scores is None:
        scores = np.arange(n)
    # np.array always copies, so the in-place centering below can never
    # modify an array the caller handed us via self.scores.
    scores = np.array(scores, dtype=float)
    if len(scores) != n:
        raise PatsyError("number of levels (%s) does not match"
                         " number of scores (%s)"
                         % (n, len(scores)))
    # Strategy: just make a matrix whose columns are naive linear,
    # quadratic, etc., functions of the raw scores, and then use 'qr' to
    # orthogonalize each column against those to its left.
    scores -= scores.mean()
    raw_poly = scores.reshape((-1, 1)) ** np.arange(n).reshape((1, -1))
    q, r = np.linalg.qr(raw_poly)
    q *= np.sign(np.diag(r))
    # NOTE(review): this sums squares along axis=1 (per row) and broadcasts
    # the result over columns -- confirm that is the intended axis.
    q /= np.sqrt(np.sum(q ** 2, axis=1))
    # The constant term is always all 1's -- we don't normalize it.
    q[:, 0] = 1
    names = [".Constant", ".Linear", ".Quadratic", ".Cubic"]
    names += ["^%s" % (i,) for i in range(4, n)]
    names = names[:n]
    if intercept:
        return ContrastMatrix(q, names)
    else:
        # We always include the constant/intercept column as something to
        # orthogonalize against, but we don't always return it:
        return ContrastMatrix(q[:, 1:], names[1:])
def _tokenize_constraint(string, variable_names):
    """Split a constraint `string` into tokens.

    :arg string: The constraint text.
    :arg variable_names: Names that should be recognized as variables.
    :returns: The token list produced by the scanner.
    :raises PatsyError: If some part of `string` matches no token pattern;
      the error origin points at the first unmatched character.
    """
    lparen_re = r"\("
    rparen_re = r"\)"
    op_re = "|".join([re.escape(op.token_type) for op in _ops])
    # The exponent group is non-capturing: the scanner combines all patterns
    # into one regex and dispatches on group indices, so patterns should not
    # introduce capturing groups of their own.
    num_re = r"[-+]?[0-9]*\.?[0-9]+(?:[eE][-+]?[0-9]+)?"
    whitespace_re = r"\s+"
    # Prefer long matches; drop empty names, which could never be matched.
    variable_names = sorted([name for name in variable_names if name],
                            key=len, reverse=True)

    lexicon = [
        (lparen_re, _token_maker(Token.LPAREN, string)),
        (rparen_re, _token_maker(Token.RPAREN, string)),
        (op_re, _token_maker("__OP__", string)),
    ]
    # With no names, the alternation would be the empty pattern, which
    # matches zero characters everywhere and prevents the number and
    # whitespace patterns below from ever being tried.
    if variable_names:
        variable_re = "|".join([re.escape(name) for name in variable_names])
        lexicon.append((variable_re, _token_maker("VARIABLE", string)))
    lexicon.append((num_re, _token_maker("NUMBER", string)))
    lexicon.append((whitespace_re, None))

    scanner = Scanner(lexicon)
    tokens, leftover = scanner.scan(string)
    if leftover:
        offset = len(string) - len(leftover)
        raise PatsyError("unrecognized token in constraint",
                         Origin(string, offset, offset + 1))

    return tokens
def _eval_binary_div(self, tree): left = self.eval(tree.args[0]) right = self.eval(tree.args[1]) if not self.is_constant(right): raise PatsyError("Can't divide by a variable in a linear " "constraint", tree.args[1]) return left / right[-1]
def eval(self, data, NA_action):
    """Evaluate this numeric factor on `data`.

    :arg data: The data passed through to ``self.factor.eval``.
    :arg NA_action: Object used to detect missing values.
    :returns: A ``(values, is_NA)`` pair, where `values` is the output of
      ``atleast_2d_column_default(..., preserve_pandas=True)``.
    :raises PatsyError: If the factor yields the wrong number of columns or
      non-numeric data.
    """
    result = self.factor.eval(self._state, data)
    result = atleast_2d_column_default(result, preserve_pandas=True)
    _max_allowed_dim(2, result, self.factor)
    if result.shape[1] != self._expected_columns:
        # Message order is "got <actual> ... instead of <expected>".
        raise PatsyError(
            "when evaluating factor %s, I got %s columns "
            "instead of the %s I was expecting"
            % (self.factor.name(), result.shape[1], self._expected_columns),
            self.factor)
    # Go through np.asarray so that the dtype lookup also works for objects
    # (such as DataFrames) that lack a single ``.dtype``.
    dtype = np.asarray(result).dtype
    if not np.issubdtype(dtype, np.number):
        raise PatsyError(
            "when evaluating numeric factor %s, "
            "I got non-numeric data of type '%s'"
            % (self.factor.name(), dtype),
            self.factor)
    return result, NA_action.is_numerical_NA(result)
def sniff(self, data):
    """Record the categorical levels observed in `data`.

    :returns: A bool -- are we confident that we have found all the levels?
    :raises PatsyError: If a value cannot be added to the level set because
      it is unhashable.
    """
    if hasattr(data, "contrast"):
        self._contrast = data.contrast
    if have_pandas_categorical and isinstance(data, pandas.Categorical):
        # pandas.Categorical does its own NA detection; trust it.
        self._levels = tuple(data.levels)
        return True
    if isinstance(data, _CategoricalBox):
        if data.levels is not None:
            self._levels = tuple(data.levels)
            return True
        # No explicit levels: unbox and scan the raw values below.
        data = data.data
    seen = self._level_set
    for item in data:
        if self._NA_action.is_categorical_NA(item):
            continue
        if item is True or item is False:
            seen.update([True, False])
            continue
        try:
            seen.add(item)
        except TypeError:
            raise PatsyError("Error interpreting categorical data: "
                             "all items must be hashable",
                             self._origin)
    # If everything seen so far is boolean, assume the rest will be as well;
    # otherwise we have to keep looking.
    return self._level_set == set([True, False])
def _eval_unary_minus(evaluator, tree):
    """Evaluate a unary minus, which is only accepted in front of 0 or 1.

    :raises PatsyError: If the operand is anything other than ZERO or ONE.
    """
    operand_type = tree.args[0].type
    if operand_type == "ZERO":
        return IntermediateExpr(True, tree.origin, False, [])
    if operand_type == "ONE":
        return IntermediateExpr(False, None, True, [])
    raise PatsyError("Unary minus can only be applied to 1 or 0", tree)
def incr_dbuilder(formula_like, data_iter_maker, eval_env=0,
                  NA_action="drop"):
    """Construct a design matrix builder incrementally from a large data set.

    :arg formula_like: Similar to :func:`dmatrix`, except that explicit
      matrices are not allowed. Must be a formula string, a
      :class:`ModelDesc`, a :class:`DesignInfo`, or an object with a
      ``__patsy_get_model_desc__`` method.
    :arg data_iter_maker: A zero-argument callable which returns an iterator
      over dict-like data objects. A callable is required (rather than a
      plain iterator) because sufficiently complex formulas may need several
      passes over the data (e.g. if there are nested stateful transforms).
    :arg eval_env: Either a :class:`EvalEnvironment` which will be used to
      look up any variables referenced in `formula_like` that cannot be
      found in `data`, or else a depth represented as an integer which will
      be passed to :meth:`EvalEnvironment.capture`. ``eval_env=0`` means to
      use the context of the function calling :func:`incr_dbuilder` for
      lookups. If calling this function from a library, you probably want
      ``eval_env=1``, which means that variables should be resolved in
      *your* caller's namespace.
    :arg NA_action: An :class:`NAAction` object or string, used to determine
      what values count as 'missing' for purposes of determining the levels
      of categorical factors.
    :returns: A :class:`DesignInfo`
    :raises PatsyError: If `formula_like` is not understood, or if it
      specifies outcome (left-hand side) variables.

    Tip: for `data_iter_maker`, write a generator like::

      def iter_maker():
          for data_chunk in my_data_store:
              yield data_chunk

    and pass `iter_maker` (*not* `iter_maker()`).

    .. versionadded:: 0.2.0
       The ``NA_action`` argument.
    """
    # Must be called directly from this frame: `reference=1` is relative to
    # the current call stack.
    eval_env = EvalEnvironment.capture(eval_env, reference=1)
    design_infos = _try_incr_builders(formula_like, data_iter_maker,
                                      eval_env, NA_action)
    if design_infos is None:
        raise PatsyError("bad formula-like object")
    outcome_info = design_infos[0]
    if len(outcome_info.column_names) > 0:
        raise PatsyError("encountered outcome variables for a model "
                         "that does not expect them")
    return design_infos[1]
def incr_dbuilders(formula_like, data_iter_maker, eval_env=0,
                   NA_action="drop"):
    """Construct two design matrix builders incrementally from a large data
    set.

    :func:`incr_dbuilders` is to :func:`incr_dbuilder` as :func:`dmatrices`
    is to :func:`dmatrix`. See :func:`incr_dbuilder` for details.

    :raises PatsyError: If `formula_like` is not understood, or if it has no
      outcome (left-hand side) columns.
    """
    # Must be called directly from this frame: `reference=1` is relative to
    # the current call stack.
    eval_env = EvalEnvironment.capture(eval_env, reference=1)
    builders = _try_incr_builders(formula_like, data_iter_maker,
                                  eval_env, NA_action)
    if builders is None:
        raise PatsyError("bad formula-like object")
    outcome_columns = builders[0].design_info.column_names
    if len(outcome_columns) == 0:
        raise PatsyError("model is missing required outcome variables")
    return builders
def _handle_raise(self, values, is_NAs, origins): for is_NA, origin in zip(is_NAs, origins): if np.any(is_NA): msg = "Missing values detected. If you want rows with missing "\ "values to be automatically deleted in a list-wise " \ "manner (not recommended), please set dropna=True in " \ "the bambi Model initialization." raise PatsyError(msg, origin) return values
def categorical_to_int(data, levels, NA_action, origin=None):
    """Convert categorical `data` into integer codes indexing `levels`.

    Missing values are always mapped to -1.

    :arg data: A ``pd.Categorical``, a boolean array-like, a
      ``_CategoricalBox``, or a 1-d iterable of hashable values.
    :arg levels: Tuple of the expected levels, in code order.
    :arg NA_action: Object whose ``is_categorical_NA`` decides which values
      count as missing.
    :arg origin: Optional origin attached to any raised error.
    :returns: An integer array of codes (wrapped in a ``pd.Series`` sharing
      the input's index when `data` is a Series). For ``pd.Categorical``
      input the object's own codes are returned, and boolean input is simply
      cast to int.
    :raises PatsyError: On mismatching levels, data that is not 1-d or not
      an iterable container, unhashable values, or values not in `levels`.
    """
    assert isinstance(levels, tuple)
    if isinstance(data, pd.Categorical):
        # Current pandas exposes .categories/.codes; old releases called
        # these .levels/.labels. Prefer the new names and fall back.
        if hasattr(data, "categories"):
            data_levels_tuple = tuple(data.categories)
        else:
            data_levels_tuple = tuple(data.levels)
        if not data_levels_tuple == levels:
            raise PatsyError("mismatching levels: expected %r, got %r"
                             % (levels, data_levels_tuple), origin)
        # pd.Categorical also uses -1 to indicate NA, and we don't try to
        # second-guess its NA detection, so we can just pass it back.
        if hasattr(data, "codes"):
            return data.codes
        return data.labels
    elif (hasattr(data, 'dtype') and hasattr(data, 'astype')
          and np.issubdtype(data.dtype, np.bool_)):
        return data.astype('int')
    if isinstance(data, _CategoricalBox):
        if data.levels is not None and tuple(data.levels) != levels:
            raise PatsyError("mismatching levels: expected %r, got %r"
                             % (levels, tuple(data.levels)), origin)
        data = data.data
    if hasattr(data, "shape") and len(data.shape) > 1:
        raise PatsyError("categorical data must be 1-dimensional",
                         origin)
    if not iterable(data) or isinstance(data, basestring):
        raise PatsyError("categorical data must be an iterable container",
                         origin)
    try:
        level_to_int = dict(zip(levels, range(len(levels))))
    except TypeError:
        raise PatsyError("Error interpreting categorical data: "
                         "all items must be hashable", origin)
    out = np.empty(len(data), dtype=int)
    for i, value in enumerate(data):
        if NA_action.is_categorical_NA(value):
            out[i] = -1
        else:
            try:
                out[i] = level_to_int[value]
            except KeyError:
                # Show at most SHOW_LEVELS levels, eliding the middle.
                SHOW_LEVELS = 4
                level_strs = []
                if len(levels) <= SHOW_LEVELS:
                    level_strs += [repr(level) for level in levels]
                else:
                    level_strs += [repr(level)
                                   for level in levels[:SHOW_LEVELS//2]]
                    level_strs.append("...")
                    level_strs += [repr(level)
                                   for level in levels[-SHOW_LEVELS//2:]]
                level_str = "[%s]" % (", ".join(level_strs))
                raise PatsyError("Error converting data to categorical: "
                                 "observation with value %r does not match "
                                 "any of the expected levels (expected: %s)"
                                 % (value, level_str), origin)
            except TypeError:
                raise PatsyError("Error converting data to categorical: "
                                 "encountered unhashable value %r"
                                 % (value,), origin)
    if isinstance(data, pd.Series):
        out = pd.Series(out, index=data.index)
    return out
def _eval_binary_multiply(self, tree): left = self.eval(tree.args[0]) right = self.eval(tree.args[1]) if self.is_constant(left): return left[-1] * right elif self.is_constant(right): return left * right[-1] else: raise PatsyError("Can't multiply one variable by another " "in a linear constraint", tree)
def eval(self, tree, require_evalexpr=True):
    """Evaluate a parse `tree` with the evaluator registered for its
    ``(type, arity)`` pair.

    :arg tree: A :class:`ParseNode`.
    :arg require_evalexpr: If true, the result must be an
      :class:`IntermediateExpr`.
    :raises PatsyError: If no evaluator is registered for the node, or if
      `require_evalexpr` is set and the result has another type.
    """
    assert isinstance(tree, ParseNode)
    key = (tree.type, len(tree.args))
    if key not in self._evaluators:
        raise PatsyError("I don't know how to evaluate this "
                         "'%s' operator" % (tree.type,),
                         tree.token)
    result = self._evaluators[key](self, tree)
    if not require_evalexpr or isinstance(result, IntermediateExpr):
        return result
    if isinstance(result, ModelDesc):
        raise PatsyError("~ can only be used once, and "
                         "only at the top level",
                         tree)
    raise PatsyError("custom operator returned an "
                     "object that I don't know how to "
                     "handle", tree)
def infix_parse(tokens, operators, atomic_types, trace=False):
    """Parse an infix token stream into a single parse tree.

    :arg tokens: Iterable of tokens to parse.
    :arg operators: Operator definitions; each must have arity 1 or 2 and a
      precedence above that of the open-paren pseudo-operator.
    :arg atomic_types: Passed through to the parse context.
    :arg trace: If true, print progress messages while parsing.
    :returns: The single item left on the noun stack once parsing finishes.
    :raises ValueError: If an operator is neither unary nor binary.
    :raises PatsyError: If the expression ends where a noun was expected
      (including an empty token stream) or a '(' is left unmatched.
    """
    token_source = iter(tokens)

    unary_ops = {}
    binary_ops = {}
    for op in operators:
        assert op.precedence > _open_paren.precedence
        if op.arity == 1:
            unary_ops[op.token_type] = op
        elif op.arity == 2:
            binary_ops[op.token_type] = op
        else:
            raise ValueError("operators must be unary or binary")

    c = _ParseContext(unary_ops, binary_ops, atomic_types, trace)

    # This is an implementation of Dijkstra's shunting yard algorithm:
    #   http://en.wikipedia.org/wiki/Shunting_yard_algorithm
    #   http://www.engr.mun.ca/~theo/Misc/exp_parsing.htm

    want_noun = True
    for token in token_source:
        if c.trace:
            print("Reading next token (want_noun=%r)" % (want_noun,))
        if want_noun:
            want_noun = _read_noun_context(token, c)
        else:
            want_noun = _read_op_context(token, c)
    if c.trace:
        print("End of token stream")

    if want_noun:
        message = "expected a noun, but instead the expression ended"
        if not c.op_stack:
            # Nothing is on the operator stack (e.g. the token stream was
            # empty), so there is no token to blame; raise without an origin
            # rather than failing with an IndexError.
            raise PatsyError(message)
        raise PatsyError(message, c.op_stack[-1].token.origin)

    while c.op_stack:
        if c.op_stack[-1].op.token_type == Token.LPAREN:
            raise PatsyError("Unmatched '('", c.op_stack[-1].token)
        _run_op(c)

    assert len(c.noun_stack) == 1
    return c.noun_stack.pop()
def demo_data(*names, **kwargs):
    """demo_data(*names, nlevels=2, min_rows=5)

    Create simple categorical/numerical demo data.

    Pass in a set of variable names, and this function will return a simple
    data set using those variable names.

    Names whose first letter falls in the range "a" through "n" will be made
    categorical (with `nlevels` levels). Those that start with a "p" through
    "z" are numerical. Any other name (including an empty one) is rejected.

    We attempt to produce a balanced design on the categorical variables,
    repeating as necessary to generate at least `min_rows` data points.

    Categorical variables are returned as a list of strings.

    Numerical data is generated by sampling from a normal distribution. A
    fixed random seed is used, so that identical calls to demo_data() will
    produce identical results. Numerical data is returned in a numpy array.

    :raises TypeError: On unexpected keyword arguments.
    :raises PatsyError: If a name does not start with an accepted letter.

    Example:

    .. ipython::

       In [1]: patsy.demo_data("a", "b", "x", "y")
       Out[1]:
       {'a': ['a1', 'a1', 'a2', 'a2', 'a1', 'a1', 'a2', 'a2'],
        'b': ['b1', 'b2', 'b1', 'b2', 'b1', 'b2', 'b1', 'b2'],
        'x': array([ 1.76405235,  0.40015721,  0.97873798,  2.2408932 ,
                     1.86755799, -0.97727788,  0.95008842, -0.15135721]),
        'y': array([-0.10321885,  0.4105985 ,  0.14404357,  1.45427351,
                     0.76103773,  0.12167502,  0.44386323,  0.33367433])}
    """
    nlevels = kwargs.pop("nlevels", 2)
    min_rows = kwargs.pop("min_rows", 5)
    if kwargs:
        raise TypeError("unexpected keyword arguments %r" % (kwargs,))
    numerical = set()
    categorical = {}
    for name in names:
        # Slice rather than index so that an empty name is reported as a bad
        # name below. The explicit truth test matters: "" is a substring of
        # every string.
        initial = name[:1]
        if initial and initial in "abcdefghijklmn":
            categorical[name] = nlevels
        elif initial and initial in "pqrstuvwxyz":
            numerical.add(name)
        else:
            raise PatsyError("bad name %r" % (name,))
    balanced_design_size = np.prod(list(categorical.values()), dtype=int)
    repeat = int(np.ceil(min_rows * 1.0 / balanced_design_size))
    num_rows = repeat * balanced_design_size
    data = balanced(repeat=repeat, **categorical)
    r = np.random.RandomState(0)
    # Sorted so that the random draws are assigned deterministically.
    for name in sorted(numerical):
        data[name] = r.normal(size=num_rows)
    return data
def eval(self, tree, constraint=False):
    """Evaluate a constraint parse `tree`.

    :arg tree: Parse node; dispatched on its ``(type, arity)`` pair.
    :arg constraint: If true, the result is forced to be a
      :class:`LinearConstraint`; if false, a constraint result is an error.
    :raises PatsyError: If a constraint is requested but no variable has a
      nonzero coefficient, or if a constraint shows up where none is
      allowed.
    """
    key = (tree.type, len(tree.args))
    assert key in self._dispatch
    val = self._dispatch[key](tree)
    is_constraint = isinstance(val, LinearConstraint)

    if not constraint:
        # Force it to *not* be a constraint
        if is_constraint:
            raise PatsyError("unexpected constraint object", tree)
        return val

    # Force it to be a constraint
    if is_constraint:
        return val
    assert val.size == self._N + 1
    coefs = val[:self._N]
    if np.all(coefs == 0):
        raise PatsyError("term is constant, with no variables", tree)
    return LinearConstraint(self._variable_names, val[:self._N], -val[-1])
def _categorical_shape_fix(data):
    """Normalize the shape of raw categorical `data`.

    `data` should not be a _CategoricalBox or pandas Categorical or the like
    -- it should be an actual iterable of data, though possibly with the
    wrong shape.

    :returns: `data` itself, or ``[data]`` when it is a scalar or a string.
    :raises PatsyError: If `data` has more than one dimension.
    """
    if hasattr(data, "ndim") and data.ndim > 1:
        raise PatsyError("categorical data cannot be >1-dimensional")
    # Scalars are coerced to 1d, matching the treatment of numeric factors.
    # (See statsmodels/statsmodels#1881)
    if not iterable(data):
        return [data]
    if isinstance(data, (six.text_type, six.binary_type)):
        return [data]
    return data
def check(self, seen_value, desc, origin):
    """Record the first value seen, and require later ones to match it.

    :arg seen_value: The value to record or compare.
    :arg desc: Description of where `seen_value` came from (used in the
      error message).
    :arg origin: Origin stored on first use, and reported on mismatch.
    :raises PatsyError: If `seen_value` does not match the recorded value
      according to ``self._eq_fn``.
    """
    if self.value is None:
        # Nothing recorded yet: remember this value and where it came from.
        self.value = seen_value
        self._value_desc = desc
        self._value_origin = origin
        return
    if self._eq_fn(self.value, seen_value):
        return
    msg = ("%s mismatch between %s and %s"
           % (self._name, self._value_desc, desc))
    if isinstance(self.value, int):
        msg += " (%r versus %r)" % (self.value, seen_value)
    # XX FIXME: this is a case where having discontiguous Origins
    # would be useful...
    raise PatsyError(msg, origin)
def _read_python_expr(it, end_tokens):
    """Read one embedded Python expression from the token iterator `it`.

    Tokens are consumed until an entry of `end_tokens` is met outside any
    brackets (that token is pushed back onto `it`) or the stream runs out.

    :returns: A :class:`Token` of type ZERO, ONE, NUMBER or PYTHON_EXPR whose
      ``extra`` is the expression text.
    :raises PatsyError: On an unmatched close bracket or an unclosed bracket.
    """
    openers = ("(", "[", "{")
    closers = (")", "]", "}")
    pytypes = []
    token_strings = []
    origins = []
    depth = 0
    for pytype, token_string, origin in it:
        assert depth >= 0
        if depth == 0 and token_string in end_tokens:
            it.push_back((pytype, token_string, origin))
            break
        if token_string in openers:
            depth += 1
        elif token_string in closers:
            depth -= 1
        if depth < 0:
            raise PatsyError("unmatched close bracket", origin)
        pytypes.append(pytype)
        token_strings.append(token_string)
        origins.append(origin)
    # Either we found an end_token, or we hit the end of the string
    if depth != 0:
        raise PatsyError("unclosed bracket in embedded Python "
                         "expression",
                         Origin.combine(origins))
    expr_text = pretty_untokenize(zip(pytypes, token_strings))
    if expr_text == "0":
        token_type = "ZERO"
    elif expr_text == "1":
        token_type = "ONE"
    elif _is_a(int, expr_text) or _is_a(float, expr_text):
        token_type = "NUMBER"
    else:
        token_type = "PYTHON_EXPR"
    return Token(token_type, Origin.combine(origins), extra=expr_text)
def _try_incr_builders(formula_like, data_iter_maker, eval_env, NA_action):
    """Turn `formula_like` into a pair of design-matrix builders if possible.

    :arg formula_like: A :class:`DesignInfo`, a 2-tuple of
      :class:`DesignInfo`, an object with ``__patsy_get_model_desc__``, a
      formula string, or a :class:`ModelDesc`.
    :arg data_iter_maker: Passed through to ``design_matrix_builders``.
    :arg eval_env: An :class:`EvalEnvironment`.
    :arg NA_action: Passed through to ``design_matrix_builders``.
    :returns: A pair of builders, or None if `formula_like` is of none of
      the supported kinds.
    :raises PatsyError: If ``__patsy_get_model_desc__`` returns something
      other than a :class:`ModelDesc`, or (Python 2) a unicode formula
      contains non-ascii characters.
    """
    if isinstance(formula_like, DesignInfo):
        return (design_matrix_builders([[]], data_iter_maker, eval_env,
                                       NA_action)[0],
                formula_like)
    if (isinstance(formula_like, tuple)
            and len(formula_like) == 2
            and isinstance(formula_like[0], DesignInfo)
            and isinstance(formula_like[1], DesignInfo)):
        return formula_like
    if hasattr(formula_like, "__patsy_get_model_desc__"):
        # Keep the original object around so that the error message names
        # the object whose hook misbehaved, not the bad value it returned.
        provider = formula_like
        formula_like = provider.__patsy_get_model_desc__(eval_env)
        if not isinstance(formula_like, ModelDesc):
            raise PatsyError("bad value from %r.__patsy_get_model_desc__"
                             % (provider,))
        # fallthrough
    if not six.PY3 and isinstance(formula_like, unicode):
        # Included for the convenience of people who are using py2 with
        # __future__.unicode_literals.
        try:
            formula_like = formula_like.encode("ascii")
        except UnicodeEncodeError:
            raise PatsyError(
                "On Python 2, formula strings must be either 'str' objects, "
                "or else 'unicode' objects containing only ascii "
                "characters. You passed a unicode string with non-ascii "
                "characters. I'm afraid you'll have to either switch to "
                "ascii-only, or else upgrade to Python 3.")
    if isinstance(formula_like, str):
        formula_like = ModelDesc.from_formula(formula_like)
        # fallthrough
    if isinstance(formula_like, ModelDesc):
        assert isinstance(eval_env, EvalEnvironment)
        return design_matrix_builders(
            [formula_like.lhs_termlist, formula_like.rhs_termlist],
            data_iter_maker,
            eval_env,
            NA_action)
    else:
        return None
def call_and_wrap_exc(msg, origin, f, *args, **kwargs):
    """Call ``f(*args, **kwargs)``, attaching `origin` to any failure.

    :arg msg: Prefix for the wrapping error message (Python 3 only).
    :arg origin: Origin to associate with the error.
    :arg f: The callable to invoke.
    :returns: Whatever `f` returns.
    :raises PatsyError: On Python 3, for any exception raised by `f`; the
      original exception is chained as the cause. On Python 2 the original
      exception is re-raised instead (with its origin set if it already is a
      PatsyError).
    """
    try:
        return f(*args, **kwargs)
    # "except ... as e" parses on both Python 2.6+ and Python 3; the older
    # "except Exception, e" form is a syntax error on Python 3.
    except Exception as e:
        if sys.version_info[0] >= 3:
            new_exc = PatsyError("%s: %s: %s"
                                 % (msg, e.__class__.__name__, e),
                                 origin)
            # Use 'exec' to hide this syntax from the Python 2 parser:
            exec("raise new_exc from e")
        else:
            # In python 2, we just let the original exception escape -- better
            # than destroying the traceback. But if it's a PatsyError, we can
            # at least set the origin properly.
            if isinstance(e, PatsyError):
                e.set_origin(origin)
            raise
def build(self, factor_values, out):
    """Fill `out` with the columns of this term.

    Each output column is the elementwise product, over the term's factors,
    of one column chosen per factor.

    :arg factor_values: Mapping from factor to its evaluated values (integer
      codes for categorical factors, a 2-d array for the others).
    :arg out: 2-d output array with ``self.total_columns`` columns; written
      in place.
    :raises PatsyError: If a categorical factor contains a negative code,
      i.e. a missing value.
    """
    assert self.total_columns == out.shape[1]
    out[:] = 1
    combinations = _column_combinations(self._columns_per_factor)
    for i, column_idxs in enumerate(combinations):
        for factor, column_idx in zip(self._factors, column_idxs):
            values = factor_values[factor]
            if factor not in self._cat_contrasts:
                assert values.shape[1] == self._num_columns[factor]
                out[:, i] *= values[:, column_idx]
                continue
            contrast = self._cat_contrasts[factor]
            if np.any(values < 0):
                raise PatsyError("can't build a design matrix "
                                 "containing missing values", factor)
            # Look up each observation's row of the contrast matrix.
            out[:, i] *= contrast.matrix[values, column_idx]
def slice(self, columns_specifier):
    """Locate a subset of design matrix columns, specified symbolically.

    A patsy design matrix has two levels of structure: the individual
    columns (which are named), and the :ref:`terms <formulas>` in the
    formula that generated those columns. This is a one-to-many
    relationship: a single term may span several columns. This method
    provides a user-friendly API for locating those columns.

    (While we talk about columns here, this is probably most useful for
    indexing into other arrays that are derived from the design matrix, such
    as regression coefficients or covariance matrices.)

    The `columns_specifier` argument can take a number of forms:

    * A term name
    * A column name
    * A :class:`Term` object
    * An integer giving a raw index
    * A raw slice object

    In all cases, a Python :func:`slice` object is returned, which can be
    used directly for indexing.

    Example::

      y, X = dmatrices("y ~ a", demo_data("y", "a", nlevels=3))
      betas = np.linalg.lstsq(X, y)[0]
      a_betas = betas[X.design_info.slice("a")]

    (If you want to look up a single individual column by name, use
    ``design_info.column_name_indexes[name]``.)

    :raises PatsyError: If `columns_specifier` matches none of the forms
      above.
    """
    if isinstance(columns_specifier, slice):
        return columns_specifier
    # np.issubsctype was removed in NumPy 2.0; np.issubdtype performs the
    # same integer-type test on the specifier's type.
    if np.issubdtype(type(columns_specifier), np.integer):
        return slice(columns_specifier, columns_specifier + 1)
    if (self.term_slices is not None
            and columns_specifier in self.term_slices):
        return self.term_slices[columns_specifier]
    if columns_specifier in self.term_name_slices:
        return self.term_name_slices[columns_specifier]
    if columns_specifier in self.column_name_indexes:
        idx = self.column_name_indexes[columns_specifier]
        return slice(idx, idx + 1)
    raise PatsyError("unknown column specified '%s'"
                     % (columns_specifier,))
def ast_names(code):
    """Iterator that yields all the (ast) names in a Python expression.

    :arg code: A string containing a Python expression.
    :raises PatsyError: If the expression contains a lambda, a
      comprehension or a generator expression.
    """
    # Constructs that can introduce new name bindings are tricky to handle
    # here, so they are refused outright.
    forbidden = [ast.Lambda, ast.ListComp, ast.GeneratorExp]
    if sys.version_info >= (2, 7):
        forbidden.extend([ast.DictComp, ast.SetComp])
    forbidden = tuple(forbidden)

    parsed = ast.parse(code)
    for node in ast.walk(parsed):
        if isinstance(node, forbidden):
            raise PatsyError("Lambda, list/dict/set comprehension, generator "
                             "expression in patsy formula not currently supported.")
        if isinstance(node, ast.Name):
            yield node.id
def _build_subterm(subterm, factor_infos, factor_values, out):
    """Fill `out` with the columns of `subterm`.

    Each output column is the elementwise product, over the subterm's
    factors, of one column chosen per factor.

    :arg subterm: Provides ``num_columns``, ``factors`` and
      ``contrast_matrices``.
    :arg factor_infos: Mapping from factor to its info object (``type`` and,
      for numerical factors, ``num_columns``).
    :arg factor_values: Mapping from factor to its evaluated values.
    :arg out: 2-d output array with ``subterm.num_columns`` columns; written
      in place.
    :raises PatsyError: If a categorical factor contains a negative code,
      i.e. a missing value.
    """
    assert subterm.num_columns == out.shape[1]
    out[...] = 1
    combinations = _subterm_column_combinations(factor_infos, subterm)
    for i, column_idxs in enumerate(combinations):
        for factor, column_idx in zip(subterm.factors, column_idxs):
            info = factor_infos[factor]
            if info.type != "categorical":
                assert factor_infos[factor].type == "numerical"
                assert (factor_values[factor].shape[1]
                        == factor_infos[factor].num_columns)
                out[:, i] *= factor_values[factor][:, column_idx]
                continue
            contrast = subterm.contrast_matrices[factor]
            if np.any(factor_values[factor] < 0):
                raise PatsyError("can't build a design matrix "
                                 "containing missing values", factor)
            # Look up each observation's row of the contrast matrix.
            out[:, i] *= contrast.matrix[factor_values[factor], column_idx]
def _eval_binary_eq(self, tree):
    """Evaluate an ``=`` node into a :class:`LinearConstraint`.

    Chained equalities such as "a1 = a2 = a3" (parsed as "(a1 = a2) = a3")
    are supported: every nested ``=`` operand is evaluated as a constraint
    in its own right and all the constraints are combined.

    :raises PatsyError: If the two sides have identical variable
      coefficients, so that no variable appears in the constraint.
    """
    operands = list(tree.args)
    collected = []
    for position, operand in enumerate(operands):
        if operand.type != "=":
            continue
        collected.append(self.eval(operand, constraint=True))
        # Stand in for the nested equality with the side of it that is
        # adjacent to us: its right argument when it is our left operand,
        # and vice-versa.
        operands[position] = operand.args[1 - position]
    lhs = self.eval(operands[0])
    rhs = self.eval(operands[1])
    coefs = lhs[:self._N] - rhs[:self._N]
    if np.all(coefs == 0):
        raise PatsyError("no variables appear in constraint", tree)
    constant = -lhs[-1] + rhs[-1]
    collected.append(
        LinearConstraint(self._variable_names, coefs, constant))
    return LinearConstraint.combine(collected)