def partition_terms(terms, metadata):
    """Group `terms` by the set of numeric factors each term contains.

    Each term is keyed by the OrderedSet of its factors whose column (as
    looked up through ``metadata.column``) satisfies ``is_numeric_col``.
    The (key, term) pairs are handed to ``group``, and the resulting
    items are reordered so that the group keyed by the empty set comes
    first.

    Args:
        terms: an ``OrderedSet`` of terms; each term must expose a
            ``factors`` iterable.
        metadata: a ``Metadata`` instance used to look up the column for
            each factor.

    Returns:
        ``first + rest``, the concatenation of the two values returned by
        ``partition``.
        NOTE(review): this assumes ``partition`` returns a pair of
        concatenable sequences (items failing the predicate, items
        satisfying it) -- confirm against its definition.

    Raises:
        AssertionError: if ``terms`` is not an ``OrderedSet`` or
            ``metadata`` is not a ``Metadata``.
    """
    # isinstance rather than an exact ``type(...) ==`` comparison, so that
    # subclasses of the expected types are accepted as well.
    assert isinstance(terms, OrderedSet)
    assert isinstance(metadata, Metadata)

    def numeric_factors(term):
        # The factors of `term` backed by a numeric column, in the order
        # they appear in the term.
        factors = [
            f for f in term.factors if is_numeric_col(metadata.column(f))
        ]
        return OrderedSet(*factors)

    # The idea here is to store the full term (including the numeric
    # factors) as a way of remembering the order in which the numeric
    # and non-numeric factors originally appeared. I think Patsy does
    # something like this.
    groups = group([(numeric_factors(term), term) for term in terms])

    # Sort the groups. First comes the group containing no numeric
    # factors. The remaining groups appear in the order in which a
    # term containing exactly those numeric factors associated with
    # the group first appears in `terms`. (The latter is guaranteed by
    # the fact that `group` is order aware.)
    empty_set = OrderedSet()
    first, rest = partition(lambda kv: kv[0] != empty_set, groups.items())
    return first + rest
Z_0=np.array([[-1.], [-1.], [1.]]), J_0=np.array([0, 1, 1]), y_obs=np.array([1., 2., 3.]))), ])
def test_designmatrix(formula_str, df, metadata_cols, contrasts, expected):
    """Check that `makedata` produces exactly the expected arrays.

    NOTE(review): the arguments are presumably supplied by the parametrize
    list that closes just above; its beginning is outside this view.

    Metadata is built from `metadata_cols` when that is given, and is
    otherwise derived from `df`. The result of `makedata` must have the
    same set of keys as `expected`, and for every key both the dtype and
    the contents must match.
    """
    metadata = metadata_from_cols(
        metadata_cols) if metadata_cols is not None else metadata_from_df(df)
    data = makedata(parse(formula_str), df, metadata, contrasts)
    # No missing and no unexpected entries.
    assert set(data.keys()) == set(expected.keys())
    for k in expected.keys():
        # dtype is compared separately from the values.
        assert data[k].dtype == expected[k].dtype
        assert_equal(data[k], expected[k])


@pytest.mark.parametrize('formula_str, expected_formula', [
    ('y ~ 1', Formula('y', OrderedSet(_1), [])),
    ('y ~ 1 + x', Formula('y', OrderedSet(_1, Term(OrderedSet('x'))), [])),
    # A repeated term is expected to appear only once.
    ('y ~ x + x', Formula('y', OrderedSet(Term(OrderedSet('x'))), [])),
    ('y ~ x1 : x2', Formula('y', OrderedSet(Term(OrderedSet('x1', 'x2'))), [])),
    # `:` applied to a parenthesised sum is expected to yield one term per
    # summand: x1:x3 and x2:x3.
    ('y ~ (x1 + x2) : x3', Formula(
        'y', OrderedSet(Term(OrderedSet('x1', 'x3')), Term(OrderedSet(
            'x2', 'x3'))), [])),
])
def test_parser(formula_str, expected_formula):
    """Check that parsing `formula_str` yields a value equal to `expected_formula`."""
    formula = parse(formula_str)
    assert formula == expected_formula
def numeric_factors(term):
    """Collect the numeric factors of `term` into an OrderedSet.

    A factor is kept when ``is_numeric_col`` accepts the column that
    ``metadata.column`` returns for it. ``metadata`` is not a parameter;
    it is resolved from the surrounding scope. Factors are passed to
    ``OrderedSet`` in the order they occur in ``term.factors``.
    """
    return OrderedSet(*(
        factor
        for factor in term.factors
        if is_numeric_col(metadata.column(factor))
    ))
def drop_numeric_factors(term):
    """Build a Term from the factors of `term` not in `shared_numeric_factors`.

    ``shared_numeric_factors`` is not a parameter; it is resolved from the
    surrounding scope. The remaining factors are passed to ``OrderedSet``
    in the order they occur in ``term.factors``.
    """
    remaining = (
        factor
        for factor in term.factors
        if factor not in shared_numeric_factors
    )
    return Term(OrderedSet(*remaining))