def coef_names(terms, metadata, code_lengths):
    """Return one coefficient name per product column derived from `terms`.

    The terms are coded with `code_terms`, every coded interaction is
    expanded into product columns by `coded_interaction_to_product_cols`,
    and each resulting product column is turned into a name by
    `product_col_to_coef_name`.

    `terms` must be exactly an `OrderedSet` and `metadata` exactly a
    `Metadata` (subclasses are rejected by the assertions).
    `code_lengths` is forwarded untouched to
    `coded_interaction_to_product_cols`.
    """
    assert type(terms) == OrderedSet
    assert type(metadata) == Metadata
    interactions = code_terms(terms, metadata)
    # One collection of product columns per coded interaction; `join`
    # merges them into a single sequence.
    cols_per_interaction = (
        coded_interaction_to_product_cols(interaction, metadata, code_lengths)
        for interaction in interactions)
    all_product_cols = join(cols_per_interaction)
    return [product_col_to_coef_name(col) for col in all_product_cols]
def allfactors(formula):
    """Return the factors referenced anywhere in `formula`.

    The result is built by concatenating, in this order:

    1. the response (`formula.response`),
    2. the factors of every term in `formula.terms`,
    3. for each group in `formula.groups`, the factors of the group's
       terms followed by the group's `columns`.

    `formula` must be exactly a `Formula`.
    """
    assert type(formula) == Formula

    def factors_of(terms):
        # One list of factors per term, merged together by `join`.
        return join(list(term.factors) for term in terms)

    response_part = [formula.response]
    term_part = factors_of(formula.terms)
    group_part = join(
        factors_of(group.terms) + group.columns
        for group in formula.groups)
    return response_part + term_part + group_part
def designmatrix(terms, df, metadata, contrasts):
    """Build the design matrix for `terms` evaluated against `df`.

    The terms are coded and expanded into product columns, each product
    column is evaluated with `execute_product_col`, and the resulting
    arrays are stacked side by side (``axis=1``).  When there are no
    product columns an empty ``(len(df), 0)`` array is returned instead.

    A warning is printed when the matrix has at least one row and one
    column and its rank differs from its number of columns.

    `terms` must be exactly an `OrderedSet`.
    """
    assert type(terms) == OrderedSet
    interactions = code_terms(terms, metadata)
    product_cols = join(
        coded_interaction_to_product_cols(
            interaction, metadata, code_lengths(contrasts))
        for interaction in interactions)
    num_rows = len(df)
    columns = [execute_product_col(col, df, metadata, contrasts)
               for col in product_cols]
    if columns:
        X = np.stack(columns, axis=1)
    else:
        # np.stack refuses an empty sequence, so build the empty matrix
        # by hand.
        X = np.empty((num_rows, 0))
    assert X.shape[0] == num_rows
    # Only compute the rank when the matrix has both rows and columns.
    has_entries = X.shape[0] > 0 and X.shape[1] > 0
    if has_entries and np.linalg.matrix_rank(X) != X.shape[1]:
        print('WARNING: Design matrix may not be full rank.')
    return X
def code_group_of_terms(terms, shared_numeric_factors):
    """Code a group of terms that all share the same numeric factors.

    The numeric factors are stripped from every term, the remaining
    terms are coded with `code_categorical_terms`, and a `NumericCoding`
    for each shared numeric factor is then merged back into every
    resulting coding, following the factor order of the originating
    term.

    `terms` must be a `list` of `Term` and `shared_numeric_factors` an
    `OrderedSet`; every shared numeric factor must occur in every term.
    """
    assert type(terms) == list
    assert all(type(term) == Term for term in terms)
    assert type(shared_numeric_factors) == OrderedSet

    # Each term should also contain no numeric factors other than those
    # mentioned in `shared_numeric_factors`, but that is not checked
    # here.
    assert all(
        factor in term.factors
        for term in terms
        for factor in shared_numeric_factors)

    def without_numeric_factors(term):
        remaining = [f for f in term.factors
                     if f not in shared_numeric_factors]
        return Term(OrderedSet(*remaining))

    categorical_terms = [without_numeric_factors(term) for term in terms]
    codings_for_terms = code_categorical_terms(categorical_terms)

    numeric_codings = {f: NumericCoding(f) for f in shared_numeric_factors}

    def with_numeric_factors(term, coding):
        # Adds codings for the shared numeric factors to the coding of
        # a categorical interaction, respecting the factor order in
        # the source term.
        #
        # e.g. term = Term(<a,x,b>)
        #      coding = (b+,)
        # Returns:
        #      (x,b+)
        # (Assuming shared numeric factors is ['x'].)
        categorical_codings = {c.factor: c for c in coding}
        # Maps every factor (factors in coding U shared numeric
        # factors) to its coding (e.g. CategoricalCoding,
        # NumericCoding).
        by_factor = dict(categorical_codings, **numeric_codings)
        # Pick the codings out in the term's factor order.  (Some
        # factors in the term may not appear in the coding.)
        ordered = [by_factor[f] for f in term.factors if f in by_factor]
        assert len(ordered) == len(by_factor)
        return ordered

    # Complain if zip would drop things.
    assert len(terms) == len(codings_for_terms)
    extended_per_term = []
    for term, codings in zip(terms, codings_for_terms):
        extended_per_term.append(
            [with_numeric_factors(term, coding) for coding in codings])
    return join(extended_per_term)
def code_terms(terms, metadata):
    """Code `terms` group by group and merge the results with `join`.

    `partition_terms` yields pairs of (shared numeric factors, terms).
    Each group's terms are ordered with `sort_by_order` and coded by
    `code_group_of_terms`.

    `metadata` must be exactly a `Metadata`.
    """
    assert type(metadata) == Metadata
    partitioned = partition_terms(terms, metadata)
    coded_groups = (
        code_group_of_terms(sort_by_order(group_terms), numeric_factors)
        for numeric_factors, group_terms in partitioned)
    return join(coded_groups)
def leaves(node, path=None):
    """Collect ``(node, path)`` pairs for the parameter nodes under `node`.

    A node is included when its `is_param` attribute is truthy.  `node`
    itself is considered first, then each of its `children` is visited
    recursively.

    Args:
        node: Object exposing `is_param` and `children`; each child
            must also expose `name`.
        path: List of child names leading to `node`.  Defaults to a
            fresh empty list, meaning `node` is the starting point.

    Returns:
        A list of ``(node, path)`` tuples, where `path` is the list of
        child names followed from the starting node to reach that node.
    """
    # Create a new list on every call: a shared `[]` default would be
    # handed back inside the result tuple, so a caller mutating it would
    # corrupt every later call.
    if path is None:
        path = []
    this = [(node, path)] if node.is_param else []
    rest = join(leaves(n, path + [n.name]) for n in node.children)
    return this + rest
def all_from_terms(terms):
    """Return the factors of every term in `terms`, merged by `join`."""
    # Copy each term's factors into its own list before merging.
    factor_lists = (list(term.factors) for term in terms)
    return join(factor_lists)