def _make_term_column_builders(terms, num_column_counts, cat_levels_contrasts): # Sort each term into a bucket based on the set of numeric factors it # contains: term_buckets = OrderedDict() bucket_ordering = [] for term in terms: num_factors = [] for factor in term.factors: if factor in num_column_counts: num_factors.append(factor) bucket = frozenset(num_factors) if bucket not in term_buckets: bucket_ordering.append(bucket) term_buckets.setdefault(bucket, []).append(term) # Special rule: if there is a no-numerics bucket, then it always comes # first: if frozenset() in term_buckets: bucket_ordering.remove(frozenset()) bucket_ordering.insert(0, frozenset()) term_to_column_builders = {} new_term_order = [] # Then within each bucket, work out which sort of contrasts we want to use # for each term to avoid redundancy for bucket in bucket_ordering: bucket_terms = term_buckets[bucket] # Sort by degree of interaction bucket_terms.sort(key=lambda t: len(t.factors)) new_term_order += bucket_terms used_subterms = set() for term in bucket_terms: column_builders = [] factor_codings = pick_contrasts_for_term(term, num_column_counts, used_subterms) # Construct one _ColumnBuilder for each subterm for factor_coding in factor_codings: builder_factors = [] num_columns = {} cat_contrasts = {} # In order to preserve factor ordering information, the # coding_for_term just returns dicts, and we refer to # the original factors to figure out which are included in # each subterm, and in what order for factor in term.factors: # Numeric factors are included in every subterm if factor in num_column_counts: builder_factors.append(factor) num_columns[factor] = num_column_counts[factor] elif factor in factor_coding: builder_factors.append(factor) levels, contrast = cat_levels_contrasts[factor] # This is where the default coding is set to # Treatment: coded = code_contrast_matrix(factor_coding[factor], levels, contrast, default=Treatment) cat_contrasts[factor] = coded column_builder = _ColumnBuilder(builder_factors, num_columns, cat_contrasts) column_builders.append(column_builder) term_to_column_builders[term] = column_builders return new_term_order, term_to_column_builders
def _make_subterm_infos(terms, num_column_counts, cat_levels_contrasts): # Sort each term into a bucket based on the set of numeric factors it # contains: term_buckets = OrderedDict() bucket_ordering = [] for term in terms: num_factors = [] for factor in term.factors: if factor in num_column_counts: num_factors.append(factor) bucket = frozenset(num_factors) if bucket not in term_buckets: bucket_ordering.append(bucket) term_buckets.setdefault(bucket, []).append(term) # Special rule: if there is a no-numerics bucket, then it always comes # first: if frozenset() in term_buckets: bucket_ordering.remove(frozenset()) bucket_ordering.insert(0, frozenset()) term_to_subterm_infos = OrderedDict() new_term_order = [] # Then within each bucket, work out which sort of contrasts we want to use # for each term to avoid redundancy for bucket in bucket_ordering: bucket_terms = term_buckets[bucket] # Sort by degree of interaction bucket_terms.sort(key=lambda t: len(t.factors)) new_term_order += bucket_terms used_subterms = set() for term in bucket_terms: subterm_infos = [] factor_codings = pick_contrasts_for_term(term, num_column_counts, used_subterms) # Construct one SubtermInfo for each subterm for factor_coding in factor_codings: subterm_factors = [] contrast_matrices = {} subterm_columns = 1 # In order to preserve factor ordering information, the # coding_for_term just returns dicts, and we refer to # the original factors to figure out which are included in # each subterm, and in what order for factor in term.factors: # Numeric factors are included in every subterm if factor in num_column_counts: subterm_factors.append(factor) subterm_columns *= num_column_counts[factor] elif factor in factor_coding: subterm_factors.append(factor) levels, contrast = cat_levels_contrasts[factor] # This is where the default coding is set to # Treatment: coded = code_contrast_matrix(factor_coding[factor], levels, contrast, default=Treatment) contrast_matrices[factor] = coded subterm_columns *= coded.matrix.shape[1] subterm_infos.append(SubtermInfo(subterm_factors, contrast_matrices, subterm_columns)) term_to_subterm_infos[term] = subterm_infos assert new_term_order == list(term_to_subterm_infos) return term_to_subterm_infos