def precompute_minimal(data, settings):
    """Precompute per-run prior parameters and sufficient-statistic caches.

    Returns a ``(param, cache)`` tuple:
      * ``param`` — an ``empty()`` attribute container (presumably a bare
        namespace object — TODO confirm) holding prior hyperparameters.
      * ``cache`` — a dict of precomputed quantities over ``data['y_train']``.

    Classification (``settings.optype == 'class'``) fills a symmetric
    Dirichlet prior; otherwise a Gaussian prior for regression is fitted
    from the empirical mean/variance of the training targets.
    """
    param = empty()
    cache = {}
    if settings.optype == 'class':
        # Symmetric Dirichlet prior: total mass alpha split evenly across classes.
        param.alpha = settings.alpha
        param.alpha_per_class = float(param.alpha) / data['n_class']
        # Per-class label counts over the training set.
        cache['y_train_counts'] = hist_count(data['y_train'], range(data['n_class']))
        cache['range_n_class'] = range(data['n_class'])
        # Uniform base measure; (ones + 0.) forces a float array.
        param.base_measure = (np.ones(data['n_class']) + 0.) / data['n_class']
        param.alpha_vec = param.base_measure * param.alpha
    else:
        # Regression: cache sufficient statistics (sum, sum of squares, count).
        cache['sum_y'] = float(np.sum(data['y_train']))
        cache['sum_y2'] = float(np.sum(data['y_train']**2))
        cache['n_points'] = len(data['y_train'])
        warn('initializing prior mean and precision to their true values')
        # FIXME: many of the following are relevant only for mondrian forests
        # Empirical-Bayes style: prior moments taken from the training targets.
        param.prior_mean = np.mean(data['y_train'])
        param.prior_variance = np.var(data['y_train'])
        # NOTE(review): divides by zero if all training targets are equal — confirm
        # callers never pass a constant y_train.
        param.prior_precision = 1.0 / param.prior_variance
        if not settings.smooth_hierarchically:
            param.noise_variance = 0.01  # FIXME: hacky
        else:
            K = min(1000, data['n_train'])
            # FIXME: measurement noise set to fraction of unconditional variance
            # Original comment claimed "noise variance = prior_variance / (2K)",
            # but the code divides by (1 + K) — NOTE(review): confirm which is intended.
            param.noise_variance = param.prior_variance / (1. + K)
        # NOTE: max_split_cost scales inversely with the number of dimensions
        param.variance_coef = 2.0 * param.prior_variance
        param.sigmoid_coef = data['n_dim'] / (2.0 * np.log2(data['n_train']))
        param.noise_precision = 1.0 / param.noise_variance
    return (param, cache)
def precompute_minimal(data, settings):
    """Build the ``(param, cache)`` pair of precomputed prior quantities.

    For classification a symmetric Dirichlet prior is set up from
    ``settings.alpha``; for regression, Gaussian prior hyperparameters are
    derived from the empirical moments of ``data['y_train']``.
    """
    param = empty()
    cache = {}
    if settings.optype == 'class':
        n_class = data['n_class']
        class_range = range(n_class)
        # Dirichlet prior with total mass alpha shared uniformly over classes.
        param.alpha = settings.alpha
        param.alpha_per_class = float(param.alpha) / n_class
        cache['y_train_counts'] = hist_count(data['y_train'], class_range)
        cache['range_n_class'] = class_range
        param.base_measure = (np.ones(n_class) + 0.) / n_class
        param.alpha_vec = param.base_measure * param.alpha
    else:
        y = data['y_train']
        n_train = data['n_train']
        # Sufficient statistics for the Gaussian likelihood.
        cache['sum_y'] = float(np.sum(y))
        cache['sum_y2'] = float(np.sum(y ** 2))
        cache['n_points'] = len(y)
        warn('initializing prior mean and precision to their true values')
        # FIXME: many of the following are relevant only for mondrian forests
        param.prior_mean = np.mean(y)
        param.prior_variance = np.var(y)
        param.prior_precision = 1.0 / param.prior_variance
        if settings.smooth_hierarchically:
            K = min(1000, n_train)
            # FIXME: measurement noise set to fraction of unconditional variance
            param.noise_variance = param.prior_variance / (1. + K)
        else:
            param.noise_variance = 0.01  # FIXME: hacky
        # NOTE: max_split_cost scales inversely with the number of dimensions
        param.variance_coef = 2.0 * param.prior_variance
        param.sigmoid_coef = data['n_dim'] / (2.0 * np.log2(n_train))
        param.noise_precision = 1.0 / param.noise_variance
    return (param, cache)
def precompute_minimal(data, settings):
    """Precompute classification prior parameters and the label-count cache.

    Only classification (``settings.optype == 'class'``) is supported by
    this variant.  Returns ``(param, cache)`` where ``param`` holds the
    symmetric-Dirichlet hyperparameters and ``cache`` holds per-class
    training-label counts.

    Raises:
        AssertionError: if ``settings.optype`` is not ``'class'``.
    """
    # Explicit raise instead of a bare `assert`: asserts are stripped under
    # `python -O`, which would let a regression optype fall through and
    # return an unfilled `param`.  AssertionError keeps the exception type
    # the original assert produced, so existing callers are unaffected.
    if settings.optype != 'class':
        raise AssertionError("precompute_minimal requires settings.optype == 'class'")
    param = empty()
    cache = {}
    n_class = data['n_class']
    # Symmetric Dirichlet: total mass alpha split evenly across classes.
    param.alpha = settings.alpha
    param.alpha_per_class = float(param.alpha) / n_class
    cache['y_train_counts'] = hist_count(data['y_train'], range(n_class))
    cache['range_n_class'] = range(n_class)
    # Uniform base measure; (ones + 0.) forces a float array.
    param.base_measure = (np.ones(n_class) + 0.) / n_class
    param.alpha_vec = param.base_measure * param.alpha
    return (param, cache)