Esempio n. 1
0
def precompute_minimal(data, settings):
    """Derive the parameter object and the cache from the training data.

    When ``settings.optype == 'class'`` the ``alpha``-based quantities and
    the ``hist_count`` result for the labels are stored.  For any other
    ``optype`` sums over ``data['y_train']`` are cached and prior / noise
    quantities are computed from the targets.

    Args:
        data: mapping read for ``'y_train'`` plus ``'n_class'`` (class
            branch) or ``'n_train'`` and ``'n_dim'`` (other branch).
        settings: object read for ``optype`` plus ``alpha`` (class branch)
            or ``smooth_hierarchically`` (other branch).

    Returns:
        The tuple ``(param, cache)``; ``param`` is the object produced by
        ``empty()`` with attributes attached, ``cache`` is a plain dict.
    """
    param = empty()
    cache = {}

    if settings.optype != 'class':
        targets = data['y_train']
        # Sums of the targets and of their squares, plus the target count.
        cache['sum_y'] = float(np.sum(targets))
        cache['sum_y2'] = float(np.sum(targets ** 2))
        cache['n_points'] = len(targets)
        warn('initializing prior mean and precision to their true values')
        # FIXME (original author): many of the following are relevant only
        # for mondrian forests
        param.prior_mean = np.mean(targets)
        prior_var = np.var(targets)
        param.prior_variance = prior_var
        param.prior_precision = 1.0 / prior_var
        if settings.smooth_hierarchically:
            # FIXME (original author): measurement noise set to fraction of
            # unconditional variance
            capped_n = min(1000, data['n_train'])
            # The divisor is (1 + capped_n); the earlier comment at this spot
            # mentioned "prior_variance / (2K)", which is not what is computed.
            noise_var = prior_var / (1. + capped_n)
            # NOTE (original author): max_split_cost scales inversely with
            # the number of dimensions
        else:
            noise_var = 0.01  # FIXME (original author): hacky
        param.noise_variance = noise_var
        param.variance_coef = 2.0 * prior_var
        param.sigmoid_coef = data['n_dim'] / (2.0 * np.log2(data['n_train']))
        param.noise_precision = 1.0 / noise_var
        return (param, cache)

    # Classification: spread alpha uniformly over the n_class classes.
    alpha = settings.alpha
    n_class = data['n_class']
    param.alpha = alpha
    param.alpha_per_class = float(alpha) / n_class
    cache['y_train_counts'] = hist_count(data['y_train'], range(n_class))
    cache['range_n_class'] = range(n_class)
    uniform = np.ones(n_class) / n_class
    param.base_measure = uniform
    param.alpha_vec = uniform * alpha
    return (param, cache)
def precompute_minimal(data, settings):
    """Compute ``(param, cache)`` for the given data and settings.

    Args:
        data: mapping; ``'y_train'`` is always read, ``'n_class'`` is read
            when ``settings.optype == 'class'``, otherwise ``'n_train'`` and
            ``'n_dim'`` are read.
        settings: object providing ``optype`` and either ``alpha`` (class
            case) or ``smooth_hierarchically`` (every other case).

    Returns:
        ``(param, cache)`` where ``param`` comes from ``empty()`` and carries
        the computed attributes, and ``cache`` is a dict of precomputed
        values.
    """
    param = empty()
    cache = {}
    if settings.optype == 'class':
        total_alpha = settings.alpha
        num_classes = data['n_class']
        param.alpha = total_alpha
        param.alpha_per_class = float(total_alpha) / num_classes
        cache['y_train_counts'] = hist_count(data['y_train'],
                                             range(num_classes))
        cache['range_n_class'] = range(num_classes)
        # Uniform vector: each of the num_classes entries is 1 / num_classes.
        base = np.ones(num_classes) / num_classes
        param.base_measure = base
        param.alpha_vec = base * total_alpha
    else:
        y = data['y_train']
        cache['sum_y'] = float(np.sum(y))
        cache['sum_y2'] = float(np.sum(y ** 2))
        cache['n_points'] = len(y)
        warn('initializing prior mean and precision to their true values')
        # FIXME (original author): many of the following are relevant only
        # for mondrian forests
        mean_y = np.mean(y)
        var_y = np.var(y)
        param.prior_mean = mean_y
        param.prior_variance = var_y
        param.prior_precision = 1.0 / var_y
        # Without hierarchical smoothing a fixed 0.01 is used (original
        # author: "hacky").  With it, the noise variance is the prior
        # variance divided by 1 + min(1000, n_train); the original comment
        # said "(2K)", but the code divides by (1 + K).
        # NOTE (original author): max_split_cost scales inversely with the
        # number of dimensions
        noise_var = (
            var_y / (1. + min(1000, data['n_train']))
            if settings.smooth_hierarchically
            else 0.01
        )
        param.noise_variance = noise_var
        param.variance_coef = 2.0 * var_y
        param.sigmoid_coef = data['n_dim'] / (2.0 * np.log2(data['n_train']))
        param.noise_precision = 1.0 / noise_var
    return (param, cache)
def precompute_minimal(data, settings):
    """Compute classification parameters and the label-count cache.

    Args:
        data: mapping read for ``'n_class'`` and ``'y_train'``.
        settings: object read for ``optype`` (asserted to be ``'class'``)
            and ``alpha``.

    Returns:
        ``(param, cache)``: ``param`` is the ``empty()`` object with
        ``alpha``, ``alpha_per_class``, ``base_measure`` and ``alpha_vec``
        set; ``cache`` holds ``'y_train_counts'`` and ``'range_n_class'``.

    Raises:
        AssertionError: if ``settings.optype`` is not ``'class'`` and
            assertions are enabled.
    """
    param = empty()
    cache = {}
    assert settings.optype == 'class'
    if settings.optype != 'class':
        # Only reachable when assertions are disabled; nothing is filled in.
        return (param, cache)

    alpha = settings.alpha
    n_class = data['n_class']
    param.alpha = alpha
    param.alpha_per_class = float(alpha) / n_class
    cache['y_train_counts'] = hist_count(data['y_train'], range(n_class))
    cache['range_n_class'] = range(n_class)
    # Uniform base measure over the classes, then scaled by alpha.
    uniform = np.ones(n_class) / n_class
    param.base_measure = uniform
    param.alpha_vec = uniform * alpha
    return (param, cache)
def precompute_minimal(data, settings):
    """Prepare ``(param, cache)`` for the classification setting.

    ``settings.optype`` is asserted to equal ``'class'``.  The same
    condition is then checked again, so with assertions disabled a different
    ``optype`` returns the untouched ``empty()`` object and an empty dict.

    Args:
        data: mapping providing ``'n_class'`` and ``'y_train'``.
        settings: object providing ``optype`` and ``alpha``.

    Returns:
        The tuple ``(param, cache)``.
    """
    param = empty()
    cache = {}
    assert settings.optype == 'class'
    if settings.optype == 'class':
        concentration = settings.alpha
        num_classes = data['n_class']
        param.alpha = concentration
        param.alpha_per_class = float(concentration) / num_classes
        # Two separate range objects are created, as in the original code.
        cache.update(
            y_train_counts=hist_count(data['y_train'], range(num_classes)),
            range_n_class=range(num_classes),
        )
        # Each class gets weight 1 / num_classes in the base measure.
        base = np.ones(num_classes) / num_classes
        param.base_measure = base
        param.alpha_vec = base * concentration
    return (param, cache)