Example #1
def train_SVR(X_train, y_train):
    print('Training SVR ...')
    svr = SVR()
    param_distributions = {
        'kernel':
        [DotProduct(),
         WhiteKernel(),
         RBF(),
         Matern(),
         RationalQuadratic()],
        'C':
        scipy.stats.reciprocal(1.0, 10.),
        #         'epsilon' : scipy.stats.uniform(0.1, 0.5),
        #         'gamma' : scipy.stats.reciprocal(0.01, 0.1),
    }
    randcv = RandomizedSearchCV(svr,
                                param_distributions,
                                n_iter=20,
                                cv=3,
                                n_jobs=-1,
                                random_state=0)
    randcv.fit(X_train, y_train)
    return randcv
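A minimal usage sketch for train_SVR; the imports below are the ones the snippet relies on, and the toy data is an assumption added here for illustration:

import numpy as np
import scipy.stats
from sklearn.svm import SVR
from sklearn.model_selection import RandomizedSearchCV
from sklearn.gaussian_process.kernels import (DotProduct, WhiteKernel, RBF,
                                              Matern, RationalQuadratic)

# Toy regression data (hypothetical); SVR accepts the kernel objects above
# because a callable kernel is used to compute the Gram matrix.
X_train = np.random.rand(50, 3)
y_train = np.random.rand(50)

randcv = train_SVR(X_train, y_train)
print(randcv.best_params_)  # best kernel object and C found by the search
print(randcv.best_score_)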
Example #2
    def GaussianProcess_regression(self, X_train, y_train, X_test, y_test):
        kernels = [
            1.0 * RBF(length_scale=1.0, length_scale_bounds=(1e-1, 10.0)),
            1.0 * RationalQuadratic(length_scale=1.0, alpha=0.1),
            ConstantKernel(0.1, (0.01, 10.0))
            * (DotProduct(sigma_0=1.0, sigma_0_bounds=(0.1, 10.0)) ** 2),
            1.0 * Matern(length_scale=1.0, length_scale_bounds=(1e-1, 10.0),
                         nu=1.5),
        ]

        tuned_parameters = [{"kernel": kernels}]
        my_cv = RepeatedKFold(n_splits=10, n_repeats=10, random_state=42)
        model = GaussianProcessRegressor()
        gsearch_cv = GridSearchCV(estimator=model, param_grid=tuned_parameters,
                                  scoring="neg_mean_squared_error", cv=my_cv,
                                  n_jobs=-1)
        gsearch_cv.fit(X_train, y_train)
        best_model = gsearch_cv.best_estimator_
        best_model.fit(X_train, y_train)
        y_pred = best_model.predict(X_test)
        mae = mean_absolute_error(y_test, y_pred)
        mse = mean_squared_error(y_test, y_pred)
        r2 = r2_score(y_test, y_pred)

        return best_model, mse, mae, r2
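The same kernel grid can be exercised outside the class; a standalone sketch with assumed imports and synthetic data:

import numpy as np
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF, RationalQuadratic, Matern
from sklearn.model_selection import GridSearchCV, RepeatedKFold

rng = np.random.RandomState(0)
X = rng.uniform(-3, 3, (40, 1))
y = np.sin(X).ravel() + 0.1 * rng.randn(40)

kernels = [1.0 * RBF(length_scale=1.0),
           1.0 * RationalQuadratic(length_scale=1.0, alpha=0.1),
           1.0 * Matern(length_scale=1.0, nu=1.5)]
search = GridSearchCV(GaussianProcessRegressor(),
                      param_grid=[{"kernel": kernels}],
                      scoring="neg_mean_squared_error",
                      cv=RepeatedKFold(n_splits=5, n_repeats=2, random_state=42),
                      n_jobs=-1)
search.fit(X, y)
print(search.best_estimator_.kernel_)  # fitted kernel with optimized hyperparameters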
Example #3
    def __init__(self, f, pbounds, random_state=None, verbose=2):
        """"""
        self._random_state = ensure_rng(random_state)
        print('This is the random state')
        print(random_state)
        # Data structure containing the function to be optimized, the bounds of
        # its domain, and a record of the evaluations we have done so far
        self._space = TargetSpace(f, pbounds, random_state)

        # queue
        self._queue = Queue()

        # Internal GP regressor
        self._gp = GaussianProcessRegressor(
            kernel=Matern(nu=2.5),
            alpha=1e-6,
            normalize_y=True,
            n_restarts_optimizer=25,
            random_state=self._random_state,
        )

        self._verbose = verbose
        super(BayesianOptimization, self).__init__(events=DEFAULT_EVENTS)
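For reference, the internal surrogate defined above can be exercised on its own; a minimal sketch with assumed toy observations:

import numpy as np
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import Matern

gp = GaussianProcessRegressor(kernel=Matern(nu=2.5), alpha=1e-6,
                              normalize_y=True, n_restarts_optimizer=25,
                              random_state=0)
X_obs = np.array([[0.1], [0.4], [0.9]])   # points already evaluated
y_obs = np.array([0.2, 0.8, 0.3])         # observed objective values
gp.fit(X_obs, y_obs)
mu, sigma = gp.predict(np.linspace(0, 1, 5).reshape(-1, 1), return_std=True)
# mu and sigma feed the acquisition function that proposes the next point.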
Example #4
def gp_filter(x, y,
              nu=2.5,
              length_scale=1.0,
              length_scale_bounds=(1e-05, 100000.0),
              noise_level=1.0,
              noise_level_bounds=(1e-05, 100000.0),
              **kwargs):
    """
    This 'filter' fits a GaussianProcessRegressor with added white noise kernel
    to the data. The smoothed value is obtained as predictions from the fitted
    model.
    Advantages:  Handles unequally sampled data, automatically adjusts
                 parameters
    Disadvantages: Slow, automatic.
    """
    kernel = ConstantKernel() *\
        Matern(length_scale=length_scale,
               length_scale_bounds=length_scale_bounds, nu=nu) +\
        WhiteKernel(noise_level=noise_level,
                    noise_level_bounds=noise_level_bounds)
    gp = GaussianProcessRegressor(kernel=kernel, normalize_y=True)
    gp.fit(x[:, np.newaxis], y[:, np.newaxis])
    return np.squeeze(gp.predict(x[:, np.newaxis]))
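A hypothetical call to gp_filter on unevenly sampled, noisy data; the imports are the ones the function itself needs:

import numpy as np
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import ConstantKernel, Matern, WhiteKernel

t = np.sort(np.random.uniform(0, 10, 80))        # irregular sample times
signal = np.sin(t) + 0.3 * np.random.randn(80)   # noisy observations
smoothed = gp_filter(t, signal, length_scale=1.0, noise_level=0.1)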
Example #5
def fit_GP(xs, ys, std):
    """
    Fit Gaussian process with Matern kernel for errors against number of data
    points
    """

    X = np.zeros([len(xs), 2])
    X[:, 0] = 1.0
    X[:, 1] = xs
    gp = GaussianProcessRegressor(
        kernel=Matern(nu=2.5),
        alpha=std,
        n_restarts_optimizer=25,
    )
    gp.fit(X, ys)
    x = np.arange(3, 20, 0.005)
    X_pred = np.zeros([len(x), 2])
    X_pred[:, 0] = 1.0
    X_pred[:, 1] = x
    y, std = gp.predict(X_pred, return_std=True)
    y_plus = y + std
    y_minus = y - std
    return y, y_plus, y_minus, x
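A hypothetical call to fit_GP, with synthetic "error vs. number of data points" values; the imports are the ones the function needs:

import numpy as np
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import Matern

n_points = np.arange(3, 20)
errors = 1.0 / n_points + 0.05 * np.random.randn(len(n_points))
y, y_plus, y_minus, x = fit_GP(n_points, errors, std=0.05)
# y is the GP mean on the dense grid x; y_plus and y_minus give a one-sigma band.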
Example #6
    def suggest(self):
        '''Return: Dict[self._exploration_space] => utility'''
        #print('BO: suggest')
        if not self._history:
            # Random choice
            return dict((act, random.random()) for act in self._exploration_space)

        # Fit GP
        x = [xy[0] for xy in self._history]
        y = [min(xy[1], xy[1] / 10) for xy in self._history]
        #y = [xy[1] / 100 for xy in self._history]
        self._gp = GaussianProcessRegressor(
            kernel=Matern(length_scale_bounds="fixed", nu=1.5),
            alpha=1e-10,
            normalize_y=True,
            n_restarts_optimizer=5,
        )
        self._gp.fit(x, y)

        # Utility
        utility_value = self._utility_function(self._exploration_space, self._gp, max(y))

        return dict(zip(self._exploration_space, utility_value))
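The method assumes a self._utility_function(actions, gp, y_max) callable defined elsewhere; a minimal UCB-style sketch of such a callable (an assumption, not the original implementation), assuming each action can be encoded as a numeric vector:

import numpy as np

def ucb_utility(actions, gp, y_max, kappa=2.0):
    # One utility per candidate action: posterior mean plus an exploration
    # bonus proportional to the posterior standard deviation. y_max is
    # ignored by plain UCB; an expected-improvement variant would use it.
    X = np.asarray(list(actions), dtype=float)
    if X.ndim == 1:
        X = X.reshape(-1, 1)
    mu, sigma = gp.predict(X, return_std=True)
    return mu + kappa * sigma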
Example #7
    def get_next_trial(model_id: str, version: str) -> Mapping[str, float]:
        """

        :param model_id:
        :param version:
        :return:
        """
        state = Store.get_model(model_id)

        inputs, targets = state.get_inputs(), state.get_targets()

        kernel = Matern(state.get_length_scales(), state.kernel_constant,
                        state.get_length_scale_bounds())

        optimized_kernel = GaussianProcess.optimize_kernel(
            kernel, inputs, targets, state.sigma)

        bayesian_optimizer = BayesianOptimizer(optimized_kernel, state.sigma,
                                               state.get_feature_bounds())

        suggestion = bayesian_optimizer.suggest(inputs, targets)

        trial_inputs = {
            data_input.name: suggestion[index]
            for index, data_input in enumerate(state.inputs)
        }

        new_state = dataclasses.replace(
            state,
            length_scales=optimized_kernel.length_scales,
            kernel_constant=optimized_kernel.constant)

        Store.set_model(new_state)

        Store.set_trial_inputs(model_id, version, trial_inputs)

        return trial_inputs
Example #8
def kernel_selector(xys, zs):
    # define limits of the hyperparameter space
    # bounds = [(0.00001, 0.0001), (0.0001, 0.001), (0.001, 0.01), (0.01, 0.1), (0.1, 1), (1, 10), (10, 100), (100, 1000), (1000, 10000), (10000, 100000)]
    # nu_bounds = [0.1, 0.5, 1, 1.5, 2, 2.5, 5, 20]
    bounds = [(0.00001, 0.001), (0.001, 0.1), (0.1, 10), (10, 1000),
              (1000, 100000)]
    nu_bounds = [0.5, 1.5, 2.5]

    # generate list of kernels to assess
    quads = [
        RationalQuadratic(length_scale_bounds=(a, b), alpha_bounds=(c, d))
        for a, b in bounds for c, d in bounds
    ]
    rbfs = [RBF(length_scale_bounds=(a, b)) for a, b in bounds]
    mtns = [
        Matern(length_scale_bounds=(a, b), nu=c) for a, b in bounds
        for c in nu_bounds
    ]
    # ExpSineSquared(),
    # kernels = [RationalQuadratic(length_scale_bounds=(0.08, 100)) + WhiteKernel(noise_level_bounds=(1e-5, 1e-2))]
    kernels = quads + rbfs + mtns

    # find best kernel on k-fold CV
    min_error = float("inf")
    min_error_kernel = None
    for kernel0 in kernels:
        kernel = kernel0 + WhiteKernel(noise_level_bounds=(1e-5, 1e-2))
        gpr = GaussianProcessRegressor(kernel=kernel,
                                       n_restarts_optimizer=10,
                                       copy_X_train=True)
        # model = GaussianProcessRegressor(kernel=kernel + WhiteKernel(noise_level_bounds=(1e-5, 1e-2)), n_restarts_optimizer=10, copy_X_train=True)
        err = -1 * cross_val_score(
            gpr, xys, zs, scoring='neg_mean_absolute_error', cv=5).mean()
        # print((type(kernel).__name__[:12] + '\t:\t' + str(err)).expandtabs(13))
        if err < min_error:
            min_error = err
            min_error_kernel = kernel
    return min_error_kernel
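A hypothetical call to kernel_selector on a small 2-D surface; the imports are the ones the function needs. Note that the search (roughly 45 candidate kernels, each with 5-fold CV and 10 optimizer restarts) is slow:

import numpy as np
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import (RationalQuadratic, RBF, Matern,
                                              WhiteKernel)
from sklearn.model_selection import cross_val_score

xys = np.random.rand(60, 2)
zs = np.sin(xys[:, 0]) + np.cos(xys[:, 1])
best_kernel = kernel_selector(xys, zs)
print(best_kernel)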
Example #9
    def __init__(self,
                 optimize_mode="maximize",
                 utility='ei',
                 kappa=5,
                 xi=0,
                 nu=2.5,
                 alpha=1e-6,
                 cold_start_num=10,
                 selection_num_warm_up=100000,
                 selection_num_starting_points=250):
        self._optimize_mode = OptimizeMode(optimize_mode)

        # utility function related
        self._utility = utility
        self._kappa = kappa
        self._xi = xi

        # target space
        self._space = None

        self._random_state = np.random.RandomState()

        # nu, alpha are GPR related params
        self._gp = GaussianProcessRegressor(kernel=Matern(nu=nu),
                                            alpha=alpha,
                                            normalize_y=True,
                                            n_restarts_optimizer=25,
                                            random_state=self._random_state)
        # num of random evaluations before GPR
        self._cold_start_num = cold_start_num

        # params for acq_max
        self._selection_num_warm_up = selection_num_warm_up
        self._selection_num_starting_points = selection_num_starting_points

        # num of imported data
        self._supplement_data_num = 0
Example #10
    def test_adaptive_approximate_gaussian_process(self):
        from sklearn.gaussian_process.kernels import Matern
        num_vars = 1
        univariate_variables = [stats.uniform(-1, 2)] * num_vars

        # Generate random function
        nu = np.inf  # 2.5
        kernel = Matern(0.1, nu=nu)
        X = np.linspace(-1, 1, 1000)[np.newaxis, :]
        alpha = np.random.normal(0, 1, X.shape[1])

        def fun(x):
            return kernel(x.T, X.T).dot(alpha)[:, np.newaxis]
            # return np.cos(2*np.pi*x.sum(axis=0)/num_vars)[:, np.newaxis]

        errors = []
        validation_samples = np.random.uniform(-1, 1, (num_vars, 100))
        validation_values = fun(validation_samples)

        def callback(gp):
            gp_vals = gp(validation_samples)
            assert gp_vals.shape == validation_values.shape
            error = np.linalg.norm(gp_vals - validation_values
                                   ) / np.linalg.norm(validation_values)
            print(error, gp.y_train_.shape[0])
            errors.append(error)

        adaptive_approximate(
            fun, univariate_variables, "gaussian_process", {
                "nu": nu,
                "noise_level": None,
                "normalize_y": True,
                "alpha": 1e-10,
                "ncandidate_samples": 1e3,
                "callback": callback
            }).approx
        assert errors[-1] < 1e-8
Example #11
def fit_gp(X, Y, x, y):
    """
    gaussian process regressor
    :param X:
        input of sampled data points 
    
    :param Y:
        output of sampled data points

    :param x:
        all input of data points
    
    :param y:
        all output of data points

    Return:
        mu: mean of output
        sigma : standard variance of output
    """
    gp = GaussianProcessRegressor(kernel=Matern(nu=2.5),n_restarts_optimizer=25)
    gp.fit(X, Y)
    mu, sigma = gp.predict(x, return_std=True)
    
    return mu, sigma
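A hypothetical call to fit_gp, sampling a few points of a known curve; imports as needed by the function:

import numpy as np
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import Matern

x = np.linspace(0, 1, 100).reshape(-1, 1)       # all candidate inputs
y = np.sin(6 * x).ravel()                       # all outputs (not used by fit_gp)
idx = np.random.choice(100, 8, replace=False)   # indices of sampled points
mu, sigma = fit_gp(x[idx], y[idx], x, y)        # posterior mean and std on x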
Example #12
def main_real_data():
    kernel = Matern()
    for symbol in ["SPX", "SPXPM", "SX5E"]:
        dm_real = dc.DataManagerRealData(symbol)

        X_train, y_train = dm_real.get_training_data()

        train_index = X_train.sample(n=5000).index
        X_train, y_train = X_train.loc[train_index], y_train.loc[train_index]

        scaler = preprocessing.StandardScaler().fit(X_train)
        X_train = scaler.transform(X_train)

        gpr_model = gaussian_process.GaussianProcessRegressor(kernel=kernel)

        gpr_model.fit(X_train, y_train)

        X_test, y_test = dm_real.get_test_data()
        X_test = scaler.transform(X_test)

        y_pred = gpr_model.predict(X_test)

        mse = mean_squared_error(y_test, y_pred)
        print(f"{symbol} - MSE: {mse}")
Example #13
    def test_approximate_gaussian_process(self):
        from sklearn.gaussian_process.kernels import Matern
        num_vars = 1
        univariate_variables = [stats.uniform(-1, 2)] * num_vars
        variable = pya.IndependentMultivariateRandomVariable(
            univariate_variables)
        num_samples = 100
        train_samples = pya.generate_independent_random_samples(
            variable, num_samples)

        # Generate random function
        nu = np.inf  #2.5
        kernel = Matern(0.5, nu=nu)
        X = np.linspace(-1, 1, 1000)[np.newaxis, :]
        alpha = np.random.normal(0, 1, X.shape[1])
        train_vals = kernel(train_samples.T, X.T).dot(alpha)[:, np.newaxis]

        gp = approximate(train_samples, train_vals, 'gaussian_process',
                         {'nu': nu})

        error = np.linalg.norm(gp(X)[:, 0] -
                               kernel(X.T, X.T).dot(alpha)) / np.sqrt(
                                   X.shape[1])
        assert error < 1e-5
Example #14
    def __init__(self, samples, dims, kernel_type, gamma_type="auto"):
        self.training_counter = 0
        assert dims >= 1
        assert type(samples) == type([])
        self.dims = dims

        # create a gaussian process regressor
        noise = 0.1
        m52 = ConstantKernel(1.0) * Matern(length_scale=1.0, nu=2.5)
        self.gpr = GaussianProcessRegressor(kernel=m52, alpha=noise**2)  # default to CPU
        self.kmean = KMeans(n_clusters=2)
        # learned boundary
        self.svm = SVC(kernel=kernel_type, gamma=gamma_type)
        # data structures to store
        self.samples = []
        self.X = np.array([])
        self.fX = np.array([])

        # good region is labeled as zero
        # bad  region is labeled as one
        self.good_label_mean = -1
        self.bad_label_mean = -1

        self.update_samples(samples)
Example #15
    def __init__(self, *args, **kwargs):
        kwargs_opt = {
            # HillClimbingOptimizer
            "eps": 1,
            # StochasticHillClimbingOptimizer
            "r": 1,
            # TabuOptimizer
            "tabu_memory": [3, 6, 9],
            # RandomRestartHillClimbingOptimizer
            "n_restarts": 10,
            # RandomAnnealingOptimizer
            "eps_global": 100,
            "t_rate": 0.98,
            # SimulatedAnnealingOptimizer
            "n_neighbours": 1,
            # StochasticTunnelingOptimizer
            "gamma": 1,
            # ParallelTemperingOptimizer
            "system_temps": [0.1, 0.2, 0.01],
            "n_swaps": 10,
            # ParticleSwarmOptimizer
            "n_part": 10,
            "w": 0.5,
            "c_k": 0.5,
            "c_s": 0.9,
            # EvolutionStrategyOptimizer
            "individuals": 10,
            "mutation_rate": 0.7,
            "crossover_rate": 0.3,
            # BayesianOptimizer
            "kernel": Matern(nu=2.5),
        }

        kwargs_opt = merge_dicts(kwargs_opt, kwargs)

        self._set_specific_args(kwargs_opt)
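The constructor relies on a merge_dicts helper; a minimal sketch of such a helper, assuming user-supplied kwargs are meant to override the defaults:

def merge_dicts(defaults, overrides):
    # Return a new dict; keys in overrides take precedence over defaults.
    merged = dict(defaults)
    merged.update(overrides)
    return merged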
Example #16
fig.suptitle("Dot-product kernel", fontsize=18)
plt.tight_layout()

# %%
print(f"Kernel parameters before fit:\n{kernel})")
print(
    f"Kernel parameters after fit: \n{gpr.kernel_} \n"
    f"Log-likelihood: {gpr.log_marginal_likelihood(gpr.kernel_.theta):.3f}"
)

# %%
# Matérn kernel
# ..............
from sklearn.gaussian_process.kernels import Matern

kernel = 1.0 * Matern(length_scale=1.0, length_scale_bounds=(1e-1, 10.0), nu=1.5)
gpr = GaussianProcessRegressor(kernel=kernel, random_state=0)

fig, axs = plt.subplots(nrows=2, sharex=True, sharey=True, figsize=(10, 8))

# plot prior
plot_gpr_samples(gpr, n_samples=n_samples, ax=axs[0])
axs[0].set_title("Samples from prior distribution")

# plot posterior
gpr.fit(X_train, y_train)
plot_gpr_samples(gpr, n_samples=n_samples, ax=axs[1])
axs[1].scatter(X_train[:, 0], y_train, color="red", zorder=10, label="Observations")
axs[1].legend(bbox_to_anchor=(1.05, 1.5), loc="upper left")
axs[1].set_title("Samples from posterior distribution")
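The plotting snippet above assumes a plot_gpr_samples helper; a minimal sketch of such a helper (an assumption, drawing prior or posterior samples on a fixed grid):

import numpy as np

def plot_gpr_samples(gpr_model, n_samples, ax):
    # Draw samples from the GP (prior if unfitted, posterior if fitted)
    # and plot them together with the mean and a one-sigma band.
    x = np.linspace(0, 5, 100)
    X = x.reshape(-1, 1)
    y_mean, y_std = gpr_model.predict(X, return_std=True)
    y_samples = gpr_model.sample_y(X, n_samples)
    for idx in range(n_samples):
        ax.plot(x, y_samples[:, idx], linestyle="--", alpha=0.7,
                label=f"Sampled function #{idx + 1}")
    ax.plot(x, y_mean, color="black", label="Mean")
    ax.fill_between(x, y_mean - y_std, y_mean + y_std,
                    alpha=0.1, color="black", label=r"$\pm$ 1 std. dev.")
    ax.set_xlabel("x")
    ax.set_ylabel("y")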
Example #17
def parameterized_inference(
        algorithm='carl',
        morphing_aware=False,
        training_sample='baseline',  # 'baseline', 'basis', 'random'
        use_smearing=False,
        denominator=0,
        alpha=None,
        training_sample_size=None,
        do_neyman=False,
        options=''):  # all other options in a string
    """
    Likelihood ratio estimation through parameterized or morphing-aware versions of CARL, CASCAL, ROLR, and RASCAL.

    :param algorithm: Inference strategy. 'carl' for CARL, 'score' for an unnamed strategy that just uses the score,
                      'combined' for CASCAL, 'regression' for ROLR, or 'combinedregression' for RASCAL.
    :param morphing_aware: bool that decides whether a morphing-aware or morphing-agnostic parameterized architecture is
                           used.
    :param training_sample: Training sample. Can be 'baseline', 'basis', or 'random'.
    :param use_smearing: Whether to use the training and evaluation sample with (simplified) detector simulation.
    :param denominator: Which of five predefined denominator (reference) hypotheses to use.
    :param alpha: Hyperparameter that multiplies score term in loss function for RASCAL and CASCAL. If None, default
                  values are used.
    :param training_sample_size: If not None, limits the training sample size to the given value.
    :param do_neyman: Switches on the evaluation of toy experiments for the Neyman construction.
    :param options: Further options in a list of strings or string. 'learns' changes the architecture such that the
                    fully connected networks represent s rather than log r. 'new' changes the samples. 'short' and
                    'long' change the number of epochs. 'deep' and 'shallow' use more or less hidden layers. 'factorsm'
                    changes the architecture in the morphing-aware mode such that the SM and the deviation from it are
                    modelled independently. 'slowlearning' and 'fastlearning' change the learning rate, while
                    'constantlr' turns off the learning rate decay. 'neyman2' and 'neyman3' change the Neyman
                    construction sample, and 'recalibration' activates the calculation of E[r] on a separate sample
                    for the expectation calibration step. 'debug' activates a debug mode with much smaller samples.
    """

    logging.info('Starting parameterized inference')

    ################################################################################
    # Settings
    ################################################################################

    assert algorithm in [
        'carl', 'score', 'combined', 'regression', 'combinedregression'
    ]
    assert training_sample in ['baseline', 'basis', 'random']

    random_theta_mode = training_sample == 'random'
    basis_theta_mode = training_sample == 'basis'

    learn_logr_mode = ('learns' not in options)
    new_sample_mode = ('new' in options)
    short_mode = ('short' in options)
    long_mode = ('long' in options)
    deep_mode = ('deep' in options)
    shallow_mode = ('shallow' in options)
    debug_mode = ('debug' in options)
    factor_out_sm_in_aware_mode = morphing_aware and ('factorsm' in options)
    small_lr_mode = ('slowlearning' in options)
    large_lr_mode = ('fastlearning' in options)
    large_batch_mode = ('largebatch' in options)
    small_batch_mode = ('smallbatch' in options)
    constant_lr_mode = ('constantlr' in options)
    neyman2_mode = ('neyman2' in options)
    neyman3_mode = ('neyman3' in options)
    recalibration_mode = ('recalibration' in options)

    filename_addition = ''
    if morphing_aware:
        filename_addition = '_aware'

    if random_theta_mode:
        filename_addition += '_random'
    elif basis_theta_mode:
        filename_addition += '_basis'

    if not learn_logr_mode:
        filename_addition += '_learns'

    if factor_out_sm_in_aware_mode:
        filename_addition += '_factorsm'

    learning_rate = settings.learning_rate_default
    if small_lr_mode:
        filename_addition += '_slowlearning'
        learning_rate = settings.learning_rate_small
    elif large_lr_mode:
        filename_addition += '_fastlearning'
        learning_rate = settings.learning_rate_large

    lr_decay = settings.learning_rate_decay
    if constant_lr_mode:
        lr_decay = 0.
        filename_addition += '_constantlr'

    batch_size = settings.batch_size_default
    if large_batch_mode:
        filename_addition += '_largebatch'
        batch_size = settings.batch_size_large
    elif small_batch_mode:
        filename_addition += '_smallbatch'
        batch_size = settings.batch_size_small
    settings.batch_size = batch_size

    alpha_regression = settings.alpha_regression_default
    alpha_carl = settings.alpha_carl_default
    if alpha is not None:
        alpha_regression = alpha
        alpha_carl = alpha
        precision = int(max(-math.floor(np.log10(alpha)) + 1, 0))
        filename_addition += '_alpha_' + format_number(alpha, precision)

    n_hidden_layers = settings.n_hidden_layers_default
    if shallow_mode:
        n_hidden_layers = settings.n_hidden_layers_shallow
        filename_addition += '_shallow'
    elif deep_mode:
        n_hidden_layers = settings.n_hidden_layers_deep
        filename_addition += '_deep'

    n_epochs = settings.n_epochs_default
    early_stopping = True
    early_stopping_patience = settings.early_stopping_patience
    if debug_mode:
        n_epochs = settings.n_epochs_short
        early_stopping = False
        filename_addition += '_debug'
    elif long_mode:
        n_epochs = settings.n_epochs_long
        filename_addition += '_long'
    elif short_mode:
        n_epochs = settings.n_epochs_short
        early_stopping = False
        filename_addition += '_short'

    if training_sample_size is not None:
        filename_addition += '_trainingsamplesize_' + str(training_sample_size)
        n_epoch_factor = int(
            len(settings.thetas_train) *
            (settings.n_events_baseline_num + settings.n_events_baseline_den) /
            training_sample_size)
        n_epochs *= n_epoch_factor
        lr_decay /= float(n_epoch_factor)
        early_stopping_patience *= n_epoch_factor

    input_X_prefix = ''
    if use_smearing:
        input_X_prefix = 'smeared_'
        filename_addition += '_smeared'

    theta1 = settings.theta1_default
    input_filename_addition = ''
    if denominator > 0:
        input_filename_addition = '_denom' + str(denominator)
        filename_addition += '_denom' + str(denominator)
        theta1 = settings.theta1_alternatives[denominator - 1]

    if new_sample_mode:
        filename_addition += '_new'
        input_filename_addition += '_new'

    n_expected_events_neyman = settings.n_expected_events_neyman
    n_neyman_null_experiments = settings.n_neyman_null_experiments
    n_neyman_alternate_experiments = settings.n_neyman_alternate_experiments
    neyman_filename = 'neyman'
    if neyman2_mode:
        neyman_filename = 'neyman2'
        n_expected_events_neyman = settings.n_expected_events_neyman2
        n_neyman_null_experiments = settings.n_neyman2_null_experiments
        n_neyman_alternate_experiments = settings.n_neyman2_alternate_experiments
    if neyman3_mode:
        neyman_filename = 'neyman3'
        n_expected_events_neyman = settings.n_expected_events_neyman3
        n_neyman_null_experiments = settings.n_neyman3_null_experiments
        n_neyman_alternate_experiments = settings.n_neyman3_alternate_experiments

    results_dir = settings.base_dir + '/results/parameterized'
    neyman_dir = settings.neyman_dir + '/parameterized'

    logging.info('Main settings:')
    logging.info('  Algorithm:                %s', algorithm)
    logging.info('  Morphing-aware:           %s', morphing_aware)
    logging.info('  Training sample:          %s', training_sample)
    logging.info('  Denominator theta:        denominator %s = theta %s = %s',
                 denominator, theta1, settings.thetas[theta1])
    logging.info('Options:')
    logging.info('  Number of hidden layers:  %s', n_hidden_layers)
    if algorithm == 'combined':
        logging.info('  alpha:                    %s', alpha_carl)
    elif algorithm == 'combinedregression':
        logging.info('  alpha:                    %s', alpha_regression)
    logging.info('  Batch size:               %s', batch_size)
    logging.info('  Learning rate:            %s', learning_rate)
    logging.info('  Learning rate decay:      %s', lr_decay)
    logging.info('  Number of epochs:         %s', n_epochs)
    logging.info(
        '  Training samples:         %s',
        'all' if training_sample_size is None else training_sample_size)
    if do_neyman:
        logging.info(
            '  NC experiments:           (%s alternate + %s null) experiments with %s alternate events each',
            n_neyman_alternate_experiments, n_neyman_null_experiments,
            n_expected_events_neyman)
    else:
        logging.info('  NC experiments:           False')
    logging.info('  Debug mode:               %s', debug_mode)

    ################################################################################
    # Data
    ################################################################################

    # Load data
    train_filename = '_train'
    if random_theta_mode:
        train_filename += '_random'
    elif basis_theta_mode:
        train_filename += '_basis'
    train_filename += input_filename_addition

    X_train = np.load(settings.unweighted_events_dir + '/' + input_X_prefix +
                      'X' + train_filename + '.npy')
    X_train_unshuffled = np.load(settings.unweighted_events_dir + '/' +
                                 input_X_prefix + 'X' + train_filename +
                                 '.npy')
    y_train = np.load(settings.unweighted_events_dir + '/y' + train_filename +
                      '.npy')
    scores_train = np.load(settings.unweighted_events_dir + '/scores' +
                           train_filename + '.npy')
    r_train = np.load(settings.unweighted_events_dir + '/r' + train_filename +
                      '.npy')
    theta0_train = np.load(settings.unweighted_events_dir + '/theta0' +
                           train_filename + '.npy')
    theta0_train_unshuffled = np.load(settings.unweighted_events_dir +
                                      '/theta0' + train_filename + '.npy')

    X_calibration = np.load(settings.unweighted_events_dir + '/' +
                            input_X_prefix + 'X_calibration' +
                            input_filename_addition + '.npy')
    weights_calibration = np.load(settings.unweighted_events_dir +
                                  '/weights_calibration' +
                                  input_filename_addition + '.npy')

    if recalibration_mode:
        X_recalibration = np.load(settings.unweighted_events_dir + '/' +
                                  input_X_prefix + 'X_recalibration' + '.npy')

    X_test = np.load(settings.unweighted_events_dir + '/' + input_X_prefix +
                     'X_test' + input_filename_addition + '.npy')
    r_test = np.load(settings.unweighted_events_dir + '/r_test' +
                     input_filename_addition + '.npy')

    X_roam = np.load(settings.unweighted_events_dir + '/' + input_X_prefix +
                     'X_roam' + input_filename_addition + '.npy')
    n_roaming = len(X_roam)

    X_illustration = np.load(settings.unweighted_events_dir + '/' +
                             input_X_prefix + 'X_illustration' +
                             input_filename_addition + '.npy')

    if do_neyman:
        X_neyman_alternate = np.load(settings.unweighted_events_dir +
                                     '/neyman/' + input_X_prefix + 'X_' +
                                     neyman_filename + '_alternate.npy')

    n_events_test = X_test.shape[0]
    assert settings.n_thetas == r_test.shape[0]

    # Shuffle training data
    X_train, y_train, scores_train, r_train, theta0_train = shuffle(
        X_train, y_train, scores_train, r_train, theta0_train, random_state=44)

    # Limit training sample size
    keras_verbosity = 2
    if training_sample_size is not None:
        keras_verbosity = 0
        original_training_sample_size = X_train.shape[0]

        X_train = X_train[:training_sample_size]
        y_train = y_train[:training_sample_size]
        scores_train = scores_train[:training_sample_size]
        r_train = r_train[:training_sample_size]
        theta0_train = theta0_train[:training_sample_size]

        logging.info('Reduced training sample size from %s to %s (factor %s)',
                     original_training_sample_size, X_train.shape[0],
                     n_epoch_factor)

    # Normalize data
    scaler = StandardScaler()
    scaler.fit(np.array(X_train, dtype=np.float64))
    X_train_transformed = scaler.transform(X_train)
    X_train_transformed_unshuffled = scaler.transform(X_train_unshuffled)
    X_test_transformed = scaler.transform(X_test)
    X_roam_transformed = scaler.transform(X_roam)
    X_calibration_transformed = scaler.transform(X_calibration)
    X_illustration_transformed = scaler.transform(X_illustration)
    if recalibration_mode:
        X_recalibration_transformed = scaler.transform(X_recalibration)
    if do_neyman:
        X_neyman_alternate_transformed = scaler.transform(
            X_neyman_alternate.reshape((-1, X_neyman_alternate.shape[2])))

    # Roaming data
    X_thetas_train = np.hstack((X_train_transformed, theta0_train))
    X_thetas_train_unshuffled = np.hstack(
        (X_train_transformed_unshuffled, theta0_train_unshuffled))
    y_logr_score_train = np.hstack(
        (y_train.reshape(-1, 1), np.log(r_train).reshape(
            (-1, 1)), scores_train))
    xi = np.linspace(-1.0, 1.0, settings.n_thetas_roam)
    yi = np.linspace(-1.0, 1.0, settings.n_thetas_roam)
    xx, yy = np.meshgrid(xi, yi)
    thetas_roam = np.asarray((xx.flatten(), yy.flatten())).T
    X_thetas_roam = []
    for i in range(n_roaming):
        X_thetas_roam.append(
            np.zeros(
                (settings.n_thetas_roam**2, X_roam_transformed.shape[1] + 2)))
        X_thetas_roam[-1][:, :-2] = X_roam_transformed[i, :]
        X_thetas_roam[-1][:, -2:] = thetas_roam

    if debug_mode:
        X_thetas_train = X_thetas_train[::100]
        y_logr_score_train = y_logr_score_train[::100]
        X_test_transformed = X_test_transformed[::100]
        r_test = r_test[:, ::100]
        X_calibration_transformed = X_calibration_transformed[::100]
        weights_calibration = weights_calibration[:, ::100]
        X_illustration_transformed = X_illustration_transformed[::100]
        if recalibration_mode:
            X_recalibration_transformed = X_recalibration_transformed[::100]
        n_events_test = len(X_test_transformed)

    ################################################################################
    # Training
    ################################################################################

    if algorithm == 'carl':
        if morphing_aware:
            regr = KerasRegressor(lambda: make_classifier_carl_morphingaware(
                n_hidden_layers=n_hidden_layers,
                learn_log_r=learn_logr_mode,
                learning_rate=learning_rate),
                                  epochs=n_epochs,
                                  validation_split=settings.validation_split,
                                  verbose=keras_verbosity)
        else:
            regr = KerasRegressor(
                lambda: make_classifier_carl(n_hidden_layers=n_hidden_layers,
                                             learn_log_r=learn_logr_mode,
                                             learning_rate=learning_rate),
                epochs=n_epochs,
                validation_split=settings.validation_split,
                verbose=keras_verbosity)

    elif algorithm == 'score':
        if morphing_aware:
            regr = KerasRegressor(lambda: make_classifier_score_morphingaware(
                n_hidden_layers=n_hidden_layers,
                learn_log_r=learn_logr_mode,
                learning_rate=learning_rate),
                                  epochs=n_epochs,
                                  validation_split=settings.validation_split,
                                  verbose=keras_verbosity)
        else:
            regr = KerasRegressor(
                lambda: make_classifier_score(n_hidden_layers=n_hidden_layers,
                                              learn_log_r=learn_logr_mode,
                                              learning_rate=learning_rate),
                epochs=n_epochs,
                validation_split=settings.validation_split,
                verbose=keras_verbosity)

    elif algorithm == 'combined':
        if morphing_aware:
            regr = KerasRegressor(
                lambda: make_classifier_combined_morphingaware(
                    n_hidden_layers=n_hidden_layers,
                    learn_log_r=learn_logr_mode,
                    alpha=alpha_carl,
                    learning_rate=learning_rate),
                epochs=n_epochs,
                validation_split=settings.validation_split,
                verbose=keras_verbosity)
        else:
            regr = KerasRegressor(lambda: make_classifier_combined(
                n_hidden_layers=n_hidden_layers,
                learn_log_r=learn_logr_mode,
                alpha=alpha_carl,
                learning_rate=learning_rate),
                                  epochs=n_epochs,
                                  validation_split=settings.validation_split,
                                  verbose=keras_verbosity)

    elif algorithm == 'regression':
        if morphing_aware:
            regr = KerasRegressor(lambda: make_regressor_morphingaware(
                n_hidden_layers=n_hidden_layers,
                factor_out_sm=factor_out_sm_in_aware_mode,
                learning_rate=learning_rate),
                                  epochs=n_epochs,
                                  validation_split=settings.validation_split,
                                  verbose=keras_verbosity)
        else:
            regr = KerasRegressor(
                lambda: make_regressor(n_hidden_layers=n_hidden_layers),
                epochs=n_epochs,
                validation_split=settings.validation_split,
                verbose=keras_verbosity)

    elif algorithm == 'combinedregression':
        if morphing_aware:
            regr = KerasRegressor(
                lambda: make_combined_regressor_morphingaware(
                    n_hidden_layers=n_hidden_layers,
                    factor_out_sm=factor_out_sm_in_aware_mode,
                    alpha=alpha_regression,
                    learning_rate=learning_rate),
                epochs=n_epochs,
                validation_split=settings.validation_split,
                verbose=keras_verbosity)
        else:
            regr = KerasRegressor(lambda: make_combined_regressor(
                n_hidden_layers=n_hidden_layers,
                alpha=alpha_regression,
                learning_rate=learning_rate),
                                  epochs=n_epochs,
                                  validation_split=settings.validation_split,
                                  verbose=keras_verbosity)

    else:
        raise ValueError()

    # Callbacks
    callbacks = []
    detailed_history = {}
    callbacks.append(DetailedHistory(detailed_history))
    if not constant_lr_mode:

        def lr_scheduler(epoch):
            return learning_rate * np.exp(-epoch * lr_decay)

        callbacks.append(LearningRateScheduler(lr_scheduler))
    if early_stopping:
        callbacks.append(
            EarlyStopping(verbose=1, patience=early_stopping_patience))

    # Training
    logging.info('Starting training')
    history = regr.fit(X_thetas_train[::],
                       y_logr_score_train[::],
                       callbacks=callbacks,
                       batch_size=batch_size)

    # Save metrics
    def _save_metrics(key, filename):
        try:
            metrics = np.asarray(
                [history.history[key], history.history['val_' + key]])
            np.save(
                results_dir + '/traininghistory_' + filename + '_' +
                algorithm + filename_addition + '.npy', metrics)
        except KeyError:
            logging.warning(
                'Key %s not found in per-epoch history. Available keys: %s',
                key, list(history.history.keys()))
        try:
            detailed_metrics = np.asarray(detailed_history[key])
            np.save(
                results_dir + '/traininghistory_100batches_' + filename + '_' +
                algorithm + filename_addition + '.npy', detailed_metrics)
        except KeyError:
            logging.warning(
                'Key %s not found in per-batch history. Available keys: %s',
                key, list(detailed_history.keys()))

    _save_metrics('loss', 'loss')
    _save_metrics('full_cross_entropy', 'ce')
    _save_metrics('full_mse_log_r', 'mse_logr')
    _save_metrics('full_mse_score', 'mse_scores')

    # Evaluate rhat on training sample
    # r_hat_train = np.exp(regr.predict(X_thetas_train_unshuffled)[:, 1])
    # np.save(results_dir + '/r_train_' + algorithm + filename_addition + '.npy', r_hat_train)

    ################################################################################
    # Raw evaluation loop
    ################################################################################

    # carl wrapper
    # ratio = ClassifierScoreRatio(regr, prefit=True)

    logging.info('Starting evaluation')
    expected_llr = []
    mse_log_r = []
    trimmed_mse_log_r = []
    eval_times = []
    expected_r_vs_sm = []
    if recalibration_mode:
        recalibration_expected_r = []

    for t, theta in enumerate(settings.thetas):

        if (t + 1) % 100 == 0:
            logging.info('Starting theta %s / %s', t + 1, settings.n_thetas)

        ################################################################################
        # Evaluation
        ################################################################################

        # Prepare test data
        thetas0_array = np.zeros((X_test_transformed.shape[0], 2),
                                 dtype=X_test_transformed.dtype)
        thetas0_array[:, :] = theta
        X_thetas_test = np.hstack((X_test_transformed, thetas0_array))

        # Evaluation
        time_before = time.time()
        prediction = regr.predict(X_thetas_test)
        eval_times.append(time.time() - time_before)

        this_log_r = prediction[:, 1]
        this_score = prediction[:, 2:4]
        if morphing_aware:
            this_wi = prediction[:, 4:19]
            this_ri = prediction[:, 19:]
            logging.debug('Morphing weights for theta %s (%s): %s', t, theta,
                          this_wi[0])

        # Extract numbers of interest
        expected_llr.append(-2. * settings.n_expected_events / n_events_test *
                            np.sum(this_log_r))
        mse_log_r.append(
            calculate_mean_squared_error(np.log(r_test[t]), this_log_r, 0.))
        trimmed_mse_log_r.append(
            calculate_mean_squared_error(np.log(r_test[t]), this_log_r,
                                         'auto'))

        if t == settings.theta_observed:
            r_sm = np.exp(this_log_r)
        expected_r_vs_sm.append(np.mean(np.exp(this_log_r) / r_sm))

        # For benchmark thetas, save more info
        if t == settings.theta_benchmark_nottrained:
            np.save(
                results_dir + '/r_nottrained_' + algorithm +
                filename_addition + '.npy', np.exp(this_log_r))
            np.save(
                results_dir + '/scores_nottrained_' + algorithm +
                filename_addition + '.npy', this_score)
            np.save(
                results_dir + '/r_vs_sm_nottrained_' + algorithm +
                filename_addition + '.npy',
                np.exp(this_log_r) / r_sm)
            if morphing_aware:
                np.save(
                    results_dir + '/morphing_ri_nottrained_' + algorithm +
                    filename_addition + '.npy', this_ri)
                np.save(
                    results_dir + '/morphing_wi_nottrained_' + algorithm +
                    filename_addition + '.npy', this_wi)
        elif t == settings.theta_benchmark_trained:
            np.save(
                results_dir + '/r_trained_' + algorithm + filename_addition +
                '.npy', np.exp(this_log_r))
            np.save(
                results_dir + '/scores_trained_' + algorithm +
                filename_addition + '.npy', this_score)
            np.save(
                results_dir + '/r_vs_sm_trained_' + algorithm +
                filename_addition + '.npy',
                np.exp(this_log_r) / r_sm)
            if morphing_aware:
                np.save(
                    results_dir + '/morphing_ri_trained_' + algorithm +
                    filename_addition + '.npy', this_ri)
                np.save(
                    results_dir + '/morphing_wi_trained_' + algorithm +
                    filename_addition + '.npy', this_wi)

        ################################################################################
        # Recalibration
        ################################################################################

        if recalibration_mode:
            # Prepare data for recalibration
            thetas0_array = np.zeros((X_recalibration_transformed.shape[0], 2),
                                     dtype=X_recalibration_transformed.dtype)
            thetas0_array[:, :] = settings.thetas[t]
            X_thetas_recalibration = np.hstack(
                (X_recalibration_transformed, thetas0_array))

            # Evaluate recalibration data
            prediction = regr.predict(X_thetas_recalibration)
            this_r = np.exp(prediction[:, 1])
            if t == settings.theta_observed:
                r_recalibration_sm = this_r
            recalibration_expected_r.append(
                np.mean(this_r / r_recalibration_sm))

        ################################################################################
        # Illustration
        ################################################################################

        if t == settings.theta_benchmark_illustration:
            # Prepare data for illustration
            thetas0_array = np.zeros((X_illustration_transformed.shape[0], 2),
                                     dtype=X_illustration_transformed.dtype)
            thetas0_array[:, :] = settings.thetas[t]
            X_thetas_illustration = np.hstack(
                (X_illustration_transformed, thetas0_array))

            # Evaluate illustration data
            prediction = regr.predict(X_thetas_illustration)
            r_hat_illustration = np.exp(prediction[:, 1])

            np.save(
                results_dir + '/r_illustration_' + algorithm +
                filename_addition + '.npy', r_hat_illustration)

        ################################################################################
        # Neyman construction toys
        ################################################################################

        if do_neyman:
            # Prepare alternate data for Neyman construction
            thetas0_array = np.zeros(
                (X_neyman_alternate_transformed.shape[0], 2),
                dtype=X_neyman_alternate_transformed.dtype)
            thetas0_array[:, :] = theta
            X_thetas_neyman_alternate = np.hstack(
                (X_neyman_alternate_transformed, thetas0_array))

            # Neyman construction: evaluate alternate sample (raw)
            log_r_neyman_alternate = regr.predict(X_thetas_neyman_alternate)[:,
                                                                             1]
            llr_neyman_alternate = -2. * np.sum(log_r_neyman_alternate.reshape(
                (-1, n_expected_events_neyman)),
                                                axis=1)
            np.save(
                neyman_dir + '/' + neyman_filename + '_llr_alternate_' +
                str(t) + '_' + algorithm + filename_addition + '.npy',
                llr_neyman_alternate)

            # NC: null
            X_neyman_null = np.load(settings.unweighted_events_dir +
                                    '/neyman/' + input_X_prefix + 'X_' +
                                    neyman_filename + '_null_' + str(t) +
                                    '.npy')
            X_neyman_null_transformed = scaler.transform(
                X_neyman_null.reshape((-1, X_neyman_null.shape[2])))

            # Prepare null data for Neyman construction
            thetas0_array = np.zeros((X_neyman_null_transformed.shape[0], 2),
                                     dtype=X_neyman_null_transformed.dtype)
            thetas0_array[:, :] = settings.thetas[t]
            X_thetas_neyman_null = np.hstack(
                (X_neyman_null_transformed, thetas0_array))

            # Neyman construction: evaluate null sample (raw)
            log_r_neyman_null = regr.predict(X_thetas_neyman_null)[:, 1]
            llr_neyman_null = -2. * np.sum(log_r_neyman_null.reshape(
                (-1, n_expected_events_neyman)),
                                           axis=1)
            np.save(
                neyman_dir + '/' + neyman_filename + '_llr_null_' + str(t) +
                '_' + algorithm + filename_addition + '.npy', llr_neyman_null)

            # NC: null evaluated at alternate
            if t == settings.theta_observed:
                for tt in range(settings.n_thetas):
                    X_neyman_null = np.load(settings.unweighted_events_dir +
                                            '/neyman/' + input_X_prefix +
                                            'X_' + neyman_filename + '_null_' +
                                            str(tt) + '.npy')
                    X_neyman_null_transformed = scaler.transform(
                        X_neyman_null.reshape((-1, X_neyman_null.shape[2])))
                    X_thetas_neyman_null = np.hstack(
                        (X_neyman_null_transformed, thetas0_array))

                    # Neyman construction: evaluate null sample (raw)
                    log_r_neyman_null = regr.predict(X_thetas_neyman_null)[:,
                                                                           1]
                    llr_neyman_null = -2. * np.sum(log_r_neyman_null.reshape(
                        (-1, n_expected_events_neyman)),
                                                   axis=1)
                    np.save(
                        neyman_dir + '/' + neyman_filename +
                        '_llr_nullatalternate_' + str(tt) + '_' + algorithm +
                        filename_addition + '.npy', llr_neyman_null)

    # Save evaluation results
    expected_llr = np.asarray(expected_llr)
    mse_log_r = np.asarray(mse_log_r)
    trimmed_mse_log_r = np.asarray(trimmed_mse_log_r)
    expected_r_vs_sm = np.asarray(expected_r_vs_sm)
    np.save(results_dir + '/llr_' + algorithm + filename_addition + '.npy',
            expected_llr)
    np.save(
        results_dir + '/mse_logr_' + algorithm + filename_addition + '.npy',
        mse_log_r)
    np.save(
        results_dir + '/trimmed_mse_logr_' + algorithm + filename_addition +
        '.npy', trimmed_mse_log_r)
    np.save(
        results_dir + '/expected_r_vs_sm_' + algorithm + filename_addition +
        '.npy', expected_r_vs_sm)
    if recalibration_mode:
        recalibration_expected_r = np.asarray(recalibration_expected_r)
        np.save(
            results_dir + '/recalibration_expected_r_vs_sm_' + algorithm +
            filename_addition + '.npy', recalibration_expected_r)

    # Evaluation times
    logging.info('Evaluation timing: median %s s, mean %s s',
                 np.median(eval_times), np.mean(eval_times))

    logging.info('Starting roaming')
    r_roam = []
    for i in range(n_roaming):
        prediction = regr.predict(X_thetas_roam[i])
        r_roam.append(np.exp(prediction[:, 1]))
    r_roam = np.asarray(r_roam)
    np.save(results_dir + '/r_roam_' + algorithm + filename_addition + '.npy',
            r_roam)

    ################################################################################
    # Calibrated evaluation loop
    ################################################################################

    logging.info('Starting calibrated evaluation and roaming')
    expected_llr_calibrated = []
    mse_log_r_calibrated = []
    trimmed_mse_log_r_calibrated = []
    r_roam_temp = np.zeros((settings.n_thetas, n_roaming))
    eval_times = []
    expected_r_vs_sm = []
    if recalibration_mode:
        recalibration_expected_r = []

    for t, theta in enumerate(settings.thetas):

        if (t + 1) % 100 == 0:
            logging.info('Starting theta %s / %s', t + 1, settings.n_thetas)

        ################################################################################
        # Calibration
        ################################################################################

        # Prepare data for calibration
        n_calibration_each = X_calibration_transformed.shape[0]
        thetas0_array = np.zeros((n_calibration_each, 2),
                                 dtype=X_calibration_transformed.dtype)
        thetas0_array[:, :] = settings.thetas[t]
        X_thetas_calibration = np.hstack(
            (X_calibration_transformed, thetas0_array))
        X_thetas_calibration = np.vstack(
            (X_thetas_calibration, X_thetas_calibration))
        y_calibration = np.zeros(2 * n_calibration_each)
        y_calibration[n_calibration_each:] = 1.
        w_calibration = np.zeros(2 * n_calibration_each)
        w_calibration[:n_calibration_each] = weights_calibration[t]
        w_calibration[n_calibration_each:] = weights_calibration[theta1]

        # Calibration
        ratio_calibrated = ClassifierScoreRatio(
            CalibratedClassifierScoreCV(regr, cv='prefit', method='isotonic'))
        ratio_calibrated.fit(X_thetas_calibration,
                             y_calibration,
                             sample_weight=w_calibration)

        ################################################################################
        # Evaluation
        ################################################################################

        # Prepare data
        thetas0_array = np.zeros((X_test_transformed.shape[0], 2),
                                 dtype=X_test_transformed.dtype)
        thetas0_array[:, :] = settings.thetas[t]
        X_thetas_test = np.hstack((X_test_transformed, thetas0_array))

        time_before = time.time()
        this_r, this_other = ratio_calibrated.predict(X_thetas_test)
        eval_times.append(time.time() - time_before)
        this_score = this_other[:, 1:3]

        # Extract numbers of interest
        expected_llr_calibrated.append(-2. * settings.n_expected_events /
                                       n_events_test * np.sum(np.log(this_r)))
        mse_log_r_calibrated.append(
            calculate_mean_squared_error(np.log(r_test[t]), np.log(this_r),
                                         0.))
        trimmed_mse_log_r_calibrated.append(
            calculate_mean_squared_error(np.log(r_test[t]), np.log(this_r),
                                         'auto'))

        if t == settings.theta_observed:
            r_sm = this_r
        expected_r_vs_sm.append(np.mean(this_r / r_sm))

        # For benchmark theta, save more data
        if t == settings.theta_benchmark_nottrained:
            np.save(
                results_dir + '/scores_nottrained_' + algorithm +
                '_calibrated' + filename_addition + '.npy', this_score)
            np.save(
                results_dir + '/r_nottrained_' + algorithm + '_calibrated' +
                filename_addition + '.npy', this_r)
            np.save(
                results_dir + '/r_vs_sm_nottrained_' + algorithm +
                '_calibrated' + filename_addition + '.npy', this_r / r_sm)
            np.save(
                results_dir + '/calvalues_nottrained_' + algorithm +
                filename_addition + '.npy', ratio_calibrated.classifier_.
                calibration_sample[:n_calibration_each])

        elif t == settings.theta_benchmark_trained:
            np.save(
                results_dir + '/scores_trained_' + algorithm + '_calibrated' +
                filename_addition + '.npy', this_score)
            np.save(
                results_dir + '/r_trained_' + algorithm + '_calibrated' +
                filename_addition + '.npy', this_r)
            np.save(
                results_dir + '/r_vs_sm_trained_' + algorithm + '_calibrated' +
                filename_addition + '.npy', this_r / r_sm)
            np.save(
                results_dir + '/calvalues_trained_' + algorithm +
                filename_addition + '.npy', ratio_calibrated.classifier_.
                calibration_sample[:n_calibration_each])

        ################################################################################
        # Recalibration
        ################################################################################

        if recalibration_mode:
            # Prepare data for recalibration
            thetas0_array = np.zeros((X_recalibration_transformed.shape[0], 2),
                                     dtype=X_recalibration_transformed.dtype)
            thetas0_array[:, :] = settings.thetas[t]
            X_thetas_recalibration = np.hstack(
                (X_recalibration_transformed, thetas0_array))

            # Evaluate recalibration data
            this_r, _ = ratio_calibrated.predict(X_thetas_recalibration)
            if t == settings.theta_observed:
                r_recalibration_sm = this_r
            recalibration_expected_r.append(
                np.mean(this_r / r_recalibration_sm))

        ################################################################################
        # Illustration
        ################################################################################

        if t == settings.theta_benchmark_illustration:
            # Prepare data for illustration
            thetas0_array = np.zeros((X_illustration_transformed.shape[0], 2),
                                     dtype=X_illustration_transformed.dtype)
            thetas0_array[:, :] = settings.thetas[t]
            X_thetas_illustration = np.hstack(
                (X_illustration_transformed, thetas0_array))

            # Evaluate illustration data
            r_hat_illustration, _ = ratio_calibrated.predict(
                X_thetas_illustration)

            np.save(
                results_dir + '/r_illustration_' + algorithm + '_calibrated' +
                filename_addition + '.npy', r_hat_illustration)

        ################################################################################
        # Neyman construction toys
        ################################################################################

        # Neyman construction
        if do_neyman:
            # Prepare alternate data for Neyman construction
            thetas0_array = np.zeros(
                (X_neyman_alternate_transformed.shape[0], 2),
                dtype=X_neyman_alternate_transformed.dtype)
            thetas0_array[:, :] = settings.thetas[t]
            X_thetas_neyman_alternate = np.hstack(
                (X_neyman_alternate_transformed, thetas0_array))

            # Neyman construction: alternate (calibrated)
            r_neyman_alternate, _ = ratio_calibrated.predict(
                X_thetas_neyman_alternate)
            llr_neyman_alternate = -2. * np.sum(
                np.log(r_neyman_alternate).reshape(
                    (-1, n_expected_events_neyman)),
                axis=1)
            np.save(
                neyman_dir + '/' + neyman_filename + '_llr_alternate_' +
                str(t) + '_' + algorithm + '_calibrated' + filename_addition +
                '.npy', llr_neyman_alternate)

            # Neyman construction: null
            X_neyman_null = np.load(settings.unweighted_events_dir +
                                    '/neyman/' + input_X_prefix + 'X_' +
                                    neyman_filename + '_null_' + str(t) +
                                    '.npy')
            X_neyman_null_transformed = scaler.transform(
                X_neyman_null.reshape((-1, X_neyman_null.shape[2])))

            # Prepare null data for Neyman construction
            thetas0_array = np.zeros((X_neyman_null_transformed.shape[0], 2),
                                     dtype=X_neyman_null_transformed.dtype)
            thetas0_array[:, :] = settings.thetas[t]
            X_thetas_neyman_null = np.hstack(
                (X_neyman_null_transformed, thetas0_array))

            # Neyman construction: evaluate null (calibrated)
            r_neyman_null, _ = ratio_calibrated.predict(X_thetas_neyman_null)
            llr_neyman_null = -2. * np.sum(np.log(r_neyman_null).reshape(
                (-1, n_expected_events_neyman)),
                                           axis=1)

            np.save(
                neyman_dir + '/' + neyman_filename + '_llr_null_' + str(t) +
                '_' + algorithm + '_calibrated' + filename_addition + '.npy',
                llr_neyman_null)

            # NC: null evaluated at alternate
            if t == settings.theta_observed:
                for tt in range(settings.n_thetas):
                    X_neyman_null = np.load(settings.unweighted_events_dir +
                                            '/neyman/' + input_X_prefix +
                                            'X_' + neyman_filename + '_null_' +
                                            str(tt) + '.npy')
                    X_neyman_null_transformed = scaler.transform(
                        X_neyman_null.reshape((-1, X_neyman_null.shape[2])))
                    X_thetas_neyman_null = np.hstack(
                        (X_neyman_null_transformed, thetas0_array))

                    # Neyman construction: evaluate null sample (calibrated)
                    r_neyman_null, _ = ratio_calibrated.predict(
                        X_thetas_neyman_null)
                    llr_neyman_null = -2. * np.sum(
                        np.log(r_neyman_null).reshape(
                            (-1, n_expected_events_neyman)),
                        axis=1)
                    np.save(
                        neyman_dir + '/' + neyman_filename +
                        '_llr_nullatalternate_' + str(tt) + '_' + algorithm +
                        '_calibrated' + filename_addition + '.npy',
                        llr_neyman_null)

        # Roaming
        thetas0_array = np.zeros((n_roaming, 2),
                                 dtype=X_roam_transformed.dtype)
        thetas0_array[:, :] = settings.thetas[t]
        X_thetas_roaming_temp = np.hstack((X_roam_transformed, thetas0_array))
        r_roam_temp[t, :], _ = ratio_calibrated.predict(X_thetas_roaming_temp)

    # Save evaluation results
    expected_llr_calibrated = np.asarray(expected_llr_calibrated)
    mse_log_r_calibrated = np.asarray(mse_log_r_calibrated)
    trimmed_mse_log_r_calibrated = np.asarray(trimmed_mse_log_r_calibrated)
    expected_r_vs_sm = np.asarray(expected_r_vs_sm)
    if recalibration_mode:
        recalibration_expected_r = np.asarray(recalibration_expected_r)
    np.save(
        results_dir + '/llr_' + algorithm + '_calibrated' + filename_addition +
        '.npy', expected_llr_calibrated)
    np.save(
        results_dir + '/mse_logr_' + algorithm + '_calibrated' +
        filename_addition + '.npy', mse_log_r_calibrated)
    np.save(
        results_dir + '/trimmed_mse_logr_' + algorithm + '_calibrated' +
        filename_addition + '.npy', trimmed_mse_log_r_calibrated)
    np.save(
        results_dir + '/expected_r_vs_sm_' + algorithm + '_calibrated' +
        filename_addition + '.npy', expected_r_vs_sm)
    if recalibration_mode:
        recalibration_expected_r = np.asarray(recalibration_expected_r)
        np.save(
            results_dir + '/recalibration_expected_r_vs_sm_' + algorithm +
            '_calibrated' + filename_addition + '.npy',
            recalibration_expected_r)

    # Evaluation times
    logging.info('Calibrated evaluation timing: median %s s, mean %s s',
                 np.median(eval_times), np.mean(eval_times))

    logging.info('Interpolating calibrated roaming')
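    # A ConstantKernel * Matern(nu=0.5) (exponential) kernel interpolates log r
    # over the theta grid; predictions are exponentiated back to r below.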
    gp = GaussianProcessRegressor(normalize_y=True,
                                  kernel=C(1.0) * Matern(1.0, nu=0.5),
                                  n_restarts_optimizer=10)
    gp.fit(settings.thetas[:], np.log(r_roam_temp))
    r_roam_calibrated = np.exp(gp.predict(np.c_[xx.ravel(), yy.ravel()])).T
    np.save(
        results_dir + '/r_roam_' + algorithm + '_calibrated' +
        filename_addition + '.npy', r_roam_calibrated)
Example #18
0
from regular_array_sampling.functions import regular_array_monte_carlo

noise = 0.01
# create array of sampled regular array layouts
cand_points = regular_array_monte_carlo(5000)
# create testing points
X_test, X_test_tran, y_test = create_testing_points_transformed(noise)

n_target = 60

# create training points
X_train, X_train_tran, y_train, n_train = \
    create_training_points_regular_transformed(n_target, noise, cand_points)

# fit GP regression and calculate rmse
kernel = 1.0 ** 2 * Matern(length_scale=[1., 1., 1., 1., 1., 1.], nu=2.5) \
    + WhiteKernel(noise_level=1e-8, noise_level_bounds=[1e-10, 1])
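# Anisotropic Matern (nu=2.5) with one length scale per input dimension, plus a
# WhiteKernel to absorb observation noise in the training targets.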
gp = GaussianProcessRegressor(kernel=kernel, n_restarts_optimizer=20)
scaler = StandardScaler()
scaler.fit(X_train_tran)
X_train_stan = scaler.transform(X_train_tran)
X_test_stan = scaler.transform(X_test_tran)
gp.fit(X_train_stan, y_train)
y_predict, std = gp.predict(X_test_stan, return_std=True)
mse = mean_squared_error(y_test, y_predict)
mae = mean_absolute_error(y_test, y_predict)
# report rmse
print(n_train, ' RMSE: ', np.sqrt(mse), ' MAE: ', mae)

default_mse = mean_squared_error(y_test, np.zeros(len(y_test)))
print(np.sqrt(default_mse))
Example #19
0
from sklearn.utils.testing import (assert_equal, assert_almost_equal,
                                   assert_not_equal, assert_array_equal,
                                   assert_array_almost_equal)

X = np.random.RandomState(0).normal(0, 1, (5, 2))
Y = np.random.RandomState(0).normal(0, 1, (6, 2))

kernel_white = RBF(length_scale=2.0) + WhiteKernel(noise_level=3.0)
kernels = [
    RBF(length_scale=2.0),
    RBF(length_scale_bounds=(0.5, 2.0)),
    ConstantKernel(constant_value=10.0),
    2.0 * RBF(length_scale=0.33, length_scale_bounds="fixed"), 2.0 *
    RBF(length_scale=0.5), kernel_white, 2.0 * RBF(length_scale=[0.5, 2.0]),
    2.0 * Matern(length_scale=0.33, length_scale_bounds="fixed"),
    2.0 * Matern(length_scale=0.5, nu=0.5),
    2.0 * Matern(length_scale=1.5, nu=1.5),
    2.0 * Matern(length_scale=2.5, nu=2.5),
    2.0 * Matern(length_scale=[0.5, 2.0], nu=0.5),
    3.0 * Matern(length_scale=[2.0, 0.5], nu=1.5),
    4.0 * Matern(length_scale=[0.5, 0.5], nu=2.5),
    RationalQuadratic(length_scale=0.5, alpha=1.5),
    ExpSineSquared(length_scale=0.5, periodicity=1.5),
    DotProduct(sigma_0=2.0),
    DotProduct(sigma_0=2.0)**2,
    RBF(length_scale=[2.0]),
    Matern(length_scale=[2.0])
]
for metric in PAIRWISE_KERNEL_FUNCTIONS:
    if metric in ["additive_chi2", "chi2"]:
Example #20
0
import matplotlib.pyplot as plt
"""
Defining inputs:
    The continuous input variables may lie in R^n, for arbitrary n
    "m" is the number of categorical input variables
    The output variable, assuming a single output
    Number of measurements (budget)
    The surrogate model (type)
    Acquisition function (type)
"""
"""
Merging categorical inputs
input_dim = X.shape[1]
"""
kernel = ConstantKernel(1.0) * Matern(length_scale=1.0, nu=2.5)
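# ConstantKernel * Matern(nu=2.5): a common choice of surrogate kernel for
# Bayesian optimization, giving twice mean-square differentiable sample paths.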

# Initialize samples

noise = 0
bounds = np.array([[-0.5, 2]])
epsilon = 0  #exploration coefficient

X_init = np.array([[-0.4], [0.8], [2]])
Y1_init = dgen.f1(X_init)
Y2_init = dgen.f2(X_init)
Y3_init = dgen.f3(X_init)

X1_sample = X_init
Y1_sample = Y1_init
X2_sample = X_init
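# --- Illustrative sketch (not part of the original snippet): a minimal
# expected-improvement acquisition under the setup described in the docstring
# above, assuming a fitted sklearn GaussianProcessRegressor `gpr` and candidate
# points `X_cand`; the helper name `expected_improvement` is hypothetical.
from scipy.stats import norm

def expected_improvement(X_cand, X_sample, gpr, xi=0.01):
    # Predictive mean and standard deviation at the candidate points
    mu, sigma = gpr.predict(X_cand, return_std=True)
    # Best (largest) predicted value among the points sampled so far
    mu_best = np.max(gpr.predict(X_sample))
    imp = mu - mu_best - xi
    z = np.divide(imp, sigma, out=np.zeros_like(imp), where=sigma > 0)
    ei = imp * norm.cdf(z) + sigma * norm.pdf(z)
    ei[sigma == 0.0] = 0.0
    return ei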
        a_2h = params[1:2]
        con = params[2:3]
        alpha = params[3:4]
        beta = params[4:5]
        gamma = np.array([GAMMA])
        return shear_model.stacked_excess_surface_density(
            SIM_DATA.radii, a_sz, a_2h, con, alpha, beta, gamma).squeeze()

    esds = _pool_map(wrapped_esd_func, lh)

    data = SIM_DATA.radii[:, None] * esds
    emulator = maszcal.emulate.PcaEmulator.create_from_data(
        coords=lh,
        data=data,
        interpolator_class=maszcal.interpolate.GaussianProcessInterpolator,
        interpolator_kwargs={'kernel': Matern()},
        num_components=NUM_PRINCIPAL_COMPONENTS,
    )

    print('Saving emulator error samples...')

    emulator_errs = get_emulator_errors(PARAM_MINS, PARAM_MAXES, emulator,
                                        wrapped_esd_func)

    save_arr(emulator_errs, SETUP_SLUG + '-emulation-errors')

    cov, fisher = get_covariance_and_fisher()
    prefactor = 1 / np.log(
        (2 * np.pi)**(cov.shape[0] / 2) * np.sqrt(np.linalg.det(cov)))

    sim_stack = SIM_DATA.radii * (SIM_DATA.wl_signals.mean(axis=(1, 2)))
#%% 2. Gaussian Process
# Hyperparameters are automatically optimized!
import numpy as np
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.gaussian_process.kernels import ConstantKernel, RBF, Matern, Exponentiation
from sklearn import tree
cartTree = tree.DecisionTreeClassifier()
from sklearn.ensemble import BaggingClassifier
cartTree_bagging = BaggingClassifier(cartTree,
                                     max_samples=0.7,
                                     max_features=1.0)
# Full kernel list:
# http://scikit-learn.org/stable/modules/classes.html#module-sklearn.gaussian_process
kernelList = [
    ['RBF', RBF()],
    ['Matern', Matern()],
    #['Exponentiation',Exponentiation()],
    #['Constant', ConstantKernel()]
]

# List of Gaussian Processes
gpList = []
for name, gp_kernel in kernelList:
    gpc = GaussianProcessClassifier(kernel=gp_kernel,
                                    multi_class='one_vs_one',
                                    n_jobs=-1)
    #gpc = GaussianProcessClassifier(kernel=gp_kernel,multi_class='one_vs_rest',n_jobs=-1)
    gpList.append(['GP_' + name, gpc])

# List of all models
# Here we add all GP into usedModelList
Example #23
0
scaler = StandardScaler()
train_data = scaler.fit_transform(train_data)
#train_data = np.log(train_data)
X_train, Y_train = reshape_dataset(train_data, lags, steps_ahead)
X_test, Y_test = reshape_dataset(test_data, lags, steps_ahead)

week_data = [' ' for i in range(len(train_data))]
for i in range(len(train_data)):
    if i % 8 == 0:
        week_data[i] = str(int(data[i, 0])) + '-' + str(int(data[i, 1]))

X_train_weeks, Y_train_weeks = reshape_dataset(week_data, lags, steps_ahead)

#kernel = C()*RBF() + WhiteKernel()
kernel = C() * Matern() + WhiteKernel()
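# ConstantKernel * Matern + WhiteKernel: the white-noise term lets the GP treat
# part of the signal as observation noise instead of interpolating every point.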
gp = GaussianProcessRegressor(kernel=kernel, n_restarts_optimizer=9)
#gp.fit(X_train, Y_train)

training_size = int(X_train.shape[0] * 0.5)
validation_size = X_train.shape[0] - training_size
j = training_size
validation_predictions = np.zeros(validation_size)
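# Walk-forward validation: refit the GP on a fixed-size rolling window of the
# most recent `training_size` points and predict one step ahead each iteration.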
for i in range(validation_size):
    gp.fit(X_train[i:j], Y_train[i:j])
    validation_predictions[i] = gp.predict(np.array([X_train[j]]))
    j += 1

validation_predictions = scaler.inverse_transform(validation_predictions)
Y_train = scaler.inverse_transform(Y_train)
#mape = np.mean(np.abs((Y_train[training_size:] - validation_predictions) / Y_train[training_size:])) * 100
import numpy as np
import pandas as pd
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import Matern
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import MaxAbsScaler, RobustScaler
from tpot.export_utils import set_param_recursive

# NOTE: Make sure that the outcome column is labeled 'target' in the data file
tpot_data = pd.read_csv('PATH/TO/DATA/FILE', sep='COLUMN_SEPARATOR', dtype=np.float64)
features = tpot_data.drop('target', axis=1)
training_features, testing_features, training_target, testing_target = \
            train_test_split(features, tpot_data['target'], random_state=123)

# Average CV score on the training set was: 0.9939280089200635
exported_pipeline = make_pipeline(
    RobustScaler(),
    MaxAbsScaler(),
    GaussianProcessRegressor(kernel=Matern(length_scale=3.7, nu=2.5), n_restarts_optimizer=85, normalize_y=False)
)
# Fix random state for all the steps in exported pipeline
set_param_recursive(exported_pipeline.steps, 'random_state', 123)

exported_pipeline.fit(training_features, training_target)
results = exported_pipeline.predict(testing_features)
def tc_tracker(img_folder, det_path, ROI_path, save_path, seq_name, result_save_path, video_save_path):
    param = Param()
    rand_color = np.random.rand(1500, 3)
    img_list = sorted(list(filter(lambda x : x if x.endswith('jpg') else None, os.listdir(img_folder))), key=lambda x:x[3:8])
    # img_list = img_list[:500]
    if not ROI_path:
        temp_img = cv.imread(os.path.join(img_folder, img_list[0]))
        mask = np.ones((temp_img.shape[0], temp_img.shape[1]))
        size = (temp_img.shape[1], temp_img.shape[0])
    else:
        mask = cv.imread(ROI_path)
        size = mask.shape

    fourcc = cv.VideoWriter_fourcc(*'XVID')
    out = cv.VideoWriter(video_save_path+'output.avi', fourcc, 30, size)
    # Read detection file
    print('Read detection file...')
    detections = np.genfromtxt(det_path, delimiter=',')
    M = np.zeros((detections.shape[0], 10))
    for i in range(7):
        if i != 6:
            M[:, i] = np.round(detections[:, i]).astype(np.int16)
        else:
            M[:, i] = detections[:, i]

    M[:, 2] = M[:, 2] + 1
    M[:, 3] = M[:, 3] + 1
    track_p = Track_params(mask.shape, len(img_list), 10, param.IOU_thresh, 0.8, 0.3, 0, param.color_thresh, param.det_score_thresh)
    for i in range(M.shape[0]):
        M[i, 4] = min(M[i ,4], track_p.img_size[1]-M[i, 2]+1)
        M[i, 5] = min(M[i, 5], track_p.img_size[0]-M[i, 3]+1)

    track_s = Track_struct(mask.shape, len(img_list), 10, param.IOU_thresh, 0.8, 0.3, 0, param.color_thresh, param.det_score_thresh)
    for i in range(1, track_p.num_fr+1):
        print('frame {}'.format(i))
        temp_M = M[M[:, 0] == i]
        idx = temp_M[:, 6] > track_p.det_scope_thresh
        det_bbox = temp_M[idx, 2:6]
        temp_score = temp_M[idx, 6]
        idx = idx[idx]

        _, choose_idx = mergeBBOx(det_bbox, 0.8, temp_score)

        idx = idx[choose_idx]

        mask_flag = np.ones((idx.shape[0], 1))
        left_pts = np.round([det_bbox[idx, 0], det_bbox[idx, 1] + det_bbox[idx, 3] - 1])
        right_pts = np.round([det_bbox[idx, 0] + det_bbox[idx, 2] - 1, det_bbox[idx, 1] + det_bbox[idx, 3] - 1])

        right_idx = (right_pts[0, :] - 1) * track_p.img_size[0] + right_pts[1, :]
        left_idx = (left_pts[0, :] - 1) * track_p.img_size[0] + left_pts[1, :]

        right_idx[right_idx < 0] = 1
        left_idx[left_idx < 0] = 1

        mask = mask.reshape([-1])
        a = mask[right_idx.astype(np.int)] > 0.5 
        b = mask[left_idx.astype(np.int)] > 0.5
        mask_flag = mask_flag[a+b]
        mask_flag = mask_flag[temp_score > track_p.det_scope_thresh]

        if not idx.all():
            continue
        track_s.track_obj.append(Track_obj(bbox=det_bbox[idx], det_score=temp_score[idx], mask_flag=mask_flag))
    print('Finishing reading...')
    
    tic = time.time()
    # forward tracking
    for i in range(1, track_p.num_fr):
            
        if i == 1:
            img1 = cv.imread(os.path.join(img_folder, img_list[i-1]))/255
            b, g, r = cv.split(img1)
            img1 = cv.merge([r, g, b])
        img2 = cv.imread(os.path.join(img_folder, img_list[i]))/255
        b, g, r = cv.split(img2)
        img2 = cv.merge([r, g, b])
        track_s.track_obj[i-1], track_s.track_obj[i], track_s.tracklet_mat, track_s.track_params = forward_tracking(
            track_s.track_obj[i-1], track_s.track_obj[i], track_s.track_params, i, track_s.tracklet_mat,img1, img2)

        print('forward {}'.format(i))
        img1 = img2

    iters = 10
    track_s.tracklet_mat = preprocessing(track_s.tracklet_mat, 5)
    for i in range(iters):
        track_s.tracklet_mat, flag, _ = trackletClusterInit(track_s.tracklet_mat, param)
        print('iter_n = {}'.format(i))
        if flag == 1:
            break 

    track_s.prev_tracklet_mat, track_s.tracklet_mat = postProcessing(track_s.tracklet_mat, track_s.track_params)

    sigma = 8
    remove_idx = []
    N_tracklet = track_s.tracklet_mat.xmin_mat.shape[0]
    xmin_reg = [[] for _ in range(N_tracklet)]
    ymin_reg = [[] for _ in range(N_tracklet)]
    xmax_reg = [[] for _ in range(N_tracklet)]
    ymax_reg = [[] for _ in range(N_tracklet)]
    for i in range(N_tracklet):
        det_idx = np.where(track_s.tracklet_mat.xmin_mat[i, :] >= 0)[0]
        print('forward_n = {}'.format(i))
        if len(det_idx) < track_s.track_params.const_fr_thresh:
            remove_idx.append(i)
            continue
        # kernel = C(0.1, (0.001, 0.1)) * RBF(0.5, (1e-4, 10))
        kernel = Matern(nu=2.5, length_scale_bounds=(1000,1000))
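        # Setting length_scale_bounds=(1000, 1000) effectively pins the length
        # scale during hyperparameter optimization; the four GPs below then
        # smooth each tracklet's bounding-box coordinates over time.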
        model_xmin = gpr(kernel=kernel)
        model_ymin = gpr(kernel=kernel)
        model_xmax = gpr(kernel=kernel)
        model_ymax = gpr(kernel=kernel)
        xmin_reg[i] = model_xmin.fit(det_idx.reshape(-1,1)+1, track_s.tracklet_mat.xmin_mat[i, det_idx].reshape(-1,1))
        ymin_reg[i] = model_ymin.fit(det_idx.reshape(-1,1)+1, track_s.tracklet_mat.ymin_mat[i, det_idx].reshape(-1,1))
        xmax_reg[i] = model_xmax.fit(det_idx.reshape(-1,1)+1, track_s.tracklet_mat.xmax_mat[i, det_idx].reshape(-1,1))
        ymax_reg[i] = model_ymax.fit(det_idx.reshape(-1,1)+1, track_s.tracklet_mat.ymax_mat[i, det_idx].reshape(-1,1))

        t_min = np.min(det_idx)
        t_max = np.max(det_idx)

        track_s.tracklet_mat.xmin_mat[i, t_min:t_max+1] = xmin_reg[i].predict(np.arange(t_min, t_max+1).reshape(-1,1)+1).reshape((-1,))
        track_s.tracklet_mat.ymin_mat[i, t_min:t_max+1] = ymin_reg[i].predict(np.arange(t_min, t_max+1).reshape(-1,1)+1).reshape((-1,))
        track_s.tracklet_mat.xmax_mat[i, t_min:t_max+1] = xmax_reg[i].predict(np.arange(t_min, t_max+1).reshape(-1,1)+1).reshape((-1,))
        track_s.tracklet_mat.ymax_mat[i, t_min:t_max+1] = ymax_reg[i].predict(np.arange(t_min, t_max+1).reshape(-1,1)+1).reshape((-1,))

    track_s.tracklet_mat.xmin_mat = delete_matrix(track_s.tracklet_mat.xmin_mat, remove_idx, 2)
    track_s.tracklet_mat.ymin_mat = delete_matrix(track_s.tracklet_mat.ymin_mat, remove_idx, 2)
    track_s.tracklet_mat.xmax_mat = delete_matrix(track_s.tracklet_mat.xmax_mat, remove_idx, 2)
    track_s.tracklet_mat.ymax_mat = delete_matrix(track_s.tracklet_mat.ymax_mat, remove_idx, 2)
    track_s.tracklet_mat.color_mat = delete_matrix(track_s.tracklet_mat.color_mat, remove_idx, 3)
    # track_s.tracklet_mat.class_mat = delete_matrix(track_s.tracklet_mat.class_mat, remove_idx, 2)
    track_s.tracklet_mat.det_score_mat = delete_matrix(track_s.tracklet_mat.det_score_mat, remove_idx, 2)
    print('finish tracking ....')
    toc = time.time()


    for t in range(track_p.num_fr):
        frame = cv.imread(os.path.join(img_folder, img_list[t]))
        for i in range(track_s.tracklet_mat.xmin_mat.shape[0]):
            if track_s.tracklet_mat.xmin_mat[i, t] == -1:
                continue
            
            x_min = track_s.tracklet_mat.xmin_mat[i, t]
            y_min = track_s.tracklet_mat.ymin_mat[i, t]
            x_max = track_s.tracklet_mat.xmax_mat[i, t]
            y_max = track_s.tracklet_mat.ymax_mat[i, t]


            font = cv.FONT_HERSHEY_DUPLEX
            frame = cv.rectangle(frame, (int(x_min), int(y_min)),
                        (int(x_max), int(y_max)),
                        (0, 255, 0), 3)
            frame = cv.putText(frame, str(t), (0,50), font, 1, (0,0,255), 1)
            frame = cv.putText(frame, str(i), (int(x_min), int(y_min)-6), font, 2, (0,0,255), 2)

        cv.imwrite( save_path + '/img{:0>6}.jpg'.format(t), frame)
        out.write(frame)
        # cv.imshow('video_name', frame)
        # cv.waitKey(40)
    

    # fourcc = cv.VideoWriter_fourcc(*'XVID')
    # frame_ = cv.imread(os.path.join(save_path, list(os.listdir(save_path))[0]))
    # size = (frame_.shape[1], frame_.shape[0])
    # print(size)
    # out = cv.VideoWriter(video_save_path+'output.avi', fourcc, 20, size)
    # print(sorted(list(filter(lambda x : x if x.endswith('jpg') else None, os.listdir(save_path))), key=lambda x:x[3:]))
    # for x in sorted(list(filter(lambda x : x if x.endswith('jpg') else None, os.listdir(save_path))), key=lambda x:x[3:]):
        # frame_ = cv.imread(os.path.join(save_path, x))
        # out.write(frame_)
        # cv.imshow('video_name', frame_)
        # cv.waitKey(0)

    out.release()

    Speed = len(img_list) / (toc - tic)
    if seq_name:
        writetxt(seq_name, track_s, result_save_path, Speed)

    return 0
Example #26
0
def GP(params, Runs, pool):
    def upper_confidence_bound(mu_x, sigma_x, opt_value, kappa=-1.0):
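        # With the default kappa=-1.0 this is effectively a lower confidence
        # bound; query() below minimizes it, favoring low predicted MSE.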
        return mu_x + kappa * sigma_x

    def query(xi, yi, gp):
        acq = upper_confidence_bound
        best_value = np.inf
        for N in np.linspace(1, params['N_Max']):
            if params['T'] > 0:
                for T in np.linspace(0, params['T_Max']):

                    def obj(x):
                        x = x.reshape(1, -1)
                        mu_x, sigma_x = gp.predict(x, return_std=True)
                        return acq(mu_x, sigma_x, np.min(yi))

                    x0 = np.asanyarray([N, T]).reshape(1, 2)
                    bounds = ((1, params['N_Max']), (0, params['T_Max']))
                    print(x0, bounds)
                    res = minimize(obj, x0, bounds=bounds)

                    if res.fun < best_value:
                        best_value = res.fun
                        query_point = res.x
            else:

                def obj(x):
                    x = x.reshape(1, -1)
                    mu_x, sigma_x = gp.predict(x, return_std=True)
                    return acq(mu_x, sigma_x, np.min(yi))

                x0 = np.asanyarray(N).reshape(1, -1)
                bounds = [(1, params['N_Max'])]
                res = minimize(obj, x0, bounds=bounds)
                if res.fun < best_value:
                    best_value = res.fun
                    query_point = res.x
        query_point = query_point
        return query_point

    for i in range(params['Searches']):
        kernel = Matern(length_scale_bounds="fixed")
        gp = GaussianProcessRegressor(kernel=kernel,
                                      alpha=Runs['STD'].values,
                                      random_state=1,
                                      normalize_y=True)
        if params['T'] > 0:
            gp.fit(Runs[['N', 'T']].values, Runs['MSE'].values)
            next_x = query(Runs[['N', 'T']].values, Runs['MSE'].values, gp)
            N = int(np.round(next_x[0], 0))
            T = int(np.round(next_x[1], 0))
            o = 0
            while len(
                    Runs.loc[(Runs['N'] == N) & (Runs['T'] == T)].index) != 0:
                print('Adjust!')
                o += 1
                N += int(o * np.cos(o * np.pi))
                if N < params['N_Min'] or N > params['N_Max']:
                    N -= int(o * np.cos(o * np.pi))
                if o > 5:
                    T += 1
            print(N, T)
            d = {'N': N, 'T': T, 'MSE': 0, 'STD': 0}
            idx = Runs.index[-1] + 1
            D2 = pd.DataFrame(data=d, index=[idx])
            Runs = Runs.append(D2)
            params['T'] = T
            params['N'] = N
            Results = RunReps(params, pool)
            MSE = Results[0]
            Runs['MSE'][idx] = MSE.mean()
            Runs['STD'][idx] = MSE.std()
            Runs = Runs.sort_values(by=['N', 'T']).reset_index(drop=True)
        else:
            gp.fit(Runs['N'].values.reshape(-1, 1), Runs['MSE'].values)
            next_x = query(Runs['N'].values, Runs['MSE'].values, gp)
            N = int(np.round(next_x[0], 0))
            o = 0
            while len(Runs.loc[Runs['N'] == N].index) != 0:
                print('Adjust!')
                o += 1
                N += int(o * np.cos(o * np.pi))
                if N < params['N_Min'] or N > params['N_Max']:
                    N -= int(o * np.cos(o * np.pi))
            print(N)
            d = {'N': N, 'MSE': 0, 'STD': 0}
            idx = Runs.index[-1] + 1
            D2 = pd.DataFrame(data=d, index=[idx])
            Runs = Runs.append(D2)
            params['N'] = N
            Results = RunReps(params, pool)
            MSE = Results[0]
            Runs['MSE'][idx] = MSE.mean()
            Runs['STD'][idx] = MSE.std()
            Runs = Runs.sort_values(by=['N']).reset_index(drop=True)
    return (Runs)
Example #27
0
def OptimizeCBASensitivity(infile_path, outdir, do_plots = True):
    data_slice = TrainingConfig.training_slice
    slice_size = data_slice[1] - data_slice[0]

    # read the test dataset, which will be used to get the expected sensitivity of the analysis
    sig_samples = TrainingConfig.sig_samples
    bkg_samples = TrainingConfig.bkg_samples

    print("loading data ...")
    sig_data = [pd.read_hdf(infile_path, key = sig_sample) for sig_sample in sig_samples]
    bkg_data = [pd.read_hdf(infile_path, key = bkg_sample) for bkg_sample in bkg_samples]

    sig_data_train = []
    sig_mBB_train = []
    sig_weights_train = []
    sig_aux_data_train = []
    for sample in sig_data:
        cur_length = len(sample)
        sample = sample.sample(frac = 1, random_state = 12345).reset_index(drop = True) # shuffle the sample
        cur_train = sample[int(data_slice[0] * cur_length) : int(data_slice[1] * cur_length)]
        cur_traindata, cur_nuisdata, cur_weights = TrainNuisAuxSplit(cur_train) # load the standard classifier input, nuisances and weights
        cur_aux_data = cur_train[TrainingConfig.other_branches].values
        sig_data_train.append(cur_traindata)
        sig_mBB_train.append(cur_nuisdata)
        sig_weights_train.append(cur_weights / slice_size)
        sig_aux_data_train.append(cur_aux_data)

    bkg_data_train = []
    bkg_mBB_train = []
    bkg_weights_train = []
    bkg_aux_data_train = []
    for sample in bkg_data:
        cur_length = len(sample)
        sample = sample.sample(frac = 1, random_state = 12345).reset_index(drop = True) # shuffle the sample
        cur_train = sample[int(data_slice[0] * cur_length) : int(data_slice[1] * cur_length)]
        cur_traindata, cur_nuisdata, cur_weights = TrainNuisAuxSplit(cur_train) # load the standard classifier input, nuisances and weights
        cur_aux_data = cur_train[TrainingConfig.other_branches].values
        bkg_data_train.append(cur_traindata)
        bkg_mBB_train.append(cur_nuisdata)
        bkg_weights_train.append(cur_weights / slice_size)
        bkg_aux_data_train.append(cur_aux_data)

    # also prepare the total, concatenated versions
    data_train = sig_data_train + bkg_data_train
    aux_train = sig_aux_data_train + bkg_aux_data_train
    weights_train = sig_weights_train + bkg_weights_train
    samples = sig_samples + bkg_samples

    # define the SR binning for mBB
    SR_low = 30
    SR_up = 210
    SR_binwidth = 10
    SR_mBB_binning = np.linspace(SR_low, SR_up, num = 1 + int((SR_up - SR_low) / SR_binwidth), endpoint = True)

    print("mBB binning: {}".format(SR_mBB_binning))

    original_cuts = {"MET_cut": 200, "dRBB_highMET_cut": 1.2, "dRBB_lowMET_cut": 1.8}

    # the objective function that needs to be minimized
    costfunc = lambda cuts: -EvaluateAsimovSignificance(process_events = data_train, process_aux_events = aux_train, 
                                                        process_weights = weights_train, process_names = samples, 
                                                        signal_process_names = sig_samples, background_process_names = bkg_samples, 
                                                        binning = SR_mBB_binning, cuts = cuts, fit_dir = outdir)["combined"]
    
    costfunc_bayes = lambda MET_cut, dRBB_highMET_cut, dRBB_lowMET_cut: -costfunc({"MET_cut": MET_cut, "dRBB_highMET_cut": dRBB_highMET_cut, "dRBB_lowMET_cut": dRBB_lowMET_cut})
    
    # then, try a global search strategy
    ranges_bayes = {"MET_cut": (150, 250), "dRBB_highMET_cut": (0.5, 5.0), "dRBB_lowMET_cut": (0.5, 5.0)}
    gp_params = {'kernel': 1.0 * Matern(length_scale = 0.05, length_scale_bounds = (1e-1, 1e2), nu = 1.5)}
    optimizer = BayesianOptimization(
        f = costfunc_bayes,
        pbounds = ranges_bayes,
        random_state = None
    )
    optimizer.maximize(init_points = 20, n_iter = 1, acq = 'poi', kappa = 3, **gp_params)

    xi_scheduler = lambda iteration: 0.01 + 0.19 * np.exp(-0.004 * iteration)
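    # xi decays from roughly 0.2 towards 0.01 over the iterations below,
    # gradually shifting the acquisition from exploration to exploitation.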
    for it in range(400):
        cur_xi = xi_scheduler(it)
        print("using xi = {}".format(cur_xi))
        optimizer.maximize(init_points = 0, n_iter = 1, acq = 'poi', kappa = 3, xi = cur_xi, **gp_params)
    
    # print the results
    print("==============================================")
    print("initial cuts:")
    print("==============================================")
    print("MET_cut = {}".format(original_cuts["MET_cut"]))
    print("dRBB_highMET_cut = {}".format(original_cuts["dRBB_highMET_cut"]))
    print("dRBB_lowMET_cut = {}".format(original_cuts["dRBB_lowMET_cut"]))
    print("significance = {} sigma".format(costfunc_bayes(**original_cuts)))
    print("==============================================")

    print("==============================================")
    print("optimized cuts (global optimization):")
    print("==============================================")
    print("MET_cut = {}".format(optimizer.max["params"]["MET_cut"]))
    print("dRBB_highMET_cut = {}".format(optimizer.max["params"]["dRBB_highMET_cut"]))
    print("dRBB_lowMET_cut = {}".format(optimizer.max["params"]["dRBB_lowMET_cut"]))
    print("significance = {} sigma".format(optimizer.max["target"]))
    print("==============================================")

    # save the results:
    with open(os.path.join(outdir, "opt_results.pkl"), "wb") as opt_outfile:
        pickle.dump(optimizer.max, opt_outfile)
Example #28
0
pylab.figure(0, figsize=(14, 12))
pylab.subplot(3, 2, 1)
ymean, y_std = gp.predict(X, return_std=True)
pylab.plot(X, ymean, 'k', lw=3, zorder=9, label="mean")
pylab.fill_between(X[:, 0], ymean - y_std, ymean + y_std, alpha=0.5, color='k')
y_samples = gp.sample_y(X, 10)
pylab.plot(X, y_samples, color='b', lw=2)
pylab.plot(X, y_samples[:, 0], color='b', lw=2, label="sample")
pylab.legend(loc="best")
pylab.xlim(0, 5)
pylab.ylim(-3, 3)
pylab.title("Prior Samples")

#Matern
kernel = C(1.0) * Matern(length_scale=1, nu=1.5)
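# Matern with nu=1.5 gives once mean-square differentiable sample paths, so the
# prior draws below look visibly rougher than those from a squared-exponential kernel.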
gp = GaussianProcessRegressor(kernel=kernel,
                              alpha=1e-5,
                              n_restarts_optimizer=10)

pylab.subplot(3, 2, 2)
ymean, y_std = gp.predict(X, return_std=True)
pylab.plot(X, ymean, 'k', lw=3, zorder=9, label="mean")
pylab.fill_between(X[:, 0], ymean - y_std, ymean + y_std, alpha=0.5, color='k')
y_samples = gp.sample_y(X, 10)
pylab.plot(X, y_samples, color='r', lw=2)
pylab.plot(X, y_samples[:, 0], color='r', lw=2, label="sample")
pylab.legend(loc="best")
pylab.xlim(0, 5)
pylab.ylim(-3, 3)
pylab.title("Prior Samples")
    def gp(self, true_graph, samples_idx):
        rbf = 1.0 * RBF(length_scale=1.0)
        matern = 1.0 * Matern(
            length_scale=1.0, length_scale_bounds=(1e-1, 10.0), nu=1.5)
        gp_opt = GaussianProcessRegressor(kernel=rbf)
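        # Note: the Matern kernel above is constructed but unused here; the
        # regressor is fitted with the RBF kernel.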
        gp_opt.fit(self.training_x_, self.training_y_)
        self.gpr_ = gp_opt
        # print("The trained hyperparameter are {}".format((gp_opt.kernel_.theta)))
        # print("Log Marginal Likelihood (optimized): %.3f"
        #     % gp_opt.log_marginal_likelihood(gp_opt.kernel_.theta))

        # Contour 3d
        x1_ = [i for i in range(self.rows_ + 1)]
        x2_ = [j for j in range(self.cols_ + 1)]
        X1_, X2_ = np.meshgrid(x1_, x2_)

        y_mean = np.empty([self.rows_ + 1, self.cols_ + 1])
        y_true = np.empty([self.rows_ + 1, self.cols_ + 1])
        y_std = np.empty([self.rows_ + 1, self.cols_ + 1])
        y_mean_u = np.empty([self.rows_ + 1, self.cols_ + 1])
        y_mean_d = np.empty([self.rows_ + 1, self.cols_ + 1])
        for i in range(self.rows_ + 1):
            for j in range(self.cols_ + 1):
                cur_idx = (self.rows_ - X1_[i][j] - 1) * self.cols_ + X2_[i][j]
                # print("X: {}, Y: {}, idx: {}".format(X2_[i][j], X1_[i][j],cur_idx))
                if X1_[i][j] < self.rows_ and X1_[i][j] >= 0 and X2_[i][
                        j] < self.cols_ and X2_[i][j] >= 0 and cur_idx in nz_ig:
                    y_mean[X2_[i][j], X1_[i][j]], y_std[
                        X2_[i][j], X1_[i][j]] = gp_opt.predict(
                            [self.vertex_[cur_idx].probs_], return_std=True)
                    # print("Prediction ========================")
                    # print("Vertex {}, X:{}, Y:{}".format(cur_idx,X2_[i][j],X1_[i][j]))
                    # print("Testing data is {}".format(self.vertex_[cur_idx].probs_))
                    # print("Predicted IG {}".format(y_mean[X2_[i][j],X1_[i][j]]))
                else:
                    y_mean[X2_[i][j], X1_[i][j]] = 0
                    y_std[X2_[i][j], X1_[i][j]] = 0.0

                y_mean_u[X2_[i][j],
                         X1_[i][j]] = y_mean[X2_[i][j],
                                             X1_[i][j]] + y_std[X2_[i][j],
                                                                X1_[i][j]]
                y_mean_d[X2_[i][j],
                         X1_[i][j]] = y_mean[X2_[i][j],
                                             X1_[i][j]] - y_std[X2_[i][j],
                                                                X1_[i][j]]

                if X2_[i][j] < self.cols_ and X1_[i][j] < self.rows_ and X1_[
                        i][j] >= 0 and X2_[i][j] >= 0:
                    idx_ = (self.rows_ - X1_[i][j] -
                            1) * self.cols_ + X2_[i][j]
                    y_true[X2_[i][j], X1_[i][j]] = true_graph.vertex_[idx_].ig_
                    self.vertex_[idx_].ig_ = y_mean[X2_[i][j], X1_[i][j]]
                    true_graph.vertex_[idx_].L2_error_ = (
                        true_graph.vertex_[idx_].ig_ -
                        self.vertex_[idx_].ig_)**2

                    self.vertex_[idx_].ig_ub_ = y_mean[
                        X2_[i][j], X1_[i][j]] + y_std[X2_[i][j], X1_[i][j]]
                    self.vertex_[idx_].ig_lb_ = y_mean[
                        X2_[i][j], X1_[i][j]] - y_std[X2_[i][j], X1_[i][j]]
                    if round(true_graph.vertex_[idx_].ig_, 3) <= round(
                            self.vertex_[idx_].ig_ub_, 3) and round(
                                true_graph.vertex_[idx_].ig_, 3) >= round(
                                    self.vertex_[idx_].ig_lb_, 3):
                        self.vertex_[idx_].ig_pred_ = True
import numpy as np
import pandas as pd
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import Matern
from sklearn.model_selection import train_test_split

# NOTE: Make sure that the outcome column is labeled 'target' in the data file
tpot_data = pd.read_csv('PATH/TO/DATA/FILE',
                        sep='COLUMN_SEPARATOR',
                        dtype=np.float64)
features = tpot_data.drop('target', axis=1)
training_features, testing_features, training_target, testing_target = \
            train_test_split(features, tpot_data['target'], random_state=123)

# Average CV score on the training set was: 0.9747360826983117
exported_pipeline = GaussianProcessRegressor(kernel=Matern(
    length_scale=3.4000000000000004, nu=2.5),
                                             n_restarts_optimizer=55,
                                             normalize_y=False)
# Fix random state in exported estimator
if hasattr(exported_pipeline, 'random_state'):
    setattr(exported_pipeline, 'random_state', 123)

exported_pipeline.fit(training_features, training_target)
results = exported_pipeline.predict(testing_features)