Example #1
def advi_callback(params, t, g, results, delta_results, model, eval_function,
                  hparams):
    """Track the ELBO during ADVI and signal early convergence via "exit"."""
    results.append(eval_function(params))

    if (t + 1) % hparams['advi_callback_iteration'] == 0:

        if len(results) > hparams['advi_callback_iteration']:
            previous_elbo = results[-(hparams['advi_callback_iteration'] + 1)]
        else:
            previous_elbo = 0.0

        current_elbo = results[-1]
        delta_results.append(relative_difference(previous_elbo, current_elbo))
        delta_elbo_mean = np.nanmean(delta_results)
        delta_elbo_median = np.nanmedian(delta_results)

        if ((delta_elbo_median <= hparams['advi_convergence_threshold']) |
            (delta_elbo_mean <= hparams['advi_convergence_threshold'])):
            tqdm.write(f"Converged early according to ADVI "
                       f"metrics for Median/Mean")
            tqdm.write(f"Iteration {t+1}")
            tqdm.write(f"Rel. tolerance Δ threshold: "
                       f"{hparams['advi_convergence_threshold']}")
            tqdm.write(f"Rel. tolerance Δ mean: {delta_elbo_mean:.5f}")
            tqdm.write(f"Rel. tolerance Δ median: {delta_elbo_median:.5f}")
            return "exit"
    return None
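Below is a minimal usage sketch for the callback, assuming `numpy`, `tqdm`, and the module's `relative_difference` helper are importable alongside it; the update loop and the `eval_function` are illustrative stand-ins, not the module's real ADVI machinery.

import numpy as np

hparams = {'advi_callback_iteration': 50,
           'advi_convergence_threshold': 0.001}
results, delta_results = [], []

params = np.ones(4)
for t in range(5000):
    grad = params.copy()                 # stand-in gradient of 0.5 * ||params||^2
    params = params - 0.05 * grad
    status = advi_callback(params, t, grad, results, delta_results,
                           model=None,
                           eval_function=lambda p: -np.sum(p ** 2),  # stand-in ELBO
                           hparams=hparams)
    if status == "exit":
        break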
Example #2
def _get_dx_wrt(dx, var, dx_scaling, dx_func=None):
    """Scale `dx` for a particular variable `var`."""
    assert dx_scaling in [
        "none",
        "median",
        "custom",
    ], "`dx_scaling` must be 'none', 'median', or 'custom'."
    if dx_scaling == "none":
        dx_wrt = dx
    elif dx_scaling == "median":
        median_var = np.nanmedian(var)
        if median_var == 0:
            dx_wrt = dx
        else:
            dx_wrt = dx * np.abs(median_var)
    elif dx_scaling == "custom":
        dx_wrt = dx_func(var)
    return dx_wrt
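A short usage sketch for the three scaling modes; the variable values and the custom `dx_func` are illustrative only.

import numpy as np

var = np.array([0.5, 2.0, np.nan, 4.0])

# "none": dx is returned unchanged.
print(_get_dx_wrt(1e-6, var, "none"))       # 1e-06

# "median": dx is multiplied by |nanmedian(var)| (here 2.0).
print(_get_dx_wrt(1e-6, var, "median"))     # 2e-06

# "custom": dx_func maps the variable to a step size.
print(_get_dx_wrt(1e-6, var, "custom", dx_func=lambda v: 1e-6 * np.nanmax(v)))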
Example #3
def adam_workflow_optimize(n_iters,
                           objective_and_grad,
                           init_param,
                           K,
                           has_log_norm=False,
                           window=100,
                           learning_rate=.01,
                           learning_rate_end=None,
                           epsilon=.02,
                           averaging=True,
                           n_optimisers=1,
                           r_mean_threshold=1.20,
                           r_sigma_threshold=1.20,
                           tail_avg_iters=200,
                           eval_elbo=100,
                           tolerance=0.01,
                           stopping_rule=1,
                           plotting=True,
                           model_name=None):
    """
    stopping rule 1 means traditional ELBO stopping rule, while
    stopping rule 2 means MCSE stopping rule.

    The windowed ADAM optimizer with the convergence diagnostics and iterate averaging ...

    :param n_iters:
    :param objective_and_grad:
    :param init_param: initial params
    :param K:
    :param has_log_norm:
    :param window:
    :param learning_rate:
    :param epsilon:
    :param rhat_window:
    :param averaging:
    :param n_optimisers:
    :param r_mean_threshold:
    :param r_sigma_threshold:
    :param tail_avg_iters:
    :param eval_elbo:
    :param tolerance:
    :param stopping_rule:
    :param avg_grad_norm:
    :param learning_rate_end:
    :param plotting:
    :param model_name:
    :return:
    """

    optimisation_log = {}
    variational_param_post_conv_history_list = []
    # index for iters
    t = 0
    # index for iters after convergence ..
    j = 0
    N_overall = 50000
    sto_process_convergence = False
    sto_process_sigma_conv = False
    sto_process_mean_conv = False

    value_history = []
    log_norm_history = []
    variational_param = init_param.copy()
    averaged_variational_param_history = []
    start_avg_iter = n_iters // 1.3
    variational_param_history_list = []
    averaged_variational_mean_list = []
    averaged_variational_sigmas_list = []
    grad_val = 0.
    grad_squared = 0
    beta1 = 0.9
    beta2 = 0.999
    prev_elbo = 0.
    pmz_size = init_param.size
    mcse_all = np.zeros(pmz_size)

    for o in range(n_optimisers):
        np.random.seed(seed=o)
        if o >= 1:
            variational_param = init_param + stats.norm.rvs(
                size=len(init_param)) * (o + 1) * 0.5
        elbo_diff_rel_med = 10.
        elbo_diff_rel_avg = 10.
        local_grad_history = []
        local_log_norm_history = []
        value_history = []
        log_norm_history = []
        averaged_variational_mean_list = []
        averaged_variational_sigmas_list = []
        elbo_diff_rel_list = []
        variational_param = init_param.copy()
        t = 0
        variational_param_history = []
        variational_param_post_conv_history = []
        mcse_all = np.zeros((pmz_size, 1))
        stop = False

        with tqdm.trange(n_iters) as progress:
            try:
                schedule = learning_rate_schedule(n_iters, learning_rate,
                                                  learning_rate_end)
                for i, curr_learning_rate in zip(progress, schedule):
                    if i == N_overall:
                        break

                    if sto_process_convergence:
                        j = j + 1
                    if has_log_norm == 1:
                        obj_val, obj_grad, log_norm = objective_and_grad(
                            variational_param)
                    else:
                        obj_val, obj_grad = objective_and_grad(
                            variational_param)
                        log_norm = 0

                    if stopping_rule == 1 and i > 1000 and i % eval_elbo == 0:
                        elbo_diff_rel = np.abs(obj_val -
                                               prev_elbo) / (prev_elbo + 1e-8)
                        elbo_diff_rel_list.append(elbo_diff_rel)
                        elbo_diff_rel_med = np.nanmedian(elbo_diff_rel_list)
                        elbo_diff_rel_avg = np.nanmean(elbo_diff_rel_list)

                    prev_elbo = obj_val
                    start_stats = 1000
                    mcse_se_combined_list = np.zeros((pmz_size, 1))
                    if stopping_rule == 2 and i > 1000 and i % eval_elbo == 0:
                        mcse_se_combined_list = monte_carlo_se(
                            np.array(variational_param_history)[None, :], 0)
                        mcse_all = np.hstack(
                            (mcse_all, mcse_se_combined_list[:, None]))

                    value_history.append(obj_val)
                    local_grad_history.append(obj_grad)
                    local_log_norm_history.append(log_norm)
                    log_norm_history.append(log_norm)
                    if len(local_grad_history) > window:
                        local_grad_history.pop(0)
                        local_log_norm_history.pop(0)

                    if has_log_norm:
                        grad_norm = np.exp(log_norm)
                    else:
                        grad_norm = np.sum(obj_grad**2, axis=0)
                    if i == 0:
                        grad_squared = 0.9 * obj_grad**2
                        grad_val = 0.9 * obj_grad
                    else:
                        grad_squared = grad_squared * beta2 + (
                            1. - beta2) * obj_grad**2
                        grad_val = grad_val * beta1 + (1. - beta1) * obj_grad
                    grad_scale = np.exp(
                        np.min(local_log_norm_history) -
                        np.array(local_log_norm_history))
                    scaled_grads = grad_scale[:, np.newaxis] * np.array(
                        local_grad_history)
                    accum_sum = np.sum(scaled_grads**2, axis=0)
                    old_variational_param = variational_param.copy()
                    m_hat = grad_val / (1 - np.power(beta1, i + 2))
                    v_hat = grad_squared / (1 - np.power(beta2, i + 2))
                    variational_param = variational_param - curr_learning_rate * m_hat / np.sqrt(
                        epsilon + v_hat)
                    if averaging is True and i > start_avg_iter:
                        averaged_variational_param = (
                            variational_param + old_variational_param *
                            (i - start_avg_iter)) / (i - start_avg_iter + 1)
                        averaged_variational_param_history.append(
                            averaged_variational_param)

                    if i > 100:
                        variational_param_history.append(old_variational_param)

                    if len(variational_param_history) > 100 * window:
                        variational_param_history.pop(0)
                    if i % 100 == 0:
                        avg_loss = np.mean(value_history[max(0, i - 1000):i +
                                                         1])
                        #print(avg_loss)
                        progress.set_description(
                            'Average Loss = {:,.6g}'.format(avg_loss))

                    t = t + 1
                    # Stopping rule 1: relative ELBO change below `tolerance`.
                    if stopping_rule == 1 and stop == False and elbo_diff_rel_med <= tolerance:
                        print('Convergence achieved due to ELBO median')
                        N_overall = i + 100
                        stop = True
                    if stopping_rule == 1 and stop == False and elbo_diff_rel_avg <= tolerance:
                        print('Convergence achieved due to ELBO mean')
                        N_overall = i + 100
                        stop = True

                    if stopping_rule == 2 and stop == False and sto_process_convergence == True and i > 1500 and \
                            t % eval_elbo == 0 and (np.nanmedian(mcse_all[:, -1]) <= epsilon) and j > 500:
                        print('Optimization stopping reliably!')
                        stop = True
                        break

                    variational_param_history_array = np.array(
                        variational_param_history)
                    if stopping_rule == 2 and t % eval_elbo == 0 and t > 1000 and sto_process_convergence == False:
                        variational_param_history_list.append(
                            variational_param_history_array)
                        variational_param_history_chains = np.stack(
                            variational_param_history_list, axis=0)
                        variational_param_history_list.pop(0)
                        rhats_halfway_last = compute_R_hat(
                            variational_param_history_chains, warmup=0.5)[1]
                        rhat_mean_halfway, rhat_sigma_halfway = rhats_halfway_last[:K], rhats_halfway_last[
                            K:]
                        if (rhat_mean_halfway < r_mean_threshold
                            ).all() and sto_process_mean_conv == False:
                            start_swa_m_iters = i
                            print('Rhat- All mean converged ...')
                            sto_process_mean_conv = True
                            start_stats = start_swa_m_iters

                        if (rhat_sigma_halfway < r_sigma_threshold
                            ).all() and sto_process_sigma_conv == False:
                            start_swa_s_iters = i
                            print('Rhat- All sigmas converged ...')
                            sto_process_sigma_conv = True
                            start_stats = start_swa_s_iters

                    if sto_process_mean_conv == True and sto_process_sigma_conv == True:
                        sto_process_convergence = True
                        start_stats = np.maximum(start_swa_m_iters,
                                                 start_swa_s_iters)

                    if sto_process_convergence:
                        variational_param_post_conv_history.append(
                            variational_param)

                    if sto_process_convergence and j > 200 and t % eval_elbo == 0:
                        variational_param_post_conv_history_array = np.array(
                            variational_param_post_conv_history)
                        variational_param_post_conv_history_list.append(
                            variational_param_post_conv_history_array)
                        variational_param_post_conv_history_chains = np.stack(
                            variational_param_post_conv_history_list, axis=0)
                        variational_param_post_conv_history_list.pop(0)
                        pmz_size = variational_param_post_conv_history_chains.shape[
                            2]
                        Neff = np.zeros(pmz_size)
                        Rhot = []
                        khat_iterates = []
                        khat_iterates2 = []
                        # compute khat for iterates
                        for z in range(pmz_size):
                            neff, rho_t_sum, autocov, rho_t = autocorrelation(
                                variational_param_post_conv_history_chains, 0,
                                z)
                            Neff[z] = neff
                            Rhot.append(rho_t)
                            khat_i = compute_khat_iterates(
                                variational_param_post_conv_history_chains,
                                0,
                                z,
                                increasing=True)
                            khat_iterates.append(khat_i)
                            khat_i2 = compute_khat_iterates(
                                variational_param_post_conv_history_chains,
                                0,
                                z,
                                increasing=False)
                            khat_iterates2.append(khat_i2)

                        rhot_array = np.stack(Rhot, axis=0)
                        khat_combined = np.maximum(khat_iterates,
                                                   khat_iterates2)

            except (KeyboardInterrupt, StopIteration):
                pass
            finally:
                progress.close()

    if sto_process_convergence:
        optimisation_log['start_avg_mean_iters'] = start_swa_m_iters
        optimisation_log['start_avg_sigma_iters'] = start_swa_s_iters
        optimisation_log['r_hat_mean_halfway'] = rhat_mean_halfway
        optimisation_log['r_hat_sigma_halfway'] = rhat_sigma_halfway
        try:
            Neff
        except NameError:
            pass
        else:
            optimisation_log['neff'] = Neff
            optimisation_log['autocov'] = autocov
            optimisation_log['rhot'] = rhot_array
            optimisation_log['start_stats'] = start_stats
            # optimisation_log['mcmc_se2'] = mcmc_se2_array
            optimisation_log['khat_iterates_comb'] = khat_combined

    if stopping_rule == 1:
        start_stats = i - tail_avg_iters

    if stopping_rule == 1:
        variational_param_history_list.append(variational_param_history_array)
        variational_param_history_chains = np.stack(
            variational_param_history_list, axis=0)
        smoothed_opt_param = np.mean(
            variational_param_history_array[start_stats:, :], axis=0)
        averaged_variational_mean_list.append(smoothed_opt_param[:K])
        averaged_variational_sigmas_list.append(smoothed_opt_param[K:])

    elif stopping_rule == 2 and sto_process_convergence == True:
        smoothed_opt_param = np.mean(
            variational_param_post_conv_history_chains[0, :, :], axis=0)
        averaged_variational_mean_list.append(smoothed_opt_param[:K])
        averaged_variational_sigmas_list.append(smoothed_opt_param[K:])

    if stopping_rule == 2 and sto_process_convergence == False:
        start_stats = t - tail_avg_iters
        variational_param_history_list.append(variational_param_history_array)
        variational_param_history_chains = np.stack(
            variational_param_history_list, axis=0)
        smoothed_opt_param = np.mean(
            variational_param_history_array[start_stats:, :], axis=0)
        averaged_variational_mean_list.append(smoothed_opt_param[:K])
        averaged_variational_sigmas_list.append(smoothed_opt_param[K:])

    if plotting:
        fig = plt.figure(figsize=(4.2, 2.5))
        ax = fig.add_subplot(1, 1, 1)
        ax.plot(rhot_array[0, :100], label='loc-1')
        ax.plot(rhot_array[1, :100], label='loc-2')
        #ax.plot(rhot_array[2, :100], label='loc-3')
        plt.xlabel('Lags')
        plt.ylabel('autocorrelation')
        plt.legend()
        plt.savefig('autocor_model_adam_mean_mf.pdf')

        fig = plt.figure(figsize=(4.2, 2.5))
        ax = fig.add_subplot(1, 1, 1)
        ax.plot(rhot_array[K, :100], label='sigma-1')
        ax.plot(rhot_array[K + 1, :100], label='sigma-2')
        #ax.plot(rhot_array[K + 2, :100], label='sigma-3')
        plt.xlabel('Lags')
        plt.ylabel('autocorrelation')
        plt.legend()
        plt.savefig('autocor_model_adam_sigma_mf.pdf')

    return (variational_param, variational_param_history_chains,
            averaged_variational_mean_list, averaged_variational_sigmas_list,
            np.array(value_history), np.array(log_norm_history),
            optimisation_log)
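A hypothetical invocation on a toy mean-field objective, assuming `adam_workflow_optimize` and its helpers (`learning_rate_schedule`, `monte_carlo_se`, `compute_R_hat`, ...) are importable from the surrounding module; `toy_objective_and_grad` is a stand-in for the module's real ELBO estimator.

import numpy as np

K = 2  # parameters are [mu_1, mu_2, log_sigma_1, log_sigma_2]

def toy_objective_and_grad(variational_param):
    # Stand-in objective: squared distance of the means from a fixed target
    # plus a penalty on the scales, with its analytic gradient.
    target = np.array([1.0, -1.0])
    mu, log_sigma = variational_param[:K], variational_param[K:]
    value = np.sum((mu - target) ** 2) + np.sum(np.exp(log_sigma))
    grad = np.concatenate([2 * (mu - target), np.exp(log_sigma)])
    return value, grad

init = np.zeros(2 * K)
(params, history_chains, avg_means, avg_sigmas,
 values, log_norms, opt_log) = adam_workflow_optimize(
    5000, toy_objective_and_grad, init, K,
    learning_rate=0.05, stopping_rule=1, plotting=False)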
Example #4
def adagrad_workflow_optimize(n_iters,
                              objective_and_grad,
                              init_param,
                              K,
                              has_log_norm=False,
                              window=10,
                              learning_rate=.01,
                              learning_rate_end=None,
                              epsilon=.1,
                              tolerance=0.05,
                              eval_elbo=100,
                              stopping_rule=1,
                              n_optimizers=1,
                              r_mean_threshold=1.20,
                              r_sigma_threshold=1.20,
                              tail_avg_iters=200,
                              plotting=False,
                              model_name=None):
    """
    stopping rule 1 means traditional ELBO stopping rule, while
    stopping rule 2 means MCSE stopping rule.

    The windowed Adagrad optimizer with convergence diagnostics and iterate averaging ...

    :param n_iters:
    :param objective_and_grad:
    :param init_param: initial params
    :param K:
    :param has_log_norm:
    :param window:
    :param learning_rate:
    :param epsilon:
    :param rhat_window:
    :param averaging:
    :param n_optimisers:
    :param r_mean_threshold:
    :param r_sigma_threshold:
    :param tail_avg_iters:
    :param eval_elbo:
    :param tolerance:
    :param stopping_rule:
    :param avg_grad_norm:
    :param learning_rate_end:
    :param plotting:
    :param model_name:
    :return:
    """

    log_norm_history = []
    variational_param = init_param.copy()
    prev_elbo = 0.
    pmz_size = init_param.size
    optimisation_log = {}
    variational_param_history_list = []
    variational_param_post_conv_history_list = []
    # index for iters
    t = 0
    # index for iters after convergence ..
    j = 0
    N_overall = 50000
    sto_process_convergence = False
    sto_process_sigma_conv = False
    sto_process_mean_conv = False
    for o in range(n_optimizers):
        local_log_norm_history = []
        local_grad_history = []
        log_norm_history = []
        value_history = []
        elbo_diff_rel_med = 10.
        elbo_diff_rel_avg = 10.
        elbo_diff_rel_list = []
        np.random.seed(seed=o)
        if o >= 1:
            variational_param = init_param + stats.norm.rvs(
                size=len(init_param)) * (o + 1) * 0.1
        schedule = learning_rate_schedule(n_iters, learning_rate,
                                          learning_rate_end)
        t = 0
        variational_param_history = []
        variational_param_post_conv_history = []
        mcse_all = np.zeros((pmz_size, 1))
        stop = False
        for curr_learning_rate in schedule:
            if t == N_overall:
                break

            if sto_process_convergence:
                j = j + 1

            if has_log_norm == 1:
                obj_val, obj_grad, log_norm = objective_and_grad(
                    variational_param)
            else:
                obj_val, obj_grad = objective_and_grad(variational_param)
                log_norm = 0.

            if stopping_rule == 1 and t > 1000 and t % eval_elbo == 0:
                elbo_diff_rel = np.abs(obj_val - prev_elbo) / (prev_elbo +
                                                               1e-8)
                elbo_diff_rel_list.append(elbo_diff_rel)
                elbo_diff_rel_med = np.nanmedian(elbo_diff_rel_list)
                elbo_diff_rel_avg = np.nanmean(elbo_diff_rel_list)

            prev_elbo = obj_val
            start_stats = 1500
            if stopping_rule == 2 and t > 1500 and t % eval_elbo == 0:
                #print(np.nanmedian(mcse_all[:, -1]))
                mcse_se_combined_list = monte_carlo_se(
                    np.array(variational_param_history)[None, :], 0)
                mcse_all = np.hstack((mcse_all, mcse_se_combined_list[:,
                                                                      None]))

            value_history.append(obj_val)
            local_grad_history.append(obj_grad)
            local_log_norm_history.append(log_norm)
            log_norm_history.append(log_norm)
            if len(local_grad_history) > window:
                local_grad_history.pop(0)
                local_log_norm_history.pop(0)

            grad_scale = np.exp(
                np.min(local_log_norm_history) -
                np.array(local_log_norm_history))
            scaled_grads = grad_scale[:, np.newaxis] * np.array(
                local_grad_history)
            accum_sum = np.sum(scaled_grads**2, axis=0)
            variational_param = variational_param - curr_learning_rate * obj_grad / np.sqrt(
                epsilon + accum_sum)
            #if i >= 0:
            variational_param_history.append(variational_param.copy())

            if t % 10 == 0:
                avg_loss = np.mean(value_history[max(0, t - 1000):t + 1])

            t = t + 1
            if stopping_rule == 1 and stop == False and elbo_diff_rel_med <= tolerance:
                N_overall = t + 100
                stop = True
            if stopping_rule == 1 and stop == False and elbo_diff_rel_avg <= tolerance:
                N_overall = t + 100
                stop = True

            if stopping_rule ==2 and stop == False and sto_process_convergence == True and t > 1500 and \
                    t % eval_elbo == 0 and (np.nanmedian(mcse_all[:,-1]) <= epsilon) and j > 300:
                print('Optimization stopping reliably!')
                stop = True
                break

            variational_param_history_array = np.array(
                variational_param_history)
            if stopping_rule == 2 and t % eval_elbo == 0 and t > 800 and sto_process_convergence == False:
                variational_param_history_list.append(
                    variational_param_history_array)
                variational_param_history_chains = np.stack(
                    variational_param_history_list, axis=0)
                variational_param_history_list.pop(0)
                rhats_halfway_last = compute_R_hat(
                    variational_param_history_chains, warmup=0.5)[1]
                rhat_mean_halfway, rhat_sigma_halfway = rhats_halfway_last[:K], rhats_halfway_last[
                    K:]

                if (rhat_mean_halfway < r_mean_threshold
                    ).all() and sto_process_mean_conv == False:
                    start_swa_m_iters = t
                    print('Rhat- All means converged ...')
                    sto_process_mean_conv = True
                    start_stats = start_swa_m_iters

                if (rhat_sigma_halfway < r_sigma_threshold
                    ).all() and sto_process_sigma_conv == False:
                    start_swa_s_iters = t
                    print('Rhat- All sigmas converged ...')
                    sto_process_sigma_conv = True
                    start_stats = start_swa_s_iters

            if sto_process_mean_conv == True and sto_process_sigma_conv == True:
                sto_process_convergence = True
                start_stats = np.maximum(start_swa_m_iters, start_swa_s_iters)

            if sto_process_convergence:
                variational_param_post_conv_history.append(variational_param)

            if sto_process_convergence and j > 100 and t % (eval_elbo) == 0:
                variational_param_post_conv_history_array = np.array(
                    variational_param_post_conv_history)
                variational_param_post_conv_history_list.append(
                    variational_param_post_conv_history_array)
                variational_param_post_conv_history_chains = np.stack(
                    variational_param_post_conv_history_list, axis=0)
                variational_param_post_conv_history_list.pop(0)
                pmz_size = variational_param_post_conv_history_chains.shape[2]
                Neff = np.zeros(pmz_size)
                Rhot = []
                khat_iterates = []
                khat_iterates2 = []
                # compute khat for iterates
                for k in range(pmz_size):
                    neff, rho_t_sum, autocov, rho_t = autocorrelation(
                        variational_param_post_conv_history_chains, 0, k)
                    #mcse_se_combined = monte_carlo_se2(variational_param_history_chains, start_stats,i)
                    Neff[k] = neff
                    #mcmc_se2.append(mcse_se_combined)
                    Rhot.append(rho_t)
                    khat_i = compute_khat_iterates(
                        variational_param_post_conv_history_chains,
                        0,
                        k,
                        increasing=True)
                    khat_iterates.append(khat_i)
                    khat_i2 = compute_khat_iterates(
                        variational_param_post_conv_history_chains,
                        0,
                        k,
                        increasing=False)
                    khat_iterates2.append(khat_i2)

                rhot_array = np.stack(Rhot, axis=0)
                khat_combined = np.maximum(khat_iterates, khat_iterates2)

    if sto_process_convergence:

        optimisation_log['start_avg_mean_iters'] = start_swa_m_iters
        optimisation_log['start_avg_sigma_iters'] = start_swa_s_iters
        optimisation_log['r_hat_mean_halfway'] = rhat_mean_halfway
        optimisation_log['r_hat_sigma_halfway'] = rhat_sigma_halfway

        try:
            Neff
        except NameError:
            pass
        else:
            optimisation_log['neff'] = Neff
            optimisation_log['autocov'] = autocov
            optimisation_log['rhot'] = rhot_array
            optimisation_log['start_stats'] = start_stats
            # optimisation_log['mcmc_se2'] = mcmc_se2_array
            optimisation_log['khat_iterates_comb'] = khat_combined

    if stopping_rule == 1:
        start_stats = t - tail_avg_iters

    if stopping_rule == 1:
        smoothed_opt_param = np.mean(
            variational_param_history_array[start_stats:, :], axis=0)
    elif stopping_rule == 2 and sto_process_convergence == True:
        smoothed_opt_param = np.mean(
            variational_param_post_conv_history_chains[0, :], axis=0)

    if stopping_rule == 2 and sto_process_convergence == False:
        smoothed_opt_param = np.mean(
            variational_param_history_array[start_stats:, :], axis=0)

    if plotting:
        fig = plt.figure(figsize=(4.2, 2.5))
        ax = fig.add_subplot(1, 1, 1)
        ax.plot(rhot_array[0, :100], label='loc-1')
        ax.plot(rhot_array[1, :100], label='loc-2')
        #ax.plot(rhot_array[2, :100], label='loc-3')
        plt.xlabel('Lags')
        plt.ylabel('autocorrelation')
        plt.legend()
        plt.savefig('autocor_model_adagrad_mean_mf.pdf')

        fig = plt.figure(figsize=(4.2, 2.5))
        ax = fig.add_subplot(1, 1, 1)
        ax.plot(rhot_array[K, :100], label='sigma-1')
        ax.plot(rhot_array[K + 1, :100], label='sigma-2')
        #ax.plot(rhot_array[K + 2, :100], label='sigma-3')
        plt.xlabel('Lags')
        plt.ylabel('autocorrelation')
        plt.legend()
        plt.savefig('autocor_model_adagrad_sigma_mf.pdf')
        khat_array = optimisation_log['khat_iterates_comb']

    return (smoothed_opt_param, variational_param_history,
            np.array(value_history), np.array(log_norm_history),
            optimisation_log)
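As with the Adam variant, a hedged usage sketch on a toy quadratic objective; it assumes the function and `learning_rate_schedule` are importable from the surrounding module.

import numpy as np

K = 2

def toy_objective_and_grad(variational_param):
    # Stand-in for the real ELBO objective: quadratic in all 2K parameters.
    return np.sum(variational_param ** 2), 2 * variational_param

init = np.ones(2 * K)
(smoothed_param, param_history,
 values, log_norms, opt_log) = adagrad_workflow_optimize(
    3000, toy_objective_and_grad, init, K,
    learning_rate=0.05, stopping_rule=1, plotting=False)
print(smoothed_param[:K], smoothed_param[K:])   # tail-averaged means and scales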
Example #5
def procFlares(prefix,
               filenames,
               path,
               clobberGP=False,
               makePlots=False,
               writeLog=True):
    if makePlots:
        plots_path = path + 'plots/'
        if not os.path.exists(plots_path):
            os.makedirs(plots_path)

    gp_path = path + 'gp/'

    #if not os.path.exists(gp_path):
    #os.makedirs(gp_path)

    log_path = path + 'log/'

    #if not os.path.exists(log_path):
    #os.makedirs(log_path)

    if writeLog:
        if os.path.exists(log_path + prefix + '.log'):
            os.remove(log_path + prefix + '.log')

    # Columns for flare table
    FL_files = np.array([])
    FL_TICs = np.array([])
    FL_id = np.array([])
    FL_t0 = np.array([])
    FL_t1 = np.array([])
    FL_f0 = np.array([])
    FL_f1 = np.array([])
    FL_ed = np.array([])
    FL_ed_err = np.array([])
    FL_skew = np.array([])
    FL_cover = np.array([])
    FL_mu = np.array([])
    FL_std = np.array([])
    FL_g_amp = np.array([])
    FL_mu_err = np.array([])
    FL_std_err = np.array([])
    FL_g_amp_err = np.array([])
    FL_tpeak = np.array([])
    FL_fwhm = np.array([])
    FL_f_amp = np.array([])
    FL_tpeak_err = np.array([])
    FL_fwhm_err = np.array([])
    FL_f_amp_err = np.array([])
    FL_g_chisq = np.array([])
    FL_f_chisq = np.array([])
    FL_g_fwhm_win = np.array([])
    FL_f_fwhm_win = np.array([])

    # Columns for param table
    P_median = np.array([])
    P_s_window = np.array([])
    P_acf_1dt = np.array([])
    P_acf_amp = np.array([])

    failed_files = []

    for k in range(len(filenames)):
        start_time = timing.time()
        filename = filenames[k]
        TIC = int(filename.split('-')[-3])
        file = path + filename

        if makePlots:
            fig, axes = plt.subplots(figsize=(16, 16), nrows=4, sharex=True)

        print('Processing ' + filename)
        gp_data_file = gp_path + filename + '.gp'
        gp_param_file = gp_path + filename + '.gp.par'
        median = -1
        s_window = -1
        acf_1dt = -1
        acf_amp = -1
        with fits.open(file, mode='readonly') as hdulist:
            try:
                tess_bjd = hdulist[1].data['TIME']
                quality = hdulist[1].data['QUALITY']
                pdcsap_flux = hdulist[1].data['PDCSAP_FLUX']
                pdcsap_flux_error = hdulist[1].data['PDCSAP_FLUX_ERR']
            except Exception:
                P_median = np.append(P_median, median)
                P_s_window = np.append(P_s_window, s_window)
                P_acf_1dt = np.append(P_acf_1dt, acf_1dt)
                P_acf_amp = np.append(P_acf_amp, acf_amp)
                failed_files.append(filename)
                np.savetxt(gp_data_file, ([]))
                print('Reading file ' + filename + ' failed')
                continue

        if makePlots:
            axes[0].plot(tess_bjd, pdcsap_flux)

        # Cut out poor quality points
        ok_cut = (quality == 0) & (~np.isnan(tess_bjd)) & (~np.isnan(pdcsap_flux))\
                  & (~np.isnan(pdcsap_flux_error))

        tbl = Table([tess_bjd[ok_cut], pdcsap_flux[ok_cut], \
                  pdcsap_flux_error[ok_cut]],
                     names=('TIME', 'PDCSAP_FLUX', 'PDCSAP_FLUX_ERR'))
        df_tbl = tbl.to_pandas()

        median = np.nanmedian(df_tbl['PDCSAP_FLUX'])

        # Estimate the period of the LC with autocorrelation
        acf = fh.autocorr_estimator(tbl['TIME'], tbl['PDCSAP_FLUX']/median, \
                                    yerr=tbl['PDCSAP_FLUX_ERR']/median,
                                    min_period=0.1, max_period=27, max_peaks=2)
        if len(acf['peaks']) > 0:
            acf_1dt = acf['peaks'][0]['period']
            acf_amp = acf['autocorr'][1][np.where(
                acf['autocorr'][0] == acf_1dt)]
            mask = np.where(
                (acf['autocorr'][0] == acf['peaks'][0]['period']))[0]
            acf_1pk = acf['autocorr'][1][mask][0]
            s_window = int(acf_1dt /
                           np.fabs(np.nanmedian(np.diff(df_tbl['TIME']))) / 6)
        else:
            acf_1dt = (tbl['TIME'][-1] - tbl['TIME'][0]) / 2
            acf_amp = 0
            acf_1pk = 0  # no autocorrelation peak found
            s_window = 128

        P_median = np.append(P_median, median)
        P_s_window = np.append(P_s_window, s_window)
        P_acf_1dt = np.append(P_acf_1dt, acf_1dt)
        P_acf_amp = np.append(P_acf_amp, acf_amp)

        # Run GP fit on the lightcurve if we haven't already
        if os.path.exists(gp_data_file) and not clobberGP:
            # Failed GP regression will produce an empty file
            if os.path.getsize(gp_data_file) == 0:
                print(file + ' failed (previously) during GP regression')
                failed_files.append(filename)
                continue

            print('GP file already exists, loading...')
            times, smo, var = np.loadtxt(gp_data_file)
        else:
            smo = np.zeros(len(df_tbl['TIME']))
            try:
                if makePlots:
                    ax = axes[1]
                else:
                    ax = None
                times, smo, var, params = iterGP_rotation(df_tbl['TIME'].values, df_tbl['PDCSAP_FLUX'].values/median, \
                                          df_tbl['PDCSAP_FLUX_ERR'].values/median, acf_1dt, acf_1pk, ax=ax)

                #np.savetxt(gp_param_file, params['logs2'], params['logamp'], params['logperiod'], \
                #           params['logq0'], params['logdeltaq'], params['mix'], params['period'])
                np.savetxt(gp_param_file, params)
                np.savetxt(gp_data_file, (times, smo, var))

            except Exception:
                traceback.print_exc()
                failed_files.append(filename)
                np.savetxt(gp_data_file, ([]))
                print(filename + ' failed during GP fitting')
                continue

        # The GP is produced from a downsampled lightcurve. Need to interpolate to
        # compare GP and full LC

        smo_int = np.interp(tbl['TIME'], times, smo)

        # Search for flares in the smoothed lightcurve
        x = np.array(tbl['TIME'])
        y = np.array(tbl['PDCSAP_FLUX'] / median - smo_int)
        yerr = np.array(tbl['PDCSAP_FLUX_ERR'] / median)

        FL = fh.FINDflare(y,
                          yerr,
                          avg_std=True,
                          std_window=s_window,
                          N1=3,
                          N2=1,
                          N3=3)

        if makePlots:
            axes[3].plot(x, y, zorder=1)
            for j in range(len(FL[0])):
                s1, s2 = FL[0][j], FL[1][j] + 1
                axes[3].scatter(x[s1:s2], y[s1:s2], zorder=2)

        # Measure properties of detected flares
        if makePlots:
            fig_fl, axes_fl = plt.subplots(figsize=(16, 16), nrows=4, ncols=4)

        for j in range(len(FL[0])):
            s1, s2 = FL[0][j], FL[1][j] + 1
            tstart, tstop = x[s1], x[s2]
            dx_fac = 10
            dx = tstop - tstart
            x1 = tstart - dx * dx_fac / 2
            x2 = tstop + dx * dx_fac / 2
            mask = (x > x1) & (x < x2)

            # Mask out other flare detections when fitting models
            other_mask = np.ones(len(x), dtype=bool)
            for i in range(len(FL[0])):
                s1other, s2other = FL[0][i], FL[1][i] + 1
                if i == j:
                    continue
                other_mask[s1other:s2other] = 0

            popt1, pstd1, g_chisq, popt2, pstd2, f_chisq, skew, cover = \
                fitFlare(x[other_mask], y[other_mask], yerr[other_mask], x1, x2)

            mu, std, g_amp = popt1[0], popt1[1], popt1[2]
            mu_err, std_err, g_amp_err = pstd1[0], pstd1[1], pstd1[2]

            tpeak, fwhm, f_amp = popt2[0], popt2[1], popt2[2]
            tpeak_err, fwhm_err, f_amp_err = pstd2[0], pstd2[1], pstd2[2]

            f_fwhm_win = fwhm / (x2 - x1)
            g_fwhm_win = std / (x2 - x1)

            ed, ed_err = measureED(x, y, yerr, tpeak, fwhm)

            FL_files = np.append(FL_files, filename)
            FL_TICs = np.append(FL_TICs, TIC)
            FL_t0 = np.append(FL_t0, x1)
            FL_t1 = np.append(FL_t1, x2)
            FL_f0 = np.append(FL_f0, np.nanmedian(tbl['PDCSAP_FLUX'][s1:s2]))
            FL_f1 = np.append(FL_f1, np.nanmax(tbl['PDCSAP_FLUX'][s1:s2]))
            FL_ed = np.append(FL_ed, ed)
            FL_ed_err = np.append(FL_ed_err, ed_err)

            FL_skew = np.append(FL_skew, skew)
            FL_cover = np.append(FL_cover, cover)
            FL_mu = np.append(FL_mu, mu)
            FL_std = np.append(FL_std, std)
            FL_g_amp = np.append(FL_g_amp, g_amp)
            FL_mu_err = np.append(FL_mu_err, mu_err)
            FL_std_err = np.append(FL_std_err, std_err)
            FL_g_amp_err = np.append(FL_g_amp_err, g_amp_err)

            FL_tpeak = np.append(FL_tpeak, tpeak)
            FL_fwhm = np.append(FL_fwhm, fwhm)
            FL_f_amp = np.append(FL_f_amp, f_amp)
            FL_tpeak_err = np.append(FL_tpeak_err, tpeak_err)
            FL_fwhm_err = np.append(FL_fwhm_err, fwhm_err)
            FL_f_amp_err = np.append(FL_f_amp_err, f_amp_err)

            FL_g_chisq = np.append(FL_g_chisq, g_chisq)
            FL_f_chisq = np.append(FL_f_chisq, f_chisq)

            FL_g_fwhm_win = np.append(FL_g_fwhm_win, g_fwhm_win)
            FL_f_fwhm_win = np.append(FL_f_fwhm_win, f_fwhm_win)

            if makePlots and j < 15:
                row_idx = j // 4
                col_idx = j % 4
                axes_fl[row_idx][col_idx].errorbar(x[mask],
                                                   y[mask],
                                                   yerr=yerr[mask])
                axes_fl[row_idx][col_idx].scatter(x[s1:s2], y[s1:s2])

                if popt1[0] > 0:
                    xmodel = np.linspace(x1, x2)
                    ymodel = fh.aflare1(xmodel, tpeak, fwhm, f_amp)
                    axes_fl[row_idx][col_idx].plot(xmodel, ymodel, label=r'$\chi_{f}$ = ' + '{:.3f}'.format(f_chisq) \
                                                + '\n FWHM/window = ' + '{:.2f}'.format(f_fwhm_win))
                    ymodel = fh.gaussian(xmodel, mu, std, g_amp)
                    axes_fl[row_idx][col_idx].plot(xmodel, ymodel, label=r'$\chi_{g}$ = ' + '{:.3f}'.format(g_chisq) \
                                                + '\n FWHM/window = ' + '{:.2f}'.format(g_fwhm_win))
                    axes_fl[row_idx][col_idx].axvline(tpeak - fwhm / 2,
                                                      linestyle='--')
                    axes_fl[row_idx][col_idx].axvline(tpeak + fwhm / 2,
                                                      linestyle='--')
                    axes_fl[row_idx][col_idx].legend()
                    axes_fl[row_idx][col_idx].set_title('Skew = ' +
                                                        '{:.3f}'.format(skew))

        if makePlots:
            fig.suptitle(filename)
            axes[0].set_xlabel('Time [BJD - 2457000, days]')
            axes[0].set_ylabel('Flux [e-/s]')
            axes[1].set_xlabel('Time [BJD - 2457000, days]')
            axes[1].set_ylabel('Normalized Flux')
            axes[2].set_xlabel('Time [BJD - 2457000, days]')
            axes[2].set_ylabel('Rolling STD of GP')
            axes[3].set_xlabel('Time [BJD - 2457000, days]')
            axes[3].set_ylabel('Normalized Flux - GP')
            fig.savefig(plots_path + filename + '.png', format='png')

            if len(FL[0]) > 0:
                fig_fl.suptitle(filename)
                fig_fl.savefig(plots_path + filename + '_flares.png',
                               format='png')

            plt.clf()

        if writeLog:
            with open(log_path + prefix + '.log', 'a') as f:
                time_elapsed = timing.time() - start_time
                num_flares = len(FL[0])

                f.write('{:^15}'.format(str(k+1) + '/' + str(len(filenames))) + \
                        '{:<60}'.format(filename) + '{:<20}'.format(time_elapsed) + \
                        '{:<10}'.format(num_flares) + '\n')

        # Periodically write to the flare table file and param table file
        l = k + 1
        ALL_TIC = pd.Series(filenames).str.split(
            '-', expand=True).iloc[:, -3].astype('int')
        ALL_FILES = pd.Series(filenames).str.split('/', expand=True).iloc[:,
                                                                          -1]

        flare_out = pd.DataFrame(data={'file':FL_files,'TIC':FL_TICs, 't0':FL_t0, 't1':FL_t1, \
                                    'med_flux':FL_f0, 'peak_flux':FL_f1, 'ed':FL_ed, \
                                    'ed_err':FL_ed_err, 'skew':FL_skew, 'cover':FL_cover, \
                                    'mu':FL_mu, 'std':FL_std, 'g_amp': FL_g_amp, 'mu_err':FL_mu_err, \
                                    'std_err':FL_std_err, 'g_amp_err':FL_g_amp_err,'tpeak':FL_tpeak, \
                                    'fwhm':FL_fwhm, 'f_amp':FL_f_amp, 'tpeak_err':FL_tpeak_err, \
                                    'fwhm_err':FL_fwhm_err, 'f_amp_err':FL_f_amp_err,'f_chisq':FL_f_chisq, \
                                    'g_chisq':FL_g_chisq, 'f_fwhm_win':FL_f_fwhm_win, 'g_fwhm_win':FL_g_fwhm_win})
        flare_out.to_csv(log_path + prefix + '_flare_out.csv', index=False)

        param_out = pd.DataFrame(data={'file':ALL_FILES[:l], 'TIC':ALL_TIC[:l], 'med':P_median[:l], \
                                    's_window':P_s_window[:l], 'acf_1dt':P_acf_1dt[:l], 'acf_amp':P_acf_amp[:l]})
        param_out.to_csv(log_path + prefix + '_param_out.csv', index=False)

    for k in range(len(failed_files)):
        print(failed_files[k])
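A hypothetical call, assuming a directory of TESS 2-minute light-curve FITS files (the TIC ID is parsed from the third-to-last '-'-separated field of each filename) and that the module-level helpers used above (`fh`, `iterGP_rotation`, `fitFlare`, `measureED`) and imports (`astropy.io.fits`, `pandas`, `matplotlib`, `time as timing`) are available; the directory layout is an assumption.

import os

path = './sector01/'   # hypothetical data directory, must end with '/'
for sub in ('gp', 'log', 'plots'):
    os.makedirs(os.path.join(path, sub), exist_ok=True)

filenames = sorted(f for f in os.listdir(path) if f.endswith('_lc.fits'))
procFlares('sector01', filenames, path, clobberGP=False,
           makePlots=True, writeLog=True)
# Expected outputs: log/sector01_flare_out.csv, log/sector01_param_out.csv,
# per-file GP fits under gp/, and diagnostic figures under plots/.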
Example #6
def aflare1(t, tpeak, fwhm, ampl, upsample=False, uptime=10):
    '''
    The Analytic Flare Model evaluated for a single-peak (classical).
    Reference Davenport et al. (2014) http://arxiv.org/abs/1411.3723
    Use this function for fitting classical flares with most curve_fit
    tools.
    Note: this model assumes the flux before the flare is zero centered
    Parameters
    ----------
    t : 1-d array
        The time array to evaluate the flare over
    tpeak : float
        The time of the flare peak
    fwhm : float
        The "Full Width at Half Maximum", timescale of the flare
    ampl : float
        The amplitude of the flare
    upsample : bool
        If True up-sample the model flare to ensure more precise energies.
    uptime : float
        How many times to up-sample the data (Default is 10)
    Returns
    -------
    flare : 1-d array
        The flux of the flare model evaluated at each time
    '''

    # Large weight for bad parameters, for least squares fitting
    if fwhm <= 0 or ampl <= 0:
        return np.inf

    _fr = [1.00000, 1.94053, -0.175084, -2.24588, -1.12498]
    _fd = [0.689008, -1.60053, 0.302963, -0.278318]

    if upsample:
        dt = np.nanmedian(np.diff(t))
        timeup = np.linspace(min(t) - dt, max(t) + dt, t.size * uptime)

        flareup = np.piecewise(
            timeup,
            [(timeup <= tpeak) * (timeup - tpeak) / fwhm > -1.,
             (timeup > tpeak)],
            [
                lambda x: (
                    _fr[0] +  # 0th order
                    _fr[1] * ((x - tpeak) / fwhm) +  # 1st order
                    _fr[2] * ((x - tpeak) / fwhm)**2. +  # 2nd order
                    _fr[3] * ((x - tpeak) / fwhm)**3. +  # 3rd order
                    _fr[4] * ((x - tpeak) / fwhm)**4.),  # 4th order
                lambda x: (_fd[0] * np.exp(
                    ((x - tpeak) / fwhm) * _fd[1]) + _fd[2] * np.exp(
                        ((x - tpeak) / fwhm) * _fd[3]))
            ]) * np.abs(ampl)  # amplitude

        # and now downsample back to the original time...
        ## this way might be better, but makes assumption of uniform time bins
        # flare = np.nanmean(flareup.reshape(-1, uptime), axis=1)

        ## This way does linear interp. back to any input time grid
        # flare = np.interp(t, timeup, flareup)

        ## this way uses "binned statistic"
        downbins = np.concatenate((t - dt / 2., [max(t) + dt / 2.]))
        flare, _, _ = binned_statistic(timeup,
                                       flareup,
                                       statistic='mean',
                                       bins=downbins)

    else:
        flare = np.piecewise(
            t,
            [(t <= tpeak) * (t - tpeak) / fwhm > -1., (t > tpeak)],
            [
                lambda x: (
                    _fr[0] +  # 0th order
                    _fr[1] * ((x - tpeak) / fwhm) +  # 1st order
                    _fr[2] * ((x - tpeak) / fwhm)**2. +  # 2nd order
                    _fr[3] * ((x - tpeak) / fwhm)**3. +  # 3rd order
                    _fr[4] * ((x - tpeak) / fwhm)**4.),  # 4th order
                lambda x: (_fd[0] * np.exp(
                    ((x - tpeak) / fwhm) * _fd[1]) + _fd[2] * np.exp(
                        ((x - tpeak) / fwhm) * _fd[3]))
            ]) * np.abs(ampl)  # amplitude

    return flare
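A quick evaluation of the model on a 2-minute cadence grid; the parameter values are illustrative.

import numpy as np
import matplotlib.pyplot as plt

t = np.arange(0.0, 0.5, 2.0 / 60.0 / 24.0)           # days, 2-minute cadence
flux = aflare1(t, tpeak=0.2, fwhm=0.01, ampl=1.0)     # flare peaking at t = 0.2

plt.plot(t, flux)
plt.xlabel('Time [days]')
plt.ylabel('Relative flux')
plt.show()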
Example #7
def FINDflare(flux,
              error,
              N1=3,
              N2=1,
              N3=3,
              avg_std=False,
              std_window=7,
              returnbinary=False,
              debug=False):
    '''
    The algorithm for local changes due to flares defined by
    S. W. Chang et al. (2015), Eqn. 3a-d
    http://arxiv.org/abs/1510.01005
    Note: these equations were originally in magnitude units, i.e. smaller
    values are increases in brightness. The signs have been changed, but
    coefficients have not been adjusted to change from log(flux) to flux.
    Note: this algorithm originally ran over sections without "changes" as
    defined by Change Point Analysis. May have serious problems for data
    with dramatic starspot activity. If possible, remove starspot first!
    Parameters
    ----------
    flux : numpy array
        data to search over
    error : numpy array
        errors corresponding to data.
    N1 : int, optional
        Coefficient from original paper (Default is 3)
        How many times above the stddev is required.
    N2 : int, optional
        Coefficient from original paper (Default is 1)
        How many times above the stddev and uncertainty is required
    N3 : int, optional
        Coefficient from original paper (Default is 3)
        The number of consecutive points required to flag as a flare
    avg_std : bool, optional
        Should the "sigma" in this data be computed by the median of
        the rolling().std()? (Default is False)
        (Not part of original algorithm)
    std_window : float, optional
        If avg_std=True, how big of a window should it use?
        (Default is 7 data points)
        (Not part of original algorithm)
    returnbinary : bool, optional
        Should the code return the start and stop indices of flares (default,
        set to False) or a binary array where 1=flare (set to True)?
        (Not part of original algorithm)
    '''

    med_i = np.nanmedian(flux)

    if debug is True:
        print("DEBUG: med_i = {}".format(med_i))

    if avg_std is False:
        sig_i = np.nanstd(flux)  # just the stddev of the window
    else:
        # take the average of the rolling stddev in the window.
        # better for windows w/ significant starspots being removed
        sig_i = np.nanmedian(
            pd.Series(flux).rolling(std_window, center=True).std())
    if debug is True:
        print("DEBUG: sig_i = ".format(sig_i))

    ca = flux - med_i
    cb = np.abs(flux - med_i) / sig_i
    cc = np.abs(flux - med_i - error) / sig_i

    if debug is True:
        print("DEBUG: N0={}, N1={}, N2={}".format(sum(ca > 0), sum(cb > N1),
                                                  sum(cc > N2)))

    # pass cuts from Eqns 3a,b,c
    ctmp = np.where((ca > 0) & (cb > N1) & (cc > N2))

    cindx = np.zeros_like(flux)
    cindx[ctmp] = 1

    # Need to find cumulative number of points that pass "ctmp"
    # Count in reverse!
    ConM = np.zeros_like(flux)
    # this requires a full pass thru the data -> bottleneck
    for k in range(2, len(flux)):
        ConM[-k] = cindx[-k] * (ConM[-(k - 1)] + cindx[-k])

    # these only defined between dl[i] and dr[i]
    # find flare start where values in ConM switch from 0 to >=N3
    istart_i = np.where((ConM[1:] >= N3) & (ConM[0:-1] - ConM[1:] < 0))[0] + 1

    # use the value of ConM to determine how many points away stop is
    istop_i = istart_i + (ConM[istart_i] - 1)

    istart_i = np.array(istart_i, dtype='int')
    istop_i = np.array(istop_i, dtype='int')

    if returnbinary is False:
        return istart_i, istop_i
    else:
        bin_out = np.zeros_like(flux, dtype='int')
        for k in range(len(istart_i)):
            bin_out[istart_i[k]:istop_i[k] + 1] = 1
        return bin_out
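A self-contained sanity check on synthetic data, assuming `numpy` is imported as `np` (and `pandas` as `pd` if avg_std=True is used); the injected exponential decay mimics a flare.

import numpy as np

np.random.seed(0)
n = 500
flux = 1.0 + np.random.normal(0.0, 0.001, n)          # flat light curve + noise
error = np.full(n, 0.001)
flux[200:210] += 0.02 * np.exp(-np.arange(10) / 3.0)  # injected flare-like decay

istart, istop = FINDflare(flux, error, N1=3, N2=1, N3=3)
print(istart, istop)   # expected to bracket the injected event near index 200

# Binary mask form instead of start/stop indices:
in_flare = FINDflare(flux, error, returnbinary=True)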