def test_old_v_new_obs_to_sim(N_traj=1000):
    """Check ``fw.sim_to_obs`` against the deprecated per-trajectory route.

    Exercises the full multi-window case: ``N_traj`` replicates are simulated
    for each of three window sizes (half, one and two times
    ``example_window``) and pooled into one frame. The waits extracted by
    ``fw.sim_to_obs`` must then agree with those from
    ``groupby(...).apply(fw.traj_to_waits)`` in rank order, wait time, wait
    type and state.
    """
    traj_cols = ['window_end', 'replicate']
    # same offset for every window: minus 100 times the summed mean waits
    start_offset = -100 * np.sum([var.mean() for var in wait_vars])
    window_sizes = np.array([1 / 2, 1, 2]) * example_window

    sim = pd.concat(
        [
            fw.ab_window(
                [var.rvs for var in wait_vars],
                window_size=window,
                offset=start_offset,
                num_replicates=N_traj,
                states=['A', 'B'],
            ) for window in window_sizes
        ],
        ignore_index=True,
    )

    new_waits = fw.sim_to_obs(sim, traj_cols=traj_cols)
    # the old route is expected to emit a deprecation warning
    with pytest.deprecated_call():
        old_waits = sim.groupby(traj_cols).apply(fw.traj_to_waits)
    # drop the grouping columns from the old result before comparing
    old_waits = old_waits.drop(columns=traj_cols)

    new_rank = new_waits.reset_index()['rank_order']
    old_rank = old_waits.reset_index()['rank_order']
    assert np.all(new_rank == old_rank)

    wait_times_match = np.isclose(new_waits['wait_time'],
                                  old_waits['wait_time'])
    assert np.all(wait_times_match)

    for col in ('wait_type', 'state'):
        assert np.all(new_waits[col] == old_waits[col])
def _boot_int_cdf(N_var_T):
    """Return the error of the windowed interior CDF for one simulation.

    Parameters
    ----------
    N_var_T : tuple
        ``(N_traj_per_boot, var_pair, err_t)``: the number of replicates to
        simulate, an iterable of ``(name, variable)`` pairs (each variable
        must provide ``rvs``, ``mean`` and ``cdf``), and the times at which
        the error is evaluated. The window size used is ``max(err_t)``.

    Returns
    -------
    dict
        Maps each state name to ``var.cdf(err_t) / var.cdf(T)`` minus the
        empirical windowed CDF of that state's interior times, linearly
        interpolated at ``err_t``.
    """
    import multi_locus_analysis.finite_window as fw
    import multi_locus_analysis.plotting.finite_window as fplt
    N_traj_per_boot, var_pair, err_t = N_var_T
    T = np.max(err_t)
    var_pair = dict(var_pair)
    # Use the locally imported ``fw`` rather than a bare ``ab_window`` so this
    # function does not depend on a module-level name being present.
    sim = fw.ab_window(
        [var.rvs for var in var_pair.values()],
        offset=-100 * np.sum([var.mean() for var in var_pair.values()]),
        window_size=T,
        num_replicates=N_traj_per_boot,
        states=list(var_pair),
    )
    obs = fw.sim_to_obs(sim)
    res = {}
    for name, var in var_pair.items():
        interior, _ = fplt._int_win_from_obs(obs, name)
        x, cdf = fw.ecdf_windowed(interior, T)
        # true CDF renormalized by its value at T, minus the empirical one
        res[name] = var.cdf(err_t) / var.cdf(T) - np.interp(err_t, x, cdf)
    return res
def test_old_v_new_sim_to_frame_obs(N_traj=1000):
    """Compare discretely-sampled waits from the old and new code paths.

    Only a single window size is exercised: the old method doesn't work in
    the multi-window case. The new method has been checked by eye there, but
    has no automated test yet.
    """
    traj_cols = ['replicate']
    sim = fw.ab_window(
        [var.rvs for var in wait_vars],
        window_size=example_window,
        offset=-100 * np.sum([var.mean() for var in wait_vars]),
        num_replicates=N_traj,
        states=['A', 'B'],
    )

    # 21 evenly spaced frame times from 0 to the largest window_end
    frame_times = np.linspace(0, sim.window_end.max(), 21)

    # old method (very involved): one "movie" per trajectory, reshaped to
    # long form, then waits extracted per trajectory
    movies = sim.groupby(traj_cols).apply(fw.traj_to_movie, times=frame_times)
    movies = movies.T.unstack()
    movies.name = 'state'
    long_movies = pd.DataFrame(movies).reset_index()
    old_obs = long_movies.groupby(traj_cols).apply(fw.movie_to_waits)

    # new method: go straight from the simulation to frame times
    frames = fw.munging.simulation_to_frame_times(
        sim, frame_times, traj_cols=traj_cols)
    new_obs = fw.sim_to_obs(frames.reset_index(), traj_cols=traj_cols)

    # shift the new rank_order down by one and re-index so that the labels
    # line up with the old result
    new_obs = new_obs.reset_index()
    new_obs['rank_order'] = new_obs['rank_order'] - 1
    new_obs = new_obs.set_index(traj_cols + ['rank_order'])

    # NOTE(review): this compares 0 against a boolean mask, so it demands
    # exact equality and lets NaNs (e.g. from index misalignment) pass;
    # np.isclose's tolerance is never applied to the wait times themselves.
    # Confirm whether np.isclose(new, old) was intended.
    differs = np.abs(new_obs['wait_time'] - old_obs['wait_time']) > 0
    assert np.all(np.isclose(0, differs))
def _example_pareto_alpha(V_T_N):
    """Simulate Pareto-distributed waits and estimate their exponent.

    Parameters
    ----------
    V_T_N : tuple
        ``((betas, xmin), T, N_traj)``. ``betas`` are the Pareto shape
        parameters (one simulated state per entry), ``xmin`` is the scale
        shared by all of them, ``T`` is the observation window size and
        ``N_traj`` the number of replicates to simulate.

    Returns
    -------
    pandas.DataFrame
        One row per state name, with columns ``'true'``, ``'mle-interior'``,
        ``'mle-uncensored'``, ``'fit-interior'``, ``'fit-corrected'``,
        ``'fit-kaplan'`` and ``'fit-uncensored'``. Any estimate that could
        not be computed is reported as NaN.
    """
    import multi_locus_analysis.finite_window as fw
    import multi_locus_analysis.plotting.finite_window as fplt

    # unpack parameters first
    (betas, xmin), T, N_traj = V_T_N
    var_pair = [
        fplt.Variable(scipy.stats.pareto(beta, scale=xmin),
                      name=f'Pareto({beta:0.3g})') for beta in betas
    ]
    # run one simulation
    sim = fw.ab_window([var.rvs for var in var_pair],
                       offset=-100 * np.sum([var.mean() for var in var_pair]),
                       window_size=T,
                       num_replicates=N_traj,
                       states=[var.name for var in var_pair])
    obs = fw.sim_to_obs(sim)

    # Sentinel marking an intermediate result that the current variable has
    # not (successfully) produced.
    missing = object()

    def require(*values):
        # Abort the current step if one of its inputs was never computed.
        if any(value is missing for value in values):
            raise LookupError('a prerequisite step failed for this variable')

    # now extract alpha several different ways
    # var.args[0] is presumably the Pareto shape (beta) -- TODO confirm that
    # fplt.Variable forwards the wrapped distribution's args
    true_alpha = {var.name: var.args[0] + 1 for var in var_pair}
    mle_interior_est = {}
    mle_uncensored_baseline = {}
    fit_interior = {}
    fit_corrected = {}
    fit_kaplan = {}
    fit_uncensored_baseline = {}
    for var in var_pair:
        # Start every variable from a clean slate, so that a failed step
        # yields NaN downstream instead of silently reusing the previous
        # variable's data.
        interior = windows = exterior = missing
        num_obs = uncensored_obs = missing
        # Each estimator is best-effort: catch Exception (not a bare except)
        # so KeyboardInterrupt/SystemExit still propagate.
        # mle, interior
        try:
            interior, windows = fplt._int_win_from_obs(obs, var.name)
            num_obs = len(interior)
            mle_interior_est[var.name] = _mla_stats.power_law_slope_mle(
                interior, xmin, num_obs)
        except Exception:
            mle_interior_est[var.name] = np.nan
        # fit, interior
        try:
            require(interior)
            x_int, cdf_int = fw.ecdf_windowed(interior, windows)
            fit_interior[var.name] = _alpha_from_cdf(x_int, cdf_int, xmin)
        except Exception:
            fit_interior[var.name] = np.nan
        # fit, corrected
        try:
            require(interior)
            exterior = fplt._ext_from_obs(obs, var.name)
            bin_centers, final_cdf = fw.ecdf_combined(exterior, interior, T)
            fit_corrected[var.name] = _alpha_from_cdf(bin_centers, final_cdf,
                                                      xmin)
        except Exception:
            fit_corrected[var.name] = np.nan
        # fit, kaplan
        try:
            require(interior, exterior)
            times = np.concatenate([interior, exterior])
            # interior times are flagged as observed events, exterior ones
            # as not observed
            is_interior = np.concatenate(
                [np.ones_like(interior),
                 np.zeros_like(exterior)]).astype(bool)
            kmf = lifelines.KaplanMeierFitter() \
                .fit(times, event_observed=is_interior)
            x_kap = kmf.cumulative_density_.index.values
            cdf_kap = kmf.cumulative_density_.values.flatten()
            fit_kaplan[var.name] = _alpha_from_cdf(x_kap, cdf_kap, xmin)
        except Exception:
            fit_kaplan[var.name] = np.nan
        # mle, uncensored baseline: as many direct draws as interior times
        try:
            require(num_obs)
            uncensored_obs = var.rvs(size=(num_obs, ))
            mle_uncensored_baseline[var.name] = _mla_stats.power_law_slope_mle(
                uncensored_obs, xmin, num_obs)
        except Exception:
            mle_uncensored_baseline[var.name] = np.nan
        # fit, uncensored baseline
        try:
            require(uncensored_obs)
            x_unc, cdf_unc = _mla_stats.ecdf(uncensored_obs, pad_left_at_x=0)
            fit_uncensored_baseline[var.name] = \
                _alpha_from_cdf(x_unc, cdf_unc, xmin)
        except Exception:
            fit_uncensored_baseline[var.name] = np.nan
    df = pd.concat(map(pd.Series, [
        true_alpha, mle_interior_est, mle_uncensored_baseline, fit_interior,
        fit_corrected, fit_kaplan, fit_uncensored_baseline
    ]),
                   axis=1)
    df.columns = [
        'true', 'mle-interior', 'mle-uncensored', 'fit-interior',
        'fit-corrected', 'fit-kaplan', 'fit-uncensored'
    ]
    return df
def _example_lambda_fit(V_T_N):
    """Simulate exponential waits and estimate their mean several ways.

    Parameters
    ----------
    V_T_N : tuple
        ``(lambdas, T, N_traj)``. Each entry of ``lambdas`` is used as the
        ``scale`` of one exponentially distributed state, ``T`` is the
        observation window size and ``N_traj`` the number of replicates to
        simulate.

    Returns
    -------
    pandas.DataFrame
        One row per state name, with columns ``'true'``, ``'corrected'``,
        ``'naive'``, ``'count-based'``, ``'kaplan'`` and ``'uncensored'``.
        A CDF-based estimate that could not be computed is reported as NaN.

    Notes
    -----
    Only the estimation steps are best-effort. Failures while extracting the
    interior/exterior times from the observations are not caught here.
    """
    import multi_locus_analysis.finite_window as fw
    import multi_locus_analysis.plotting.finite_window as fplt
    lambdas, T, N_traj = V_T_N
    var_pair = [
        fplt.Variable(expon(scale=lam), name=f"Exp({lam})") for lam in lambdas
    ]
    sim = fw.ab_window([var.rvs for var in var_pair],
                       offset=-100 * np.sum([var.mean() for var in var_pair]),
                       window_size=T,
                       num_replicates=N_traj,
                       states=[var.name for var in var_pair])
    obs = fw.sim_to_obs(sim)

    mean_est = fw.average_lifetime(obs)
    true_mean = {var.name: var.mean() for var in var_pair}
    naive_slope_est = {}
    correct_slope_est = {}
    kaplan_slope_est = {}
    uncensored_baseline = {}
    for var in var_pair:
        # Each estimator below is best-effort: catch Exception (not a bare
        # except) so KeyboardInterrupt/SystemExit still propagate.
        # naive
        interior, windows = fplt._int_win_from_obs(obs, var.name)
        try:
            x_int, cdf_int = fw.ecdf_windowed(interior, windows)
            naive_slope_est[var.name] = _mean_from_exp_cdf(x_int, cdf_int)
        except Exception:
            naive_slope_est[var.name] = np.nan
        # corrected
        exterior = fplt._ext_from_obs(obs, var.name)
        try:
            bin_centers, final_cdf = fw.ecdf_combined(exterior, interior, T)
            correct_slope_est[var.name] = _mean_from_exp_cdf(
                bin_centers, final_cdf)
        except Exception:
            correct_slope_est[var.name] = np.nan
        # kaplan: interior times are flagged as observed events, exterior
        # ones as not observed
        times = np.concatenate([interior, exterior])
        is_interior = np.concatenate(
            [np.ones_like(interior),
             np.zeros_like(exterior)]).astype(bool)
        try:
            kmf = lifelines.KaplanMeierFitter() \
                    .fit(times, event_observed=is_interior)
            x_kap = kmf.cumulative_density_.index.values
            cdf_kap = kmf.cumulative_density_.values.flatten()
            kaplan_slope_est[var.name] = _mean_from_exp_cdf(x_kap, cdf_kap)
        except Exception:
            kaplan_slope_est[var.name] = np.nan
        # uncensored baseline: as many direct draws as interior times
        num_obs = len(interior)
        try:
            x_unc, cdf_unc = _mla_stats.ecdf(var.rvs(size=(num_obs, )),
                                             pad_left_at_x=0)
            uncensored_baseline[var.name] = _mean_from_exp_cdf(x_unc, cdf_unc)
        except Exception:
            uncensored_baseline[var.name] = np.nan
    df = pd.concat(map(pd.Series, [
        true_mean, correct_slope_est, naive_slope_est, mean_est,
        kaplan_slope_est, uncensored_baseline
    ]),
                   axis=1)
    df.columns = [
        'true', 'corrected', 'naive', 'count-based', 'kaplan', 'uncensored'
    ]
    return df