コード例 #1
0
def run(model, model_updater, params_true, time=100):
    """Run the updater under a time budget and return a trace of diagnostics.

    One trace row is recorded for the initial state (time 0.0) and one after
    every call to ``model_updater.update(model)``. Only the update call sits
    between ``start()`` and ``stop()`` of the timer, so building trace rows
    is not inside the timed region.

    Args:
        model: Model that is updated in place by ``model_updater``.
        model_updater: Object exposing ``update(model)``.
        params_true: Ground-truth parameters; passed to every trace row and
            assigned to ``model.params`` at the end to evaluate the true
            log density.
        time: Budget compared against ``Timer.elapsed`` (units are whatever
            ``Timer`` reports -- presumably seconds, confirm against Timer).

    Returns:
        pandas.DataFrame with columns ``time``, ``num_features``, ``log_p``,
        ``log_p_true``, ``rel_log_p``, ``b_cubed_f``, ``b_cubed_p``,
        ``b_cubed_r`` and ``max_particles``.

    Note:
        On return ``model.params`` is ``params_true``; the sampled
        parameters are not restored.
    """
    stopwatch = Timer()

    # Row describing the starting state, before any update has been made.
    rows = [_get_trace_row(model, model_updater, params_true, 0.0)]

    while stopwatch.elapsed < time:
        stopwatch.start()
        model_updater.update(model)
        stopwatch.stop()

        row = _get_trace_row(
            model, model_updater, params_true, stopwatch.elapsed)
        rows.append(row)

    frame = pd.DataFrame(rows)

    # Swap the true parameters in so model.log_p is the reference density.
    model.params = params_true
    frame["log_p_true"] = model.log_p

    # Signed difference to the true log density, scaled by its magnitude.
    reference = frame["log_p_true"]
    frame["rel_log_p"] = (frame["log_p"] - reference) / reference.abs()

    columns = [
        "time", "num_features", "log_p", "log_p_true", "rel_log_p",
        "b_cubed_f", "b_cubed_p", "b_cubed_r", "max_particles"
    ]

    return frame[columns]
コード例 #2
0
def run(data_true, model, model_updater, params_true, time=100):
    """Run the updater under a time budget and return a trace of diagnostics.

    A trace row is recorded for the initial state (time 0.0) and after each
    ``model_updater.update(model)`` call. Only the update call is bracketed
    by the timer's ``start()``/``stop()``.

    Args:
        data_true: Reference data forwarded to ``_get_trace_row``.
        model: Model that is updated in place by ``model_updater``.
        model_updater: Object exposing ``update(model)``.
        params_true: Ground-truth parameters; forwarded to every trace row
            and assigned to ``model.params`` at the end.
        time: Budget compared against ``Timer.elapsed`` (presumably seconds;
            confirm against Timer).

    Returns:
        pandas.DataFrame with columns ``time``, ``num_features``, ``log_p``,
        ``log_p_true``, ``rel_log_p``, ``b_cubed_f``, ``b_cubed_p``,
        ``b_cubed_r`` and ``error``.

    Note:
        On return ``model.params`` is ``params_true``; the sampled
        parameters are not restored.
    """
    stopwatch = Timer()

    # Starting state, recorded before the first update.
    rows = [_get_trace_row(data_true, model, params_true, 0.0)]

    while stopwatch.elapsed < time:
        stopwatch.start()
        model_updater.update(model)
        stopwatch.stop()

        row = _get_trace_row(data_true, model, params_true, stopwatch.elapsed)
        rows.append(row)

    frame = pd.DataFrame(rows)

    # Evaluate the log density at the true parameters as the reference.
    model.params = params_true
    frame['log_p_true'] = model.log_p

    reference = frame['log_p_true']
    frame['rel_log_p'] = (frame['log_p'] - reference) / reference.abs()

    columns = [
        'time',
        'num_features',
        'log_p',
        'log_p_true',
        'rel_log_p',
        'b_cubed_f',
        'b_cubed_p',
        'b_cubed_r',
        'error',
    ]

    return frame[columns]
コード例 #3
0
def run(data_true, model, model_updater, params_true, time=100):
    """Run an annealing phase followed by a timed main phase; return the trace.

    Phase one keeps updating while the feature allocation updater's
    ``annealing_schedule`` evaluated at its ``iter`` is below 1.0. Phase two
    uses a fresh timer and keeps updating until that timer's ``elapsed``
    reaches ``time``. Each phase has its own timer, so the ``time`` values
    recorded in the main phase are measured from the start of that phase.

    Args:
        data_true: Reference data forwarded to ``_get_trace_row``.
        model: Model that is updated in place by ``model_updater``.
        model_updater: Object exposing ``update(model)`` and a
            ``feat_alloc_updater`` with ``annealing_schedule`` and ``iter``.
        params_true: Ground-truth parameters; forwarded to every trace row
            and assigned to ``model.params`` at the end.
        time: Budget for the main phase only, compared against
            ``Timer.elapsed`` (presumably seconds; confirm against Timer).

    Returns:
        pandas.DataFrame with columns ``annealed``, ``time``,
        ``annealing_time``, ``num_features``, ``log_p``, ``log_p_true``,
        ``rel_log_p``, ``b_cubed_f``, ``b_cubed_p``, ``b_cubed_r`` and
        ``rmse``.

    Note:
        On return ``model.params`` is ``params_true``; the sampled
        parameters are not restored.
    """
    # Initial state; the ``annealed`` flag is left to _get_trace_row's default.
    rows = [_get_trace_row(data_true, model, params_true, 0.0)]

    def advance(stopwatch, annealed):
        # One timed update followed by an (untimed) trace row.
        stopwatch.start()
        model_updater.update(model)
        stopwatch.stop()
        rows.append(
            _get_trace_row(
                data_true, model, params_true, stopwatch.elapsed, annealed=annealed
            )
        )

    # Annealing phase: runs zero times if the schedule is already at >= 1.0.
    anneal_timer = Timer()

    while model_updater.feat_alloc_updater.annealing_schedule(
            model_updater.feat_alloc_updater.iter) < 1.0:
        advance(anneal_timer, True)

    annealing_time = anneal_timer.elapsed

    # Main phase, timed from zero with a new timer.
    main_timer = Timer()

    while main_timer.elapsed < time:
        advance(main_timer, False)

    frame = pd.DataFrame(rows)

    # Swap the true parameters in so model.log_p is the reference density.
    model.params = params_true

    frame["annealing_time"] = annealing_time
    frame["log_p_true"] = model.log_p

    reference = frame["log_p_true"]
    frame["rel_log_p"] = (frame["log_p"] - reference) / reference.abs()

    columns = [
        "annealed", "time", "annealing_time", "num_features", "log_p",
        "log_p_true", "rel_log_p", "b_cubed_f", "b_cubed_p", "b_cubed_r",
        "rmse"
    ]

    return frame[columns]
コード例 #4
0
def run(model, model_updater, time=100):
    """Run the updater under a time budget and return a trace of diagnostics.

    A trace row is recorded for the initial state (time 0.0) and after each
    ``model_updater.update(model)`` call. Only the update call is bracketed
    by the timer's ``start()``/``stop()``.

    Args:
        model: Model that is updated in place by ``model_updater``.
        model_updater: Object exposing ``update(model)``.
        time: Budget compared against ``Timer.elapsed`` (presumably seconds;
            confirm against Timer).

    Returns:
        pandas.DataFrame with columns ``time``, ``num_features``,
        ``num_features_used`` and ``log_p``.
    """
    stopwatch = Timer()

    # Starting state, recorded before the first update.
    rows = [_get_trace_row(model, 0.0)]

    while stopwatch.elapsed < time:
        stopwatch.start()
        model_updater.update(model)
        stopwatch.stop()

        rows.append(_get_trace_row(model, stopwatch.elapsed))

    columns = ["time", "num_features", "num_features_used", "log_p"]

    return pd.DataFrame(rows)[columns]
コード例 #5
0
def run(data_true, model, model_updater, trace_writer, time=100):
    """Run the updater under a time budget, writing and collecting a trace.

    The loop body always executes at least once: the budget is checked only
    after an update has been made. After every update the state is first
    handed to ``trace_writer.write_row`` and then appended to the in-memory
    trace. Only the update call is bracketed by the timer's
    ``start()``/``stop()``.

    Args:
        data_true: Reference data forwarded to ``_get_trace_row``.
        model: Model that is updated in place by ``model_updater``.
        model_updater: Object exposing ``update(model)``.
        trace_writer: Object exposing ``write_row(model, elapsed)``.
        time: Budget compared against ``Timer.elapsed`` (presumably seconds;
            confirm against Timer).

    Returns:
        pandas.DataFrame with columns ``time``, ``num_features``,
        ``num_features_used``, ``log_p`` and ``rmse``.
    """
    stopwatch = Timer()

    # Starting state; note it is not sent to trace_writer.
    rows = [_get_trace_row(data_true, model, 0.0)]

    finished = False

    while not finished:
        stopwatch.start()
        model_updater.update(model)
        stopwatch.stop()

        trace_writer.write_row(model, stopwatch.elapsed)

        rows.append(_get_trace_row(data_true, model, stopwatch.elapsed))

        # Budget is tested after the update, so one update always happens.
        finished = stopwatch.elapsed >= time

    columns = ["time", "num_features", "num_features_used", "log_p", "rmse"]

    return pd.DataFrame(rows)[columns]
コード例 #6
0
def main(args):
    """Simulate PyClone binomial data, run a timed sampler and print diagnostics.

    Ground-truth parameters and data are simulated after seeding with
    ``args.data_seed``; the model is built after seeding with
    ``args.param_seed``; the sampling loop runs after seeding with
    ``args.run_seed``. The loop calls ``model_updater.update(model)`` until
    the accumulated ``Timer.elapsed`` reaches ``args.time`` and prints a
    status block whenever more than ``args.print_freq`` has elapsed since
    the previous one.

    Args:
        args: Namespace-like object (it is passed to ``vars``) providing
            ``ibp``, ``data_seed``, ``param_seed``, ``run_seed``,
            ``num_dims``, ``num_data_points``, ``num_features``, ``alpha``,
            ``annealing_power``, ``sampler``, ``mixture_prob``,
            ``num_particles``, ``test_path``, ``time`` and ``print_freq``.

    Returns:
        None. All output goes to stdout.
    """
    if args.ibp:
        print(
            'Warning: IBP sampling for the PyClone model is not properly supported.'
        )

    # Simulate ground truth under the data seed.
    set_seed(args.data_seed)

    params = pgfa.models.pyclone.binomial.simulate_params(args.num_dims,
                                                          args.num_data_points,
                                                          K=args.num_features,
                                                          alpha=args.alpha)

    data = pgfa.models.pyclone.binomial.simulate_data(params)

    model_updater = get_model_updater(annealing_power=args.annealing_power,
                                      feat_alloc_updater_type=args.sampler,
                                      ibp=args.ibp,
                                      mixture_prob=args.mixture_prob,
                                      num_particles=args.num_particles,
                                      test_path=args.test_path)

    # Build the model under the parameter seed.
    set_seed(args.param_seed)

    # With IBP the number of features is not fixed, so K is left unset.
    model_K = None if args.ibp else args.num_features

    model = pgfa.models.pyclone.binomial.get_model(data, K=model_K)

    set_seed(args.run_seed)

    # Temporarily install the true parameters to evaluate the reference log
    # density, then restore the model's own initial parameters.
    old_params = model.params.copy()

    model.params = params.copy()

    log_p_true = model.log_p

    model.params = old_params.copy()

    print('Arguments')

    print('-' * 100)

    for key, value in sorted(vars(args).items()):
        print('{0}: {1}'.format(key, value))

    print('@' * 100)

    print('True feature counts (sorted): {}'.format(
        sorted(np.sum(params.Z, axis=0))))

    print('True log density: {}'.format(log_p_true))

    print('@' * 100)

    timer = Timer()

    i = 0

    # -inf guarantees the first status block is printed before any update.
    # Builtin float is used because the ``np.float`` alias was removed in
    # NumPy 1.24.
    last_print_time = -float('inf')

    while timer.elapsed < args.time:
        if (timer.elapsed - last_print_time) > args.print_freq:
            last_print_time = timer.elapsed

            print('Iteration: {}'.format(i))

            print('Log density: {}'.format(model.log_p))

            print('Relative log density: {}'.format(
                (model.log_p - log_p_true) / abs(log_p_true)))

            if args.ibp:
                print('Num features: {}'.format(model.params.K))

            print('B-Cube scores: {}'.format(
                get_b_cubed_score(params.Z, model.params.Z)))

            print('Feature counts (sorted): {}'.format(
                sorted(np.sum(model.params.Z, axis=0))))

            print('#' * 100)

        # Only the update itself is timed; printing is outside start/stop.
        timer.start()

        model_updater.update(model)

        timer.stop()

        i += 1
コード例 #7
0
def main(args):
    """Simulate linear Gaussian data, run a timed sampler and print diagnostics.

    Ground-truth parameters and data are simulated after seeding with
    ``args.data_seed``; the model is built after seeding with
    ``args.param_seed``; the sampling loop runs after seeding with
    ``args.run_seed``. The loop calls ``model_updater.update(model)`` until
    the accumulated ``Timer.elapsed`` reaches ``args.time`` and prints a
    status block whenever more than ``args.print_freq`` has elapsed since
    the previous one.

    Args:
        args: Namespace-like object (it is passed to ``vars``) providing
            ``data_seed``, ``param_seed``, ``run_seed``, ``alpha``,
            ``tau_v``, ``tau_x``, ``num_dims``, ``num_features``,
            ``num_data_points``, ``prop_missing``, ``annealing_power``,
            ``sampler``, ``ibp``, ``mixture_prob``, ``num_particles``,
            ``test_path``, ``time`` and ``print_freq``.

    Returns:
        None. All output goes to stdout.

    Raises:
        AssertionError: If any column or row of the simulated data is
            entirely NaN.
    """
    # Simulate ground truth under the data seed.
    set_seed(args.data_seed)

    params = pgfa.models.linear_gaussian.simulate_params(
        alpha=args.alpha,
        tau_v=args.tau_v,
        tau_x=args.tau_x,
        D=args.num_dims,
        K=args.num_features,
        N=args.num_data_points)

    data, data_true = pgfa.models.linear_gaussian.simulate_data(
        params, prop_missing=args.prop_missing)

    # Make sure we do not have rows/columns of data that are all missing
    for d in range(params.D):
        assert not np.all(np.isnan(data[:, d]))

    for n in range(params.N):
        assert not np.all(np.isnan(data[n]))

    model_updater = get_model_updater(annealing_power=args.annealing_power,
                                      feat_alloc_updater_type=args.sampler,
                                      ibp=args.ibp,
                                      mixture_prob=args.mixture_prob,
                                      num_particles=args.num_particles,
                                      test_path=args.test_path)

    # Build the model under the parameter seed.
    set_seed(args.param_seed)

    # With IBP the number of features is not fixed, so K is left unset.
    model_K = None if args.ibp else args.num_features

    model = pgfa.models.linear_gaussian.get_model(data, K=model_K)

    set_seed(args.run_seed)

    # Temporarily install the true parameters to evaluate the reference log
    # density, then restore the model's own initial parameters.
    old_params = model.params.copy()

    model.params = params.copy()

    log_p_true = model.log_p

    model.params = old_params.copy()

    print('Arguments')

    print('-' * 100)

    for key, value in sorted(vars(args).items()):
        print('{0}: {1}'.format(key, value))

    print('@' * 100)

    print('True feature counts (sorted): {}'.format(
        sorted(np.sum(params.Z, axis=0))))

    print('True log density: {}'.format(log_p_true))

    print('@' * 100)

    timer = Timer()

    i = 0

    # -inf guarantees the first status block is printed before any update.
    # Builtin float is used because the ``np.float`` alias was removed in
    # NumPy 1.24.
    last_print_time = -float('inf')

    while timer.elapsed < args.time:
        if (timer.elapsed - last_print_time) > args.print_freq:
            last_print_time = timer.elapsed

            print('Iteration: {}'.format(i))

            print('Log density: {}'.format(model.log_p))

            print('Relative log density: {}'.format(
                (model.log_p - log_p_true) / abs(log_p_true)))

            # The L2 error is only reported when some data was held out.
            if args.prop_missing > 0:
                print('L2 error: {}'.format(
                    compute_l2_error(data, data_true, model.params)))

            if args.ibp:
                print('Num features: {}'.format(model.params.K))

            print('B-Cube scores: {}'.format(
                get_b_cubed_score(params.Z, model.params.Z)))

            print('Feature counts (sorted): {}'.format(
                sorted(np.sum(model.params.Z, axis=0))))

            print('#' * 100)

        # Only the update itself is timed; printing is outside start/stop.
        timer.start()

        model_updater.update(model)

        timer.stop()

        i += 1
コード例 #8
0
def main(args):
    """Simulate LFRM data, run a timed sampler and print diagnostics.

    Ground-truth parameters and data are simulated after seeding with
    ``args.data_seed`` and the model is built after seeding with
    ``args.param_seed``. The loop calls ``model_updater.update(model)``
    until the accumulated ``Timer.elapsed`` reaches ``args.time`` and prints
    a status block whenever more than ``args.print_freq`` has elapsed since
    the previous one.

    Args:
        args: Namespace-like object (it is passed to ``vars``) providing
            ``data_seed``, ``param_seed``, ``num_data_points``,
            ``num_features``, ``alpha``, ``tau``, ``prop_missing``,
            ``symmetric``, ``annealing_power``, ``sampler``, ``ibp``,
            ``mixture_prob``, ``num_particles``, ``test_path``, ``time`` and
            ``print_freq``.

    Returns:
        None. All output goes to stdout.
    """
    # Simulate ground truth under the data seed.
    set_seed(args.data_seed)

    params = pgfa.models.lfrm.simulate_params(
        args.num_data_points, K=args.num_features, alpha=args.alpha, tau=args.tau
    )

    data, data_true = pgfa.models.lfrm.simulate_data(
        params, prop_missing=args.prop_missing, symmetric=args.symmetric
    )

    model_updater = get_model_updater(
        annealing_power=args.annealing_power,
        feat_alloc_updater_type=args.sampler,
        ibp=args.ibp,
        mixture_prob=args.mixture_prob,
        num_particles=args.num_particles,
        test_path=args.test_path
    )

    # Build the model under the parameter seed.
    # NOTE(review): no separate run seed is set afterwards, so the sampling
    # loop continues from this seed's random stream -- confirm intended.
    set_seed(args.param_seed)

    # With IBP the number of features is not fixed, so K is left unset.
    model_K = None if args.ibp else args.num_features

    model = pgfa.models.lfrm.get_model(data, K=model_K, symmetric=args.symmetric)

    # Temporarily install the true parameters to evaluate the reference log
    # density, then restore the model's own initial parameters.
    old_params = model.params.copy()

    model.params = params.copy()

    log_p_true = model.log_p

    model.params = old_params.copy()

    print('Arguments')

    print('-' * 100)

    for key, value in sorted(vars(args).items()):
        print('{0}: {1}'.format(key, value))

    print('@' * 100)

    print('True feature counts (sorted): {}'.format(sorted(np.sum(params.Z, axis=0))))

    print('True log density: {}'.format(log_p_true))

    print('@' * 100)

    timer = Timer()

    i = 0

    # -inf guarantees the first status block is printed before any update.
    # Builtin float is used because the ``np.float`` alias was removed in
    # NumPy 1.24.
    last_print_time = -float('inf')

    while timer.elapsed < args.time:
        if (timer.elapsed - last_print_time) > args.print_freq:
            last_print_time = timer.elapsed

            print('Iteration: {}'.format(i))

            print('Log density: {}'.format(model.log_p))

            print('Relative log density: {}'.format((model.log_p - log_p_true) / abs(log_p_true)))

            # Summed absolute difference between the 'max' prediction and
            # the fully observed data.
            print('Error: {}'.format(np.sum(np.abs(model.predict(method='max') - data_true))))

            if args.ibp:
                print('Num features: {}'.format(model.params.K))

            print('B-Cube scores: {}'.format(get_b_cubed_score(params.Z, model.params.Z)))

            print('Feature counts (sorted): {}'.format(sorted(np.sum(model.params.Z, axis=0))))

            print('#' * 100)

        # Only the update itself is timed; printing is outside start/stop.
        timer.start()

        model_updater.update(model)

        timer.stop()

        i += 1