예제 #1
0
def simulation_mohler():
    """Run a ``MohlerSimulation``, fit a nearest-neighbour SEPP and plot it.

    A ``simulate.MohlerSimulation`` is run with its default settings, a
    ``pp_models.SeppStochasticNn`` is trained on the resulting data for 50
    iterations, and the trained model together with the simulation object
    is handed to ``pp_plotting.multiplots``.  Nothing is returned.
    """
    sim = simulate.MohlerSimulation()
    sim.run()

    def initial_est(x, y):
        # Starting estimate handed to the model (ct=0.1, cd=0.1).
        return estimation.estimator_exp_gaussian(x, y, ct=0.1, cd=0.1)

    model = pp_models.SeppStochasticNn(
        data=sim.data,
        max_delta_t=100.,
        max_delta_d=1.,
        estimation_function=initial_est,
    )
    model.train(niter=50)
    pp_plotting.multiplots(model, sim)
예제 #2
0
def validate_point_process(
        start_date=datetime.date(2011, 3, 1),
        end_date=datetime.date(2012, 3, 31),
        initial_cutoff=212,
        num_validation=100,
        num_pp_iter=100,
        grid=250,
        n_sample_per_grid=20,
        domain=None,
        pp_class=pp_models.SeppStochasticNn,
        model_kwargs=None):
    """Run a rolling validation on burglary data and return the results.

    Data are fetched with ``get_crimes_by_type(crime_type='burglary', ...)``
    and validated with ``validate.SeppValidationFixedModelIntegration``.

    :param start_date: first date passed to ``get_crimes_by_type``.
    :param end_date: last date passed to ``get_crimes_by_type``.
    :param initial_cutoff: passed to the validator as ``cutoff_t``; also
        treated as a number of days when checking the date range.
    :param num_validation: number of validation runs (``n_iter`` of
        ``vb.run``).
    :param num_pp_iter: ``niter`` forwarded to training via ``train_kwargs``.
    :param grid: first argument of ``vb.set_sample_units``.
    :param n_sample_per_grid: forwarded to ``vb.set_sample_units``.
    :param domain: spatial domain; ``compute_chicago_region()`` when None.
    :param pp_class: model class instantiated as ``pp_class(data=data,
        **model_kwargs)``.
    :param model_kwargs: keyword arguments for ``pp_class``; a built-in
        default set is used when this is None or empty.
    :return: tuple ``(res, vb)`` - the value returned by ``vb.run`` and the
        validator object itself.
    """
    if not model_kwargs:
        model_kwargs = {
            'max_delta_t': 150,
            'max_delta_d': 500,
            'bg_kde_kwargs': {'number_nn': [101, 16]},
            'trigger_kde_kwargs': {'number_nn': 15},
            # Key spelling fixed (was 'remove_conincident_pairs') so that it
            # matches the keyword used everywhere else in this file.
            'remove_coincident_pairs': False,
            'estimation_function': lambda x, y: estimation.estimator_exp_gaussian(x, y, 0.1, 50, frac_bg=None),
            'seed': 42,
        }

    # Only a warning: the run is still attempted with the requested settings.
    if start_date + datetime.timedelta(days=initial_cutoff + num_validation) > end_date:
        warnings.warn("Requested number of validation runs is too large for the data size")

    if domain is None:
        domain = compute_chicago_region()

    data, t0, cid = get_crimes_by_type(
        crime_type='burglary',
        start_date=start_date,
        end_date=end_date,
        domain=domain,
    )

    # NOTE(review): this model instance is not passed to the validator below
    # in the visible code - confirm whether the validator should receive it.
    sepp = pp_class(data=data, **model_kwargs)

    vb = validate.SeppValidationFixedModelIntegration(data, spatial_domain=domain, cutoff_t=initial_cutoff)
    vb.set_sample_units(grid, n_sample_per_grid=n_sample_per_grid)

    ## TODO: check the number of iterations reported is as expected here
    res = vb.run(time_step=1, n_iter=num_validation,
                 train_kwargs={'niter': num_pp_iter},
                 verbose=True)

    return res, vb
예제 #3
0
def construct_sepp(data,
                   sepp_class=pp_models.SeppStochasticNn,
                   seed=42,
                   max_delta_t=60,
                   max_delta_d=500,
                   initial_est=None,
                   remove_coincident_pairs=False,
                   bg_kde_kwargs=None,
                   trigger_kde_kwargs=None,
                   min_bandwidth=None):
    """Instantiate (but do not train) a SEPP model for ``data``.

    :param data: passed to ``sepp_class`` as ``data``.
    :param sepp_class: model class to instantiate.
    :param seed: forwarded to ``sepp_class`` as ``seed``.
    :param max_delta_t: forwarded to ``sepp_class``.
    :param max_delta_d: forwarded to ``sepp_class``.
    :param initial_est: callable ``(x, y)`` used as ``estimation_function``;
        defaults to ``estimation.estimator_exp_gaussian`` with ``ct=0.1``
        and ``cd=50``.
    :param remove_coincident_pairs: forwarded to ``sepp_class``.
    :param bg_kde_kwargs: background KDE options; ``{'strict': False}``
        when None.
    :param trigger_kde_kwargs: trigger KDE options; ``{'strict': False}``
        when None.
    :param min_bandwidth: when not None, stored under ``'min_bandwidth'``
        in both KDE option dicts.
    :return: the new ``sepp_class`` instance.
    """
    if initial_est is None:
        # define initial estimator
        initial_est = lambda x, y: estimation.estimator_exp_gaussian(
            x, y, ct=0.1, cd=50)

    # Work on copies: inserting 'min_bandwidth' below must not modify the
    # dictionaries owned by the caller.
    if bg_kde_kwargs is None:
        bg_kde_kwargs = {'strict': False}
    else:
        bg_kde_kwargs = dict(bg_kde_kwargs)

    if trigger_kde_kwargs is None:
        trigger_kde_kwargs = {'strict': False}
    else:
        trigger_kde_kwargs = dict(trigger_kde_kwargs)

    if min_bandwidth is not None:
        bg_kde_kwargs['min_bandwidth'] = min_bandwidth
        trigger_kde_kwargs['min_bandwidth'] = min_bandwidth

    return sepp_class(data=data,
                      max_delta_d=max_delta_d,
                      max_delta_t=max_delta_t,
                      bg_kde_kwargs=bg_kde_kwargs,
                      trigger_kde_kwargs=trigger_kde_kwargs,
                      estimation_function=initial_est,
                      remove_coincident_pairs=remove_coincident_pairs,
                      seed=seed)
        'max_delta_t':
        90,  # set on each iteration
        'max_delta_d':
        500,  # set on each iteration
        'bg_kde_kwargs': {
            'number_nn': [100, 15],
            'min_bandwidth': None,
            'strict': False
        },
        'trigger_kde_kwargs': {
            'number_nn': 15,
            'min_bandwidth': None,
            'strict': False
        },
        'estimation_function':
        lambda x, y: estimation.estimator_exp_gaussian(x, y, **estimate_kwargs
                                                       ),
        'seed':
        42,  # doesn't matter what this is, just want it fixed
        'remove_coincident_pairs':
        False
    }

    for col_space in col_spacings:
        print "row_space: %d, col_space: %d" % (row_space, col_space)

        # store containers
        data = []
        sepp = []

        for i in range(niter):
            net = create_grid_network(domain_extent, row_space, col_space)
예제 #5
0
def apply_point_process(start_date=datetime.datetime(2010, 3, 1, 0),
                        end_date=datetime.datetime(2010, 6, 1, 0),
                        domain=None,
                        niter=15,
                        min_bandwidth=None,
                        max_delta_t=40,
                        max_delta_d=300,
                        num_nn=None,
                        estimate_kwargs=None,
                        pp_class=pp_models.SeppStochasticNnReflected,
                        seed=42,
                        remove_coincident_pairs=False):
    """Fetch burglary data, train a SEPP model on all of it and return it.

    :param start_date: start of the period passed to ``get_crimes_by_type``.
    :param end_date: end of the period passed to ``get_crimes_by_type``.
    :param domain: spatial domain passed to ``get_crimes_by_type``.
    :param niter: number of training iterations.
    :param min_bandwidth: stored under ``'min_bandwidth'`` in the trigger
        KDE options only.
    :param max_delta_t: forwarded to ``pp_class``.
    :param max_delta_d: forwarded to ``pp_class``.
    :param num_nn: two-element sequence; the whole sequence is used for the
        background KDE and its second element for the trigger KDE.
        Defaults to ``[101, 16]`` and ``15`` respectively.
    :param estimate_kwargs: keyword arguments for
        ``estimation.estimator_exp_gaussian``; defaults to ``ct=0.1``,
        ``cd=50``, ``frac_bg=None`` when None or empty.
    :param pp_class: model class to instantiate.
    :param seed: passed to ``r.set_seed``; ``None`` or ``False`` skips
        seeding.
    :param remove_coincident_pairs: forwarded to ``pp_class``.
    :return: the trained model instance.
    :raises AttributeError: if ``num_nn`` is given without exactly two
        entries.
    """
    # print(...) with a single argument behaves the same on Python 2 and 3.
    print("Getting data...")
    res, t0, cid = get_crimes_by_type(
        crime_type='burglary',
        start_date=start_date,
        end_date=end_date,
        domain=domain
    )

    # Value suggested in an earlier revision (left disabled):
    # min_bandwidth = np.array([0.3, 5., 5.])

    if num_nn is not None:
        if len(num_nn) != 2:
            raise AttributeError("Must supply two num_nn values: [1D case, 2/3D case]")
        num_nn_bg = num_nn
        num_nn_trig = num_nn[1]
    else:
        num_nn_bg = [101, 16]
        num_nn_trig = 15

    bg_kde_kwargs = {
        'number_nn': num_nn_bg,
        'strict': False,
    }

    trigger_kde_kwargs = {
        'min_bandwidth': min_bandwidth,
        'number_nn': num_nn_trig,
        'strict': False,
    }

    if not estimate_kwargs:
        estimate_kwargs = {
            'ct': 0.1,
            'cd': 50,
            'frac_bg': None
        }

    print("Instantiating SEPP class...")
    # Earlier revisions hard-coded pp_models.SeppStochasticNn,
    # SeppStochasticNnReflected or SeppStochasticNnOneSided here; the class
    # is now chosen through the pp_class argument.
    r = pp_class(data=res, max_delta_d=max_delta_d, max_delta_t=max_delta_t,
                 bg_kde_kwargs=bg_kde_kwargs, trigger_kde_kwargs=trigger_kde_kwargs,
                 remove_coincident_pairs=remove_coincident_pairs)

    print("Computing initial probability matrix estimate...")
    r.p = estimation.estimator_exp_gaussian(res, r.linkage, **estimate_kwargs)

    # train on ALL data
    # A plain truthiness test would silently ignore the valid seed 0, so
    # only None/False are treated as "do not seed".
    if seed is not None and seed is not False:
        r.set_seed(seed)

    print("Starting training...")
    r.train(niter=niter)
    return r
        try:
            sepp_obj
        except NameError:
            sepp_obj = {}  # already started

        for i in range(s0.size):
            tt = sorted([s0.flat[i], s1.flat[i]])
            if tuple(tt) in sepp_obj:
                continue
            c = simulate.MohlerSimulation()
            c.t_total = t_total
            c.bg_params[0]['sigma'] = [1., 1.]
            c.bg_params[0]['intensity'] = 5
            c.trigger_sigma = list(tt)
            init_est = lambda d, t: estimation.estimator_exp_gaussian(
                d, t, ct=0.1, cd=np.mean(tt))
            c.run()
            data = c.data
            r = models.SeppStochasticNn(data=data,
                                        max_delta_d=max_delta_d,
                                        max_delta_t=max_delta_t,
                                        estimation_function=init_est,
                                        seed=42,
                                        bg_kde_kwargs=bg_kde_kwargs,
                                        trigger_kde_kwargs=trigger_kde_kwargs)
            try:
                _ = r.train(niter=num_iter)
            except Exception:
                continue
            sepp_obj[tuple(tt)] = copy.deepcopy(r)
예제 #7
0
def apply_point_process(
    nicl_type=3,
    only_new=False,
    start_date=None,
    end_date=None,
    niter=50,
    num_nn=None,
    min_bandwidth=None,
    jiggle_scale=None,
    max_delta_t=60,  # days
    max_delta_d=500,  # metres
    sepp_class=pp_models.SeppStochasticNnReflected,
    data=None,
    rng_seed=42,
    initial_est=None,
    remove_coincident_pairs=False,
):
    """Assemble SEPP settings and delegate to ``apply_sepp_to_data``.

    When ``data`` is None the input is fetched with ``get_crimes_by_type``
    using ``nicl_type``, ``only_new``, ``jiggle_scale``, ``start_date`` and
    ``end_date``; otherwise ``data`` is used directly.

    ``num_nn`` must hold exactly two values: the whole sequence configures
    the background KDE and its second entry the trigger KDE.  If omitted,
    ``[101, 16]`` and ``15`` are used.  ``min_bandwidth`` is applied to the
    trigger KDE only.  A missing ``initial_est`` defaults to
    ``estimation.estimator_exp_gaussian`` with ``ct=0.1`` and ``cd=50``.

    Returns whatever ``apply_sepp_to_data`` returns.  Raises
    ``AttributeError`` when ``num_nn`` does not have two entries.
    """
    # suggested value:
    # min_bandwidth = np.array([0.3, 5., 5.])

    # get data
    if data is None:
        crimes, _t0, _cid = get_crimes_by_type(nicl_type=nicl_type,
                                               only_new=only_new,
                                               jiggle_scale=jiggle_scale,
                                               start_date=start_date,
                                               end_date=end_date)
    else:
        crimes = data

    if initial_est is None:
        # define initial estimator
        initial_est = lambda x, y: estimation.estimator_exp_gaussian(
            x, y, ct=0.1, cd=50)

    if num_nn is None:
        nn_background, nn_trigger = [101, 16], 15
    else:
        if len(num_nn) != 2:
            raise AttributeError(
                "Must supply two num_nn values: [1D case, 2/3D case]")
        nn_background, nn_trigger = num_nn, num_nn[1]

    # strict=False: attempt to restore order even if the number of BG /
    # trigger points becomes less than the requested number of NNs
    background_opts = {'number_nn': nn_background, 'strict': False}
    trigger_opts = {
        'min_bandwidth': min_bandwidth,
        'number_nn': nn_trigger,
        'strict': False,
    }

    return apply_sepp_to_data(
        crimes,
        max_delta_t=max_delta_t,
        max_delta_d=max_delta_d,
        estimation_function=initial_est,
        niter=niter,
        bg_kde_kwargs=background_opts,
        trigger_kde_kwargs=trigger_opts,
        sepp_class=sepp_class,
        rng_seed=rng_seed,
        remove_coincident_pairs=remove_coincident_pairs,
    )
def simulate_data_and_train():
    """Simulate patchy-background data, train SEPP models and dump to disk.

    For every ``col_space`` in ``col_spacings`` a regular grid of background
    sources is laid out over ``domain_extent``; ten simulations are run with
    ``simulate.PatchyGaussianSumBackground`` and a
    ``models.SeppStochasticNnReflected`` is trained on each.  The simulated
    data, the trained models and the settings used are written with ``dill``
    to ``subdir`` (created if missing), one file per column spacing.

    NOTE(review): ``col_spacings``, ``row_space``, ``domain_extent``,
    ``sim_bg_sigma`` and ``subdir`` are not assigned in this function and
    are presumably module-level names - confirm they exist
    (``sim_bg_sigma`` in particular only appears here as a commented-out
    local).
    """
    n_repeats = 10

    sim_t_total = 1000.
    sim_num_to_prune = 400
    # sim_bg_sigma = 100.

    train_kwargs = {'niter': 100}

    estimate_kwargs = {'ct': 0.1, 'cd': 50., 'frac_bg': None}

    kde_common = {'min_bandwidth': None, 'strict': False}
    model_kwargs = {
        'parallel': True,
        'max_delta_t': 90,
        'max_delta_d': 500,
        'bg_kde_kwargs': dict(kde_common, number_nn=[100, 15]),
        'trigger_kde_kwargs': dict(kde_common, number_nn=15),
        # Kept as a lambda closing over estimate_kwargs, as in the original.
        'estimation_function': lambda x, y: estimation.estimator_exp_gaussian(
            x, y, **estimate_kwargs),
        # arbitrary value; it only needs to be fixed
        'seed': 42,
        'remove_coincident_pairs': False,
    }

    for col_space in col_spacings:
        print("row_space: %d, col_space: %d" % (row_space, col_space))

        # generate simulation parameters: one background source per grid node
        x_bg = np.arange(domain_extent[0], domain_extent[2], col_space)
        y_bg = np.arange(domain_extent[1], domain_extent[3], row_space)
        n_bg = x_bg.size * y_bg.size
        bg_params = [
            {
                'location': [x, y],
                'intensity': 2. / n_bg,
                'sigma': [sim_bg_sigma, sim_bg_sigma],
            }
            for x in x_bg
            for y in y_bg
        ]

        # store containers
        data = []
        sepp = []

        for _ in range(n_repeats):
            sim = simulate.PatchyGaussianSumBackground(bg_params=bg_params)
            sim.trigger_params['sigma'] = [50., 50.]  # TODO: change this, too?
            sim.run(t_total=sim_t_total, num_to_prune=sim_num_to_prune)
            data.append(sim.data)
            model = models.SeppStochasticNnReflected(data=sim.data,
                                                     **model_kwargs)
            model.train(**train_kwargs)
            sepp.append(model)

        out = {
            'row_spacing': row_space,
            'col_spacing': col_space,
            'domain_extent': domain_extent,
            'sim_t_total': sim_t_total,
            'sim_num_to_prune': sim_num_to_prune,
            'train_kwargs': train_kwargs,
            'model_kwargs': model_kwargs,
            'data': data,
            'sepp': sepp,
        }

        if not os.path.exists(subdir):
            os.makedirs(subdir)
        fn = 'simulated_data_patchy_bg_row_%d_col_%d.dill' % (row_space,
                                                              col_space)
        out_path = os.path.join(subdir, fn)
        with open(out_path, 'wb') as f:
            dill.dump(out, f)
예제 #9
0
    domain_nw = polys['Northwest']
    domain_s = polys['South']
    # domain = get_chicago_polys()['Northwest']
    tmp = get_chicago_data(domain=domain_nw)
    data, t0, cid = tmp['burglary']

    # normalise spatial component of data
    # data[:, 1] -= data[:, 1].min()
    # data[:, 2] -= data[:, 2].min()
    # scaling = np.mean(np.std(data[:, 1:], axis=0, ddof=1))
    # data[:, 1:] /= scaling
    scaling = 1.

    # est_fun = lambda x, y: estimation.estimator_exp_gaussian(x, y, ct=0.1, cd=50, frac_bg=0.8)
    est_fun = lambda x, y: estimation.estimator_exp_gaussian(
        x, y, ct=0.1, cd=50 / scaling, frac_bg=None)

    for num_nn in nns:
        trigger_kde_kwargs = {'strict': False, 'number_nn': num_nn[-1]}
        bg_kde_kwargs = {'strict': False, 'number_nn': list(num_nn)}

        sepp = pp_models.SeppStochasticNn(
            data=data,
            max_delta_t=max_t,
            max_delta_d=max_d / scaling,
            seed=42,
            estimation_function=est_fun,
            trigger_kde_kwargs=trigger_kde_kwargs,
            bg_kde_kwargs=bg_kde_kwargs,
            remove_coincident_pairs=True)
        sepp.train(niter=niter)