コード例 #1
0
ファイル: cad.py プロジェクト: gaberosser/crime-fighter
def apply_sepp_to_tabular_data(table_name):
    """
    Fit a SEPP model to the records read from a dumped table.

    The records are loaded with ``get_crimes_from_dump``, restricted to the
    rows whose first column is at most 210, and passed to
    ``apply_sepp_to_data`` together with a fixed set of model settings.

    :param table_name: passed unchanged to ``get_crimes_from_dump``.
    :return: the value returned by ``apply_sepp_to_data``.
    """
    # the second element returned by the loader is not needed here
    records, _ = get_crimes_from_dump(table_name)

    # filter the records to provide the correct quantity for training: keep
    # rows whose first column is <= 210 (a lower bound of 151 has also been
    # used in the past but is currently disabled)
    keep = records[:, 0] <= 210.
    records = records[keep]

    sepp_settings = dict(
        max_delta_t=60,
        max_delta_d=500,
        estimation_function=lambda x, y: estimation.estimator_bowers(
            x, y, ct=1, cd=0.02),
        niter=50,
        bg_kde_kwargs={'number_nn': [100, 15]},
        trigger_kde_kwargs={
            'min_bandwidth': [0.5, 10, 10],
            'number_nn': 15,
        },
        sepp_class=pp_models.SeppStochasticNnReflected,
    )

    return apply_sepp_to_data(records, **sepp_settings)
コード例 #2
0
def run_me(location_dir, max_t, max_d):
    """
    Train a SEPP model on one pickled simulation and pickle the model object.

    The input is read from ``IN_DIR/<location_dir>/simulation.pickle``. A log
    file and the pickled model are written to ``OUT_DIR/<location_dir>``,
    which is created if it does not exist. Both output names embed ``max_t``
    and ``max_d`` formatted to two decimal places.

    Relies on the module-level names ``bg_kde_kwargs``, ``trigger_kde_kwargs``
    and ``niter``.

    :param location_dir: sub-directory name used under both IN_DIR and OUT_DIR.
    :param max_t: forwarded to the model as ``max_delta_t``.
    :param max_d: forwarded to the model as ``max_delta_d``.
    :return: None. Exceptions raised during training are logged, not raised;
        the model object is pickled whether or not training succeeded.
    """
    data_file = os.path.join(IN_DIR, location_dir, 'simulation.pickle')
    out_dir = os.path.join(OUT_DIR, location_dir)
    log_file = os.path.join(out_dir,
                            'simulation_%.2f-%.2f.log' % (max_t, max_d))

    if not os.path.isdir(out_dir):
        os.makedirs(out_dir)

    # set loggers
    # NOTE(review): each call attaches another FileHandler to
    # 'simulation_study'; repeated calls in one process will accumulate
    # handlers on that logger.
    logger = logging.getLogger('simulation_study')
    logger.setLevel(logging.DEBUG)
    fh = logging.FileHandler(log_file)
    formatter = logging.Formatter(
        '%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    fh.setFormatter(formatter)
    logger.addHandler(fh)

    for t in ('point_process.models', 'kde.models'):
        this_logger = logging.getLogger(t)
        this_logger.setLevel(logging.DEBUG)
        this_logger.handlers = []  # clear existing handlers
        this_logger.addHandler(fh)  # replace with the same file output

    logger.info("Logger set.  Script started.")
    logger.info("Simulation study.")

    # load data
    # Pickle streams are bytes, so the file must be opened in binary mode.
    # NOTE: unpickling executes arbitrary code; only load trusted files.
    with open(data_file, 'rb') as f:
        data = pickle.load(f)
    logger.info("Loaded data.")

    logger.info("Instantiating SEPP object")

    r = pp_models.SeppStochasticNn(data=data,
                                   max_delta_d=max_d,
                                   max_delta_t=max_t,
                                   bg_kde_kwargs=bg_kde_kwargs,
                                   trigger_kde_kwargs=trigger_kde_kwargs)
    # seed the model's p attribute from the Bowers estimator before training
    p = estimation.estimator_bowers(data, r.linkage)
    r.p = p
    r.set_seed(42)

    try:
        logger.info("Starting training run.")
        r.train(niter=niter)
    except Exception as exc:
        # best effort: record the failure (with traceback) and still save
        # whatever state the model reached
        logger.exception(repr(exc))
    finally:
        file_stem = os.path.join(out_dir,
                                 'simulation_%.2f-%.2f' % (max_t, max_d))
        logger.info("Saving results (or None).")
        with open(file_stem + '-sepp_obj.pickle', 'wb') as f:
            pickle.dump(r, f)
コード例 #3
0
ファイル: cad.py プロジェクト: gaberosser/crime-fighter
def validate_point_process(
    nicl_type=3,
    end_date=datetime.datetime(2012, 3, 1, tzinfo=pytz.utc),
    start_date=None,
    jiggle=None,
    num_validation=10,
    num_pp_iter=15,
    grid=100,
    time_step=1,
    pred_dt_plus=1,
):
    """
    Run a SEPP validation over the Camden region and return its results.

    :param nicl_type: forwarded to ``get_crimes_by_type``.
    :param end_date: cutoff date; converted to days since the ``t0`` returned
        by ``get_crimes_by_type`` and used as the validation cutoff.
    :param start_date: forwarded to ``get_crimes_by_type``.
    :param jiggle: forwarded to ``get_crimes_by_type`` as ``jiggle_scale``.
    :param num_validation: added to the cutoff (in days) to form ``t_upper``.
    :param num_pp_iter: passed to training as ``niter``.
    :param grid: argument to ``set_sample_units``.
    :param time_step: forwarded to the validation run.
    :param pred_dt_plus: forwarded to the validation run.
    :return: tuple ``(res, vb)`` of the run output and the validation object.
    """
    # get data (the third returned element is not used here)
    crimes, t0, _ = get_crimes_by_type(
        nicl_type=nicl_type,
        only_new=True,
        jiggle_scale=jiggle,
        start_date=start_date,
    )

    # express the cutoff date as a number of days elapsed since t0
    elapsed = end_date - t0
    end_days = elapsed.total_seconds() / SEC_IN_DAY

    # get domain
    domain = get_camden_region()

    sepp_kwargs = {
        'max_delta_t': 60,
        'max_delta_d': 1000,
        'estimation_function': (
            lambda x, y: estimation.estimator_bowers(x, y, ct=1, cd=0.02)),
        'trigger_kde_kwargs': {
            'min_bandwidth': np.array([0.3, 5., 5.]),
        },
    }
    vb = validate.SeppValidation(crimes,
                                 spatial_domain=domain,
                                 model_kwargs=sepp_kwargs)
    vb.set_sample_units(grid)
    vb.set_t_cutoff(end_days, b_train=False)

    run_kwargs = dict(
        time_step=time_step,
        t_upper=end_days + num_validation,
        pred_dt_plus=pred_dt_plus,
        train_kwargs={'niter': num_pp_iter},
        verbose=True,
    )
    res = vb.run(**run_kwargs)

    return res, vb
コード例 #4
0
    start_date=datetime.datetime(2001, 1, 1, 0)
    end_date=datetime.datetime(2003, 1, 1)
    first_training_size = 50

    poly = compute_chicago_region()
    res, t0, cid = get_crimes_by_type(
        crime_type='burglary',
        start_date=start_date,
        end_date=end_date
    )

    vb = validate.PpValidation(res, spatial_domain=poly, model_kwargs={
        'max_trigger_t': 30,
        'max_trigger_d': 200,
        'estimator': lambda x, y: estimation.estimator_bowers(x, y, ct=1, cd=0.02),
    })
    vb.set_sample_units(250)
    vb.set_t_cutoff(first_training_size, b_train=False)

    sepp_res = vb.run(time_step=1, t_upper=first_training_size + 1,
                 train_kwargs={'niter': 20, 'tol_p': 1e-5},
                 verbose=True)

    # use basic historic data spatial hotspot
    sk = hotspot.SKernelHistoric(first_training_size) # use heatmap from same period
    vb_sk = validation.ValidationBase(res, sk, poly)
    vb_sk.roc.copy_sample_units(vb.roc)
    # vb_sk._grid = vb._grid
    # vb_sk.centroids = vb.centroids
    # vb_sk.a = vb.a
コード例 #5
0
    'max_delta_t':
    60,
    'max_delta_d':
    400,
    'bg_kde_kwargs': {
        'number_nn': [100, 15],
        'min_bandwidth': [0.5, 20, 20],
        'strict': False
    },
    'trigger_kde_kwargs': {
        'number_nn': 15,
        'min_bandwidth': [0.5, 20, 20],
        'strict': False
    },
    'estimation_function':
    lambda x, y: estimation.estimator_bowers(x, y, **estimate_kwargs),
    'seed':
    42,  # doesn't matter what this is, just want it fixed
}

niter = 75


## CAMDEN
@shutdown_decorator
def camden():

    start_day_numbers = [277, 307, 337,
                         367]  # number of days from t0 (1/3/2011)

    poly = cad.get_camden_region()
コード例 #6
0
    c = simulate.MohlerSimulation()
    # c.off_sigma_x = 1.0
    c.seed(42)
    c.run()
    data = np.array(c.data)  # (t, x, y)
    # sort data by time ascending (may be done already?)
    data = data[data[:, 0].argsort()]

    r0 = pp_class(data=data,
                  max_delta_d=max_delta_d,
                  max_delta_t=max_delta_t,
                  bg_kde_kwargs=bg_kde_kwargs,
                  trigger_kde_kwargs=trigger_kde_kwargs,
                  parallel=False)

    p0 = estimation.estimator_bowers(data, r0.linkage, ct=1, cd=10)
    r0.p = p0
    r0.set_seed(42)

    alpha = 0.1

    new_data = data.copy()
    new_data[:, 1:] *= alpha

    r1 = pp_class(data=new_data,
                  max_delta_d=max_delta_d * alpha,
                  max_delta_t=max_delta_t,
                  bg_kde_kwargs=bg_kde_kwargs,
                  trigger_kde_kwargs=trigger_kde_kwargs,
                  parallel=False)
コード例 #7
0
    # vb.set_t_cutoff(4.0)

    # use basic historic data spatial hotspot
    # sk = hotspot.SKernelHistoric(2) # use heatmap from final 2 days data
    # vb = ValidationBase(data, hotspot.Hotspot, camden.mpoly, model_args=(sk,))
    # vb.set_sample_units(grid_length=200)
    # vb.set_t_cutoff(4.0)

    # use Point process learning method
    # vb = SeppValidation(data, model_kwargs={
    #     'max_delta_t': 80,
    #     'max_delta_d': 0.75,
    #     'estimation_function': lambda x, y: estimation.estimator_bowers(x, y, ct=1, cd=10),
    #     })

    sepp = models.SeppStochasticNn(max_delta_t=80,
                                   max_delta_d=0.75,
                                   estimation_function=lambda x, y: estimation.estimator_bowers(x, y, ct=1, cd=10))

    vb = SeppValidationFixedModel(data, sepp)

    pred_kwargs = {
        'include': ('full', 'full_static', 'bg', 'trigger')
    }

    vb.set_sample_units(3, n_sample_per_grid=10)
    vb.set_t_cutoff(400, b_train=False)
    res = vb.run(time_step=5, n_iter=5, train_kwargs={'niter': 10}, verbose=True, pred_kwargs=pred_kwargs)

    from point_process import plotting
    plotting.validation_multiplot(res)