def apply_sepp_to_tabular_data(table_name):
    """Fit a SEPP model to the crime records stored in a dump table.

    The records are fetched with ``get_crimes_from_dump``, truncated to the
    rows whose first column is at most 210, and passed to
    ``apply_sepp_to_data`` together with a fixed set of model settings.

    :param table_name: name of the table handed to ``get_crimes_from_dump``.
    :return: whatever ``apply_sepp_to_data`` returns for this configuration.
    """

    def bowers_estimator(x, y):
        # Initial estimator with fixed ct / cd parameters.
        return estimation.estimator_bowers(x, y, ct=1, cd=0.02)

    # The second value returned alongside the data is not needed here.
    crimes, _ = get_crimes_from_dump(table_name)

    # Filter data to provide the correct quantity for training.
    # (A lower bound, ``crimes[:, 0] >= 151.``, is currently disabled.)
    # NOTE(review): the first column is presumably time in days - confirm.
    within_upper_bound = crimes[:, 0] <= 210.
    crimes = crimes[within_upper_bound]

    settings = dict(
        max_delta_t=60,
        max_delta_d=500,
        estimation_function=bowers_estimator,
        niter=50,
        bg_kde_kwargs={
            'number_nn': [100, 15],
        },
        trigger_kde_kwargs={
            'min_bandwidth': [0.5, 10, 10],
            'number_nn': 15,
        },
        sepp_class=pp_models.SeppStochasticNnReflected,
    )
    return apply_sepp_to_data(crimes, **settings)
def run_me(location_dir, max_t, max_d):
    """Train a SEPP model on a pickled simulation and save the model object.

    Input is read from ``IN_DIR/<location_dir>/simulation.pickle``.  A log
    file and the pickled model are written to ``OUT_DIR/<location_dir>``,
    both named ``simulation_<max_t>-<max_d>`` (two decimal places each) with
    the suffixes ``.log`` and ``-sepp_obj.pickle``.

    The module-level names ``bg_kde_kwargs``, ``trigger_kde_kwargs`` and
    ``niter`` supply the KDE settings and the number of training iterations.

    :param location_dir: sub-directory name used under both IN_DIR and OUT_DIR.
    :param max_t: passed to the model as ``max_delta_t``.
    :param max_d: passed to the model as ``max_delta_d``.
    :return: None.  An exception raised by training is logged and swallowed;
        the model object is pickled whether or not training succeeded.
        Errors raised before training starts (loading data, building the
        model) propagate to the caller.
    """
    data_file = os.path.join(IN_DIR, location_dir, 'simulation.pickle')
    out_dir = os.path.join(OUT_DIR, location_dir)
    # Shared stem for every output file produced by this run.
    file_stem = os.path.join(out_dir, 'simulation_%.2f-%.2f' % (max_t, max_d))
    log_file = file_stem + '.log'

    # The output directory must exist before the FileHandler opens the log.
    if not os.path.isdir(out_dir):
        os.makedirs(out_dir)

    # set loggers
    # NOTE(review): this handler is added to 'simulation_study' without
    # clearing earlier ones, so repeated calls in one process accumulate
    # handlers on that logger - confirm whether that is intended.
    logger = logging.getLogger('simulation_study')
    logger.setLevel(logging.DEBUG)
    fh = logging.FileHandler(log_file)
    formatter = logging.Formatter(
        '%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    fh.setFormatter(formatter)
    logger.addHandler(fh)

    for t in ('point_process.models', 'kde.models'):
        this_logger = logging.getLogger(t)
        this_logger.setLevel(logging.DEBUG)
        this_logger.handlers = []  # clear existing handlers
        this_logger.addHandler(fh)  # replace with the same file output

    logger.info("Logger set.  Script started.")
    logger.info("Simulation study.")

    # load data
    # Pickle streams are bytes: open in binary mode so this works on
    # Python 3 and is not subject to newline translation on Windows.
    # NOTE: pickle.load must only be used on trusted files.
    with open(data_file, 'rb') as f:
        data = pickle.load(f)
    logger.info("Loaded data.")

    logger.info("Instantiating SEPP object")
    r = pp_models.SeppStochasticNn(data=data,
                                   max_delta_d=max_d,
                                   max_delta_t=max_t,
                                   bg_kde_kwargs=bg_kde_kwargs,
                                   trigger_kde_kwargs=trigger_kde_kwargs)
    # Initial estimate computed from the data and the model's linkage.
    p = estimation.estimator_bowers(data, r.linkage)
    r.p = p
    r.set_seed(42)

    try:
        logger.info("Starting training run.")
        r.train(niter=niter)
    except Exception as exc:
        # Best effort: record the failure (with traceback) and still save
        # the model object in its current state below.
        logger.exception(repr(exc))
    finally:
        logger.info("Saving results (or None).")
        with open(file_stem + '-sepp_obj.pickle', 'wb') as f:
            pickle.dump(r, f)
def validate_point_process(
        nicl_type=3,
        end_date=datetime.datetime(2012, 3, 1, tzinfo=pytz.utc),
        start_date=None,
        jiggle=None,
        num_validation=10,
        num_pp_iter=15,
        grid=100,
        time_step=1,
        pred_dt_plus=1,
):
    """Run a SEPP validation over the Camden region for one crime type.

    :param nicl_type: crime type forwarded to ``get_crimes_by_type``.
    :param end_date: datetime converted to days since ``t0`` and used as the
        cutoff given to ``set_t_cutoff``.
    :param start_date: forwarded to ``get_crimes_by_type``.
    :param jiggle: forwarded to ``get_crimes_by_type`` as ``jiggle_scale``.
    :param num_validation: added to the cutoff to form ``t_upper`` of the run.
    :param num_pp_iter: value of ``niter`` in the training kwargs.
    :param grid: argument to ``set_sample_units``.
    :param time_step: forwarded to the validation run.
    :param pred_dt_plus: forwarded to the validation run.
    :return: tuple ``(result of vb.run, the SeppValidation object)``.
    """

    def bowers_estimator(x, y):
        return estimation.estimator_bowers(x, y, ct=1, cd=0.02)

    # get data (the third returned value is not used here)
    crimes, t0, _ = get_crimes_by_type(
        nicl_type=nicl_type,
        only_new=True,
        jiggle_scale=jiggle,
        start_date=start_date,
    )

    # find end_date in days from t0
    elapsed = end_date - t0
    end_days = elapsed.total_seconds() / SEC_IN_DAY

    # get domain
    poly = get_camden_region()

    model_kwargs = {
        'max_delta_t': 60,
        'max_delta_d': 1000,
        'estimation_function': bowers_estimator,
        'trigger_kde_kwargs': {
            'min_bandwidth': np.array([0.3, 5., 5.]),
        },
    }
    vb = validate.SeppValidation(crimes,
                                 spatial_domain=poly,
                                 model_kwargs=model_kwargs)
    vb.set_sample_units(grid)
    vb.set_t_cutoff(end_days, b_train=False)

    outcome = vb.run(
        time_step=time_step,
        t_upper=end_days + num_validation,
        pred_dt_plus=pred_dt_plus,
        train_kwargs={'niter': num_pp_iter},
        verbose=True,
    )
    return outcome, vb
# Date window for the crime query, and the cutoff value used for both the
# point-process validation and the historic hotspot model below.
start_date = datetime.datetime(2001, 1, 1, 0)
end_date = datetime.datetime(2003, 1, 1)
first_training_size = 50

poly = compute_chicago_region()
res, t0, cid = get_crimes_by_type(crime_type='burglary',
                                  start_date=start_date,
                                  end_date=end_date)

# Point-process validation over the Chicago region.
vb = validate.PpValidation(
    res,
    spatial_domain=poly,
    model_kwargs=dict(
        max_trigger_t=30,
        max_trigger_d=200,
        estimator=lambda x, y: estimation.estimator_bowers(x, y, ct=1, cd=0.02),
    ),
)
vb.set_sample_units(250)
vb.set_t_cutoff(first_training_size, b_train=False)

# A single step: t_upper is one unit past the cutoff.
sepp_res = vb.run(
    time_step=1,
    t_upper=first_training_size + 1,
    train_kwargs=dict(niter=20, tol_p=1e-5),
    verbose=True,
)

# use basic historic data spatial hotspot
sk = hotspot.SKernelHistoric(first_training_size)  # use heatmap from same period

# NOTE(review): this uses ``validation`` whereas the block above uses
# ``validate`` - confirm both modules are imported under these names.
vb_sk = validation.ValidationBase(res, sk, poly)
# Reuse the sample units of the point-process validation.
vb_sk.roc.copy_sample_units(vb.roc)
# vb_sk._grid = vb._grid
# vb_sk.centroids = vb.centroids
# vb_sk.a = vb.a
'max_delta_t': 60, 'max_delta_d': 400, 'bg_kde_kwargs': { 'number_nn': [100, 15], 'min_bandwidth': [0.5, 20, 20], 'strict': False }, 'trigger_kde_kwargs': { 'number_nn': 15, 'min_bandwidth': [0.5, 20, 20], 'strict': False }, 'estimation_function': lambda x, y: estimation.estimator_bowers(x, y, **estimate_kwargs), 'seed': 42, # doesn't matter what this is, just want it fixed } niter = 75 ## CAMDEN @shutdown_decorator def camden(): start_day_numbers = [277, 307, 337, 367] # number of days from t0 (1/3/2011) poly = cad.get_camden_region()
# Generate a seeded simulation and extract its events as an array.
c = simulate.MohlerSimulation()
# c.off_sigma_x = 1.0
c.seed(42)
c.run()
data = np.array(c.data)  # (t, x, y)

# sort data by time ascending (may be done already?)
time_order = np.argsort(data[:, 0])
data = data[time_order]

# Reference model on the unscaled data.
r0 = pp_class(
    data=data,
    max_delta_d=max_delta_d,
    max_delta_t=max_delta_t,
    bg_kde_kwargs=bg_kde_kwargs,
    trigger_kde_kwargs=trigger_kde_kwargs,
    parallel=False,
)
p0 = estimation.estimator_bowers(data, r0.linkage, ct=1, cd=10)
r0.p = p0
r0.set_seed(42)

# Second model: every column after the first is scaled by alpha, and
# max_delta_d is scaled by the same factor; max_delta_t is left unchanged.
alpha = 0.1
new_data = data.copy()
new_data[:, 1:] *= alpha
r1 = pp_class(
    data=new_data,
    max_delta_d=max_delta_d * alpha,
    max_delta_t=max_delta_t,
    bg_kde_kwargs=bg_kde_kwargs,
    trigger_kde_kwargs=trigger_kde_kwargs,
    parallel=False,
)
# vb.set_t_cutoff(4.0)

# Disabled alternative: basic historic data spatial hotspot
# sk = hotspot.SKernelHistoric(2) # use heatmap from final 2 days data
# vb = ValidationBase(data, hotspot.Hotspot, camden.mpoly, model_args=(sk,))
# vb.set_sample_units(grid_length=200)
# vb.set_t_cutoff(4.0)

# Disabled alternative: point process learning method
# vb = SeppValidation(data, model_kwargs={
#     'max_delta_t': 80,
#     'max_delta_d': 0.75,
#     'estimation_function': lambda x, y: estimation.estimator_bowers(x, y, ct=1, cd=10),
# })

# Active configuration: build the SEPP model up front and validate with it.
sepp = models.SeppStochasticNn(
    max_delta_t=80,
    max_delta_d=0.75,
    estimation_function=lambda x, y: estimation.estimator_bowers(x, y, ct=1, cd=10),
)
vb = SeppValidationFixedModel(data, sepp)

# Prediction components requested from the validation run.
pred_kwargs = dict(include=('full', 'full_static', 'bg', 'trigger'))

vb.set_sample_units(3, n_sample_per_grid=10)
vb.set_t_cutoff(400, b_train=False)
res = vb.run(
    time_step=5,
    n_iter=5,
    train_kwargs=dict(niter=10),
    verbose=True,
    pred_kwargs=pred_kwargs,
)

# Imported here, at the point of use, exactly as in the original snippet.
from point_process import plotting
plotting.validation_multiplot(res)