Example #1
0
def validate_point_process(
        start_date=datetime.date(2011, 3, 1),
        end_date=datetime.date(2012, 3, 31),
        initial_cutoff=212,
        num_validation=100,
        num_pp_iter=100,
        grid=250,
        n_sample_per_grid=20,
        domain=None,
        pp_class=pp_models.SeppStochasticNn,
        model_kwargs=None):
    """Build a point process model on burglary data and run a validation on it.

    Args:
        start_date: First date passed to ``get_crimes_by_type``.
        end_date: Last date passed to ``get_crimes_by_type``.
        initial_cutoff: Passed to the validation object as ``cutoff_t``; also
            treated as a number of days in the feasibility warning below.
        num_validation: Passed to ``vb.run`` as ``n_iter``.
        num_pp_iter: Passed to ``vb.run`` as ``train_kwargs['niter']``.
        grid: First argument to ``vb.set_sample_units``.
        n_sample_per_grid: Forwarded to ``vb.set_sample_units``.
        domain: Spatial domain; ``compute_chicago_region()`` is used when None.
        pp_class: Model class, instantiated as ``pp_class(data=data, **model_kwargs)``.
        model_kwargs: Keyword arguments for ``pp_class``.  A falsy value
            (None or an empty dict) selects the default set defined below.

    Returns:
        Tuple ``(res, vb)``: the value returned by ``vb.run`` and the
        validation object itself.
    """
    if not model_kwargs:
        model_kwargs = {
            'max_delta_t': 150,
            'max_delta_d': 500,
            'bg_kde_kwargs': {'number_nn': [101, 16]},
            'trigger_kde_kwargs': {'number_nn': 15},
            # NOTE(review): key spelling ('conincident') is kept unchanged;
            # confirm it matches the keyword the model class actually reads.
            'remove_conincident_pairs': False,
            'estimation_function': lambda x, y: estimation.estimator_exp_gaussian(x, y, 0.1, 50, frac_bg=None),
            'seed': 42,
        }

    # Warn (but carry on) when the requested runs extend past the end date.
    last_required = start_date + datetime.timedelta(days=initial_cutoff + num_validation)
    if last_required > end_date:
        warnings.warn("Requested number of validation runs is too large for the data size")

    if domain is None:
        domain = compute_chicago_region()

    data, t0, cid = get_crimes_by_type(
        crime_type='burglary',
        start_date=start_date,
        end_date=end_date,
        domain=domain,
    )

    sepp = pp_class(data=data, **model_kwargs)

    # Hand the configured model to the validator.  Previously `sepp` was built
    # and then dropped, so `pp_class` and `model_kwargs` had no effect.
    vb = validate.SeppValidationFixedModelIntegration(
        data=data,
        model=sepp,
        spatial_domain=domain,
        cutoff_t=initial_cutoff,
    )
    vb.set_sample_units(grid, n_sample_per_grid=n_sample_per_grid)

    ## TODO: check the number of iterations reported is as expected here
    res = vb.run(time_step=1, n_iter=num_validation,
                 train_kwargs={'niter': num_pp_iter},
                 verbose=True)

    return res, vb
Example #2
0
def apply_sepp_stochastic_nn(data,
                             data_index,
                             domain,
                             grid_squares=None,
                             max_t=90,
                             max_d=500,
                             num_nn=(100, 15),
                             niter_training=50,
                             num_sample_points=10,
                             seed=43):
    """Configure a SeppStochasticNn model and run a 100-iteration validation.

    Args:
        data: Data passed to both the model and the validation object.
        data_index: Forwarded to the validation object as ``data_index``.
        domain: Forwarded to the validation object as ``spatial_domain``.
        grid_squares: When truthy, used as predefined sample units; otherwise
            sample units are set with a first argument of 250.
        max_t: Model ``max_delta_t``.
        max_d: Model ``max_delta_d``.
        num_nn: Used whole for the background KDE ``number_nn``; its last
            element is used for the trigger KDE ``number_nn``.
        niter_training: Passed to ``run`` as ``train_kwargs['niter']``.
        num_sample_points: Sample point count given to the sample-unit setup.
        seed: Model ``seed``.

    Returns:
        The value returned by the validation object's ``run`` call.
    """
    estimator = lambda x, y: estimation.estimator_bowers_fixed_proportion_bg(
        x, y, ct=1, cd=10, frac_bg=0.5)

    model_options = dict(
        max_delta_t=max_t,
        max_delta_d=max_d,
        seed=seed,
        estimation_function=estimator,
        trigger_kde_kwargs={'strict': False, 'number_nn': num_nn[-1]},
        bg_kde_kwargs={'strict': True, 'number_nn': num_nn},
    )
    model = pp_models.SeppStochasticNn(data=data, **model_options)

    # The cutoff comes from the module-level INITIAL_CUTOFF and T0 values.
    validator = validate.SeppValidationFixedModelIntegration(
        data=data,
        model=model,
        data_index=data_index,
        spatial_domain=domain,
        cutoff_t=INITIAL_CUTOFF + T0,
    )

    if not grid_squares:
        validator.set_sample_units(250, num_sample_points)
    else:
        validator.roc.set_sample_units_predefined(grid_squares,
                                                  num_sample_points)

    return validator.run(
        time_step=1,
        n_iter=100,
        verbose=True,
        train_kwargs={'niter': niter_training},
    )
# Module-level containers keyed by crime kind.  In the lines visible here only
# `res` and `sepp_objs` are filled in; the others are just initialised.
sepp_objs = {}
model_objs = {}
res = {}
vb_objs = {}
data_dict = {}
cid_dict = {}

# For each kind: load its dump, build a model, validate it and keep the results.
# `kinds`, `pp_class`, `model_kwargs`, `poly`, `INITIAL_CUTOFF`,
# `num_sample_points`, `num_validation` and `niter` are defined outside this
# section.
for k in kinds:

    data, t0, cid = cad.get_crimes_from_dump('monsuru_cad_%s' % k)
    # filter: day 210 is 27/9/2011, so use everything LESS THAN 211

    ### SeppValidationFixedModel with integration ROC sampling

    # NOTE(review): b_sepp is not read anywhere in the visible code.
    b_sepp = True
    sepp = pp_class(data=data, **model_kwargs)
    vb = validate.SeppValidationFixedModelIntegration(
        data=data,
        model=sepp,
        data_index=cid,
        spatial_domain=poly,
        cutoff_t=INITIAL_CUTOFF,
    )

    vb.set_sample_units(250, num_sample_points)
    res[k] = vb.run(time_step=1,
                    n_iter=num_validation,
                    verbose=True,
                    train_kwargs={'niter': niter})

    # Store the model as held by the validation object after the run.
    sepp_objs[k] = vb.model
Example #4
0
def camden():
    """Run the Camden 'model ageing' validations and pickle the results.

    For every crime type, one validation is run per start day number (used as
    the validation object's ``cutoff_t``), each up to the time of the last
    data row.  Results, models and validation objects are collected in dicts
    keyed by start day number and dumped with ``dill`` to
    ``ROOT_DIR/camden/model_ageing/<crime type>/``.  A failed run is recorded
    as None; a failure outside the per-run ``try`` is appended to an
    ``errors`` file in the same directory.

    Reads the module-level ``model_kwargs``, ``num_sample_points``, ``niter``
    and ``ROOT_DIR``.  Returns None.
    """
    start_day_numbers = [277, 307, 337,
                         367]  # number of days from t0 (1/3/2011)

    poly = cad.get_camden_region()
    qset = models.Division.objects.filter(type='cad_250m_grid')
    qset = sorted(qset, key=lambda x: int(x.name))
    grid_squares = [t.mpoly[0] for t in qset]

    # define crime types
    crime_types = {
        'burglary': 3,
        'robbery': 5,
        'theft_of_vehicle': 6,
        'violence': 1,
    }

    for (name, n) in crime_types.items():
        print("Crime type: %s" % name)
        base_dir = os.path.join(ROOT_DIR, 'camden', 'model_ageing', name)
        # Create the output directory up front so that both the result dumps
        # and the 'errors' fallback below have somewhere to write.
        if not os.path.isdir(base_dir):
            os.makedirs(base_dir)

        try:
            data, t0, cid = cad.get_crimes_by_type(n)
            t_upper = data[-1, 0]

            sepp_objs = {}
            vb_objs = {}
            res = {}

            for sdn in start_day_numbers:

                vb = validate.SeppValidationFixedModelIntegration(
                    data=data,
                    pp_class=pp_models.SeppStochasticNn,
                    data_index=cid,
                    spatial_domain=poly,
                    cutoff_t=sdn,
                    model_kwargs=model_kwargs,
                )

                vb.set_sample_units(grid_squares, num_sample_points)

                try:
                    res[sdn] = vb.run(time_step=1,
                                      t_upper=t_upper,
                                      verbose=True,
                                      train_kwargs={'niter': niter})
                    sepp_objs[sdn] = vb.model
                    vb_objs[sdn] = vb
                except Exception as exc:
                    # Best effort: record the failed run and carry on.
                    print(exc)
                    res[sdn] = None
                    sepp_objs[sdn] = None
                    vb_objs[sdn] = None

            # Pickle streams are binary, so the files are opened in 'wb' mode.
            outputs = (
                ('sepp_obj.pickle', sepp_objs),
                ('validation_obj.pickle', vb_objs),
                ('validation.pickle', res),
            )
            for fname, obj in outputs:
                with open(os.path.join(base_dir, fname), 'wb') as f:
                    dill.dump(obj, f)

        except Exception as exc:
            with open(os.path.join(base_dir, 'errors'), 'a') as f:
                f.write(repr(exc))
                f.write('\n')
Example #5
0
def run_chicago():
    """Run the Chicago (South division) 'model ageing' validations.

    For every crime type, one validation is run per start day number
    (277, 307, ... in steps of 30, 18 values), each up to the time of the last
    data row.  Results, models and validation objects are collected in dicts
    keyed by start day number and dumped with ``dill`` to
    ``ROOT_DIR/chicago/model_ageing/<crime type>/``.  A failed run is recorded
    as None; a failure outside the per-run ``try`` is appended to an
    ``errors`` file in the same directory.

    Reads the module-level ``model_kwargs``, ``num_sample_points``, ``niter``
    and ``ROOT_DIR``.  Returns None.
    """
    start_date = datetime.datetime(2011, 3, 1)
    end_date = start_date + datetime.timedelta(days=277 + 480)
    start_day_numbers = [277 + 30 * i for i in range(18)]

    # NOTE(review): `poly` is not used below (the South division is the
    # spatial domain); the call is kept in case it has side effects.
    poly = chicago.compute_chicago_region()
    south = models.ChicagoDivision.objects.get(name='South').mpoly

    # define crime types
    crime_types = {
        'burglary': 'burglary',
        'robbery': 'robbery',
        'theft_of_vehicle': 'motor vehicle theft',
        'violence': 'assault',
    }

    for (name, pt) in crime_types.items():
        print("Crime type: %s" % name)
        base_dir = os.path.join(ROOT_DIR, 'chicago', 'model_ageing', name)
        # Create the output directory before any work so the 'errors' fallback
        # below can always be written, even when data loading fails.
        if not os.path.isdir(base_dir):
            os.makedirs(base_dir)

        try:
            data, t0, cid = chicago.get_crimes_by_type(crime_type=pt,
                                                       start_date=start_date,
                                                       end_date=end_date,
                                                       domain=south)
            t_upper = data[-1, 0]

            sepp_objs = {}
            vb_objs = {}
            res = {}

            for sdn in start_day_numbers:

                vb = validate.SeppValidationFixedModelIntegration(
                    data=data,
                    pp_class=pp_models.SeppStochasticNn,
                    data_index=cid,
                    spatial_domain=south,
                    cutoff_t=sdn,
                    model_kwargs=model_kwargs,
                )

                vb.set_sample_units(250, num_sample_points)

                try:
                    res[sdn] = vb.run(time_step=1,
                                      t_upper=t_upper,
                                      verbose=True,
                                      train_kwargs={'niter': niter})
                    sepp_objs[sdn] = vb.model
                    vb_objs[sdn] = vb
                except Exception as exc:
                    # Best effort: record the failed run and carry on.
                    print(exc)
                    res[sdn] = None
                    sepp_objs[sdn] = None
                    vb_objs[sdn] = None

            # Pickle streams are binary, so the files are opened in 'wb' mode.
            outputs = (
                ('sepp_obj.pickle', sepp_objs),
                ('validation_obj.pickle', vb_objs),
                ('validation.pickle', res),
            )
            for fname, obj in outputs:
                with open(os.path.join(base_dir, fname), 'wb') as f:
                    dill.dump(obj, f)

        except Exception as exc:
            with open(os.path.join(base_dir, 'errors'), 'a') as f:
                f.write(repr(exc))
                f.write('\n')
def run_me(data, domain, out_dir, run_name, pp_class):
    """Run one validation for `data` and pickle the result under `out_dir`.

    Args:
        data: Data array; ``data[-1, 0]`` is read as the latest time available.
        domain: Passed to the validation object as ``spatial_domain``.
        out_dir: Output directory, created if missing.
        run_name: Used as the logger name and as the stem of the log file
            (``<run_name>.log``) and result file
            (``<run_name>-validation.pickle``).
        pp_class: Model class, instantiated as ``pp_class(data=data, **model_kwargs)``.

    Reads the module-level ``start_day_number``, ``num_validation``,
    ``model_kwargs``, ``grid_size``, ``num_sample_points``, ``niter`` and
    ``pred_include``.  Returns None; if the run raises, the error is logged
    and None is pickled in place of the results.
    """
    # data_file = os.path.join(IN_DIR, location_dir, '%s.pickle' % crime_type)
    # out_dir = os.path.join(OUT_DIR, location_dir, OUT_SUBDIR)
    log_file = os.path.join(out_dir, '%s.log' % run_name)

    if not os.path.isdir(out_dir):
        try:
            os.makedirs(out_dir)
        except OSError:
            # wait a moment, just in case another process has just done the folder creation
            time.sleep(1)
            if not os.path.isdir(out_dir):
                raise

    # set loggers
    logger = logging.getLogger(run_name)
    logger.setLevel(logging.DEBUG)
    fh = logging.FileHandler(log_file)
    formatter = logging.Formatter(
        '%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    fh.setFormatter(formatter)
    logger.addHandler(fh)

    for t in ('point_process.models', 'validation.validation'):
        this_logger = logging.getLogger(t)
        this_logger.setLevel(logging.DEBUG)
        this_logger.handlers = []  # clear existing handlers
        this_logger.addHandler(fh)  # replace with the same file output

    logger.info("Logger set.  Script started.")

    # check that num_validation iterations is feasible
    if start_day_number + num_validation - 1 > data[-1, 0]:
        this_num_validation = int(data[-1, 0]) - start_day_number + 1
        logger.info("Can only do %d validation runs" % this_num_validation)
    else:
        this_num_validation = num_validation

    logger.info("Instantiating validation object")

    sepp = pp_class(data=data, **model_kwargs)
    vb = validate.SeppValidationFixedModelIntegration(
        data=data,
        model=sepp,
        spatial_domain=domain,
        cutoff_t=start_day_number,
    )

    logger.info("Setting validation grid")
    vb.set_sample_units(grid_size, num_sample_points)
    file_stem = os.path.join(out_dir, run_name)

    # Bind `res` before the try so the `finally` clause can always dump it,
    # even if the run is cut short by something other than an Exception
    # (e.g. KeyboardInterrupt).
    res = None
    try:
        logger.info("Starting validation run.")
        res = vb.run(time_step=1,
                     n_iter=this_num_validation,
                     verbose=True,
                     train_kwargs={'niter': niter},
                     pred_kwargs={'include': pred_include})
    except Exception as exc:
        logger.error(repr(exc))
        res = None
    finally:
        logger.info("Saving results (or None).")
        # Pickle streams are binary, so the file is opened in 'wb' mode.
        with open(file_stem + '-validation.pickle', 'wb') as f:
            pickle.dump(res, f)
Example #7
0
def chicago_south_side(min_bandwidth, crime_type):
    """Validate the Chicago South model with a given trigger minimum bandwidth.

    Args:
        min_bandwidth: Two-element sequence.  The trigger KDE
            ``min_bandwidth`` is set to ``[min_bandwidth[0], min_bandwidth[1],
            min_bandwidth[1]]``.  Every element is also formatted with
            ``%.2f`` to build the log and output file names.
        crime_type: Selects ``IN_DIR/chicago_south/<crime_type>.pickle`` and
            prefixes the log and output file names.

    Reads the module-level ``IN_DIR``, ``OUT_DIR``, ``start_day_number``,
    ``num_validation``, ``num_sample_points`` and ``niter``, and MUTATES the
    module-level ``model_kwargs`` (its ``trigger_kde_kwargs`` entry).

    On success the results and the validation object are pickled under
    ``OUT_DIR/chicago_south/min_bandwidth_trigger_only``.

    Raises:
        Exception: Whatever the validation run raises, after logging it.
    """
    data_file = os.path.join(IN_DIR, 'chicago_south', '%s.pickle' % crime_type)
    poly_file = os.path.join(IN_DIR, 'boundaries.pickle')
    out_dir = os.path.join(OUT_DIR, 'chicago_south',
                           'min_bandwidth_trigger_only')
    log_file = os.path.join(
        out_dir,
        crime_type + '-' + '-'.join(['%.2f' % t
                                     for t in min_bandwidth]) + '.log')

    if not os.path.isdir(out_dir):
        os.makedirs(out_dir)

    # set loggers
    logger = logging.getLogger('vary_min_bandwidths.chicago_south_side')
    logger.setLevel(logging.DEBUG)
    fh = logging.FileHandler(log_file)
    formatter = logging.Formatter(
        '%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    fh.setFormatter(formatter)
    logger.addHandler(fh)

    for t in ('point_process.models', 'validation.validation'):
        this_logger = logging.getLogger(t)
        this_logger.setLevel(logging.DEBUG)
        this_logger.handlers = []  # clear existing handlers
        this_logger.addHandler(fh)  # replace with the same file output

    logger.info("Logger set.  Script started.")
    logger.info("Crime type: %s. Min bandwidths %s" %
                (crime_type, str(min_bandwidth)))

    # load data (pickle streams are binary, hence 'rb')
    # NOTE(review): pickle.load must only be used on trusted files.
    with open(data_file, 'rb') as f:
        data = pickle.load(f)
    with open(poly_file, 'rb') as f:
        boundaries = pickle.load(f)
        south_side_poly = boundaries['chicago_south']
    logger.info("Loaded data.")

    # check that num_validation iterations is feasible
    if start_day_number + num_validation - 1 > data[-1, 0]:
        this_num_validation = int(data[-1, 0]) - start_day_number + 1
        logger.info("Can only do %d validation runs" % this_num_validation)
    else:
        this_num_validation = num_validation

    model_kwargs['trigger_kde_kwargs']['min_bandwidth'] = [
        min_bandwidth[0], min_bandwidth[1], min_bandwidth[1]
    ]
    # model_kwargs['bg_kde_kwargs']['min_bandwidth'] = model_kwargs['trigger_kde_kwargs']['min_bandwidth']
    logger.info("Instantiating validation object")
    vb = validate.SeppValidationFixedModelIntegration(
        data=data,
        pp_class=pp_models.SeppStochasticNn,
        spatial_domain=south_side_poly,
        cutoff_t=start_day_number,
        model_kwargs=model_kwargs,
    )

    logger.info("Setting validation grid")
    vb.set_sample_units(250, num_sample_points)
    try:
        logger.info("Starting validation run.")
        res = vb.run(time_step=1,
                     n_iter=this_num_validation,
                     verbose=True,
                     train_kwargs={'niter': niter})
    except Exception as exc:
        logger.error(repr(exc))
        # Bare raise keeps the original traceback.
        raise
    else:
        logger.info("Saving results.")
        file_stem = os.path.join(
            out_dir,
            crime_type + '_' + '-'.join(['%.2f' % t for t in min_bandwidth]))
        # Pickle streams are binary, so the files are opened in 'wb' mode.
        with open(file_stem + '-validation.pickle', 'wb') as f:
            pickle.dump(res, f)
        with open(file_stem + '-vb_obj.pickle', 'wb') as f:
            pickle.dump(vb, f)
def camden():
    """Run Camden validations over a grid of minimum KDE bandwidths.

    For every crime type, one validation is run for each (t, d) pair drawn
    from the meshgrid of ``min_t_bds`` and ``min_d_bds``.  Both the trigger
    and background KDE ``min_bandwidth`` entries are set to ``[t, d, d]``.
    Results, models and validation objects are collected in dicts keyed by
    ``(t, d)`` and dumped with ``dill`` to
    ``ROOT_DIR/camden/min_bandwidth/<crime type>/``.  A failed run is recorded
    as None; a failure outside the per-run ``try`` is appended to an
    ``errors`` file in the same directory.

    Reads the module-level ``num_sample_points``, ``niter`` and ``ROOT_DIR``,
    and MUTATES the module-level ``model_kwargs``.  Returns None.
    """
    # The first day of the prediction is 3/12/2011; start_day_number is the
    # equivalent in number of days from t0 (1/3/2011)
    start_day_number = 277
    # start_day_number = 385

    num_validation = 120

    min_t_bds = [0, 0.5, 1, 2]
    min_d_bds = [0, 20, 50, 100]

    tt, dd = np.meshgrid(min_t_bds, min_d_bds)

    poly = cad.get_camden_region()
    qset = models.Division.objects.filter(type='cad_250m_grid')
    qset = sorted(qset, key=lambda x: int(x.name))
    grid_squares = [t.mpoly[0] for t in qset]

    # define crime types
    crime_types = {
        'burglary': 3,
        'robbery': 5,
        'theft_of_vehicle': 6,
        'violence': 1,
    }

    for (name, n) in crime_types.items():
        print("Crime type: %s" % name)
        base_dir = os.path.join(ROOT_DIR, 'camden', 'min_bandwidth', name)
        if not os.path.isdir(base_dir):
            os.makedirs(base_dir)

        try:
            data, t0, cid = cad.get_crimes_by_type(n)

            sepp_objs = {}
            vb_objs = {}
            res = {}

            # jiggle grid-snapped points
            # data = spatial.jiggle_on_grid_points(data, grid_squares)

            # check that num_validation iterations is feasible
            if start_day_number + num_validation - 1 > data[-1, 0]:
                this_num_validation = int(data[-1, 0]) - start_day_number + 1
                print("Can only do %d validation runs" % this_num_validation)
            else:
                this_num_validation = num_validation

            for t, d in zip(tt.flat, dd.flat):
                model_kwargs['trigger_kde_kwargs']['min_bandwidth'] = [t, d, d]
                model_kwargs['bg_kde_kwargs']['min_bandwidth'] = [t, d, d]
                vb = validate.SeppValidationFixedModelIntegration(
                    data=data,
                    pp_class=pp_models.SeppStochasticNn,
                    data_index=cid,
                    spatial_domain=poly,
                    cutoff_t=start_day_number,
                    model_kwargs=model_kwargs,
                )

                vb.set_sample_units(grid_squares, num_sample_points)

                try:
                    res[(t, d)] = vb.run(time_step=1,
                                         n_iter=this_num_validation,
                                         verbose=True,
                                         train_kwargs={'niter': niter})
                    sepp_objs[(t, d)] = vb.model
                    vb_objs[(t, d)] = vb
                except Exception as exc:
                    # Best effort: record the failed run and carry on.
                    print(exc)
                    res[(t, d)] = None
                    sepp_objs[(t, d)] = None
                    vb_objs[(t, d)] = None

            # Pickle streams are binary, so the files are opened in 'wb' mode.
            outputs = (
                ('sepp_obj.pickle', sepp_objs),
                ('validation_obj.pickle', vb_objs),
                ('validation.pickle', res),
            )
            for fname, obj in outputs:
                with open(os.path.join(base_dir, fname), 'wb') as f:
                    dill.dump(obj, f)

        except Exception as exc:
            with open(os.path.join(base_dir, 'errors'), 'a') as f:
                f.write(repr(exc))
                f.write('\n')
def run_me(location_dir, location_poly, max_delta_t, max_delta_d, crime_type):
    """Run one validation with the given maximum trigger time/distance.

    Args:
        location_dir: Sub-directory name used under both ``IN_DIR`` (input
            data) and ``OUT_DIR`` (output, in ``max_triggers``); also part of
            the logger name.
        location_poly: Passed to the validation object as ``spatial_domain``.
        max_delta_t: Stored in ``model_kwargs['max_delta_t']``; formatted with
            ``%d`` in file names and log messages.
        max_delta_d: Stored in ``model_kwargs['max_delta_d']``; formatted with
            ``%d`` in file names and log messages.
        crime_type: Selects ``<crime_type>.pickle`` as input and prefixes the
            log and output file names.

    Reads the module-level ``IN_DIR``, ``OUT_DIR``, ``start_day_number``,
    ``num_validation``, ``num_sample_points`` and ``niter``, and MUTATES the
    module-level ``model_kwargs``.  Returns None; if the run raises, the
    error is logged and None is pickled in place of the results.  The
    validation object is pickled in either case.
    """
    data_file = os.path.join(IN_DIR, location_dir, '%s.pickle' % crime_type)
    out_dir = os.path.join(OUT_DIR, location_dir, 'max_triggers')
    log_file = os.path.join(out_dir, crime_type + '_' + '%d-%d.log' % (max_delta_t, max_delta_d))

    if not os.path.isdir(out_dir):
        os.makedirs(out_dir)

    # set loggers
    logger = logging.getLogger('vary_max_triggers.%s' % location_dir)
    logger.setLevel(logging.DEBUG)
    fh = logging.FileHandler(log_file)
    formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    fh.setFormatter(formatter)
    logger.addHandler(fh)

    for t in ('point_process.models', 'validation.validation'):
        this_logger = logging.getLogger(t)
        this_logger.setLevel(logging.DEBUG)
        this_logger.handlers = []  # clear existing handlers
        this_logger.addHandler(fh)  # replace with the same file output

    logger.info("Logger set.  Script started.")
    logger.info("Crime type: %s. Max delta t %d, max delta d %d" % (crime_type, max_delta_t, max_delta_d))

    # load data (pickle streams are binary, hence 'rb')
    # NOTE(review): pickle.load must only be used on trusted files.
    with open(data_file, 'rb') as f:
        data = pickle.load(f)
    logger.info("Loaded data.")

    # check that num_validation iterations is feasible
    if start_day_number + num_validation - 1 > data[-1, 0]:
        this_num_validation = int(data[-1, 0]) - start_day_number + 1
        logger.info("Can only do %d validation runs" % this_num_validation)
    else:
        this_num_validation = num_validation

    model_kwargs['max_delta_t'] = max_delta_t
    model_kwargs['max_delta_d'] = max_delta_d
    logger.info("Instantiating validation object")
    vb = validate.SeppValidationFixedModelIntegration(
        data=data,
        pp_class=pp_models.SeppStochasticNn,
        spatial_domain=location_poly,
        cutoff_t=start_day_number,
        model_kwargs=model_kwargs,
    )

    logger.info("Setting validation grid")
    vb.set_sample_units(250, num_sample_points)
    file_stem = os.path.join(out_dir, crime_type + '_' + '%d-%d' % (max_delta_t, max_delta_d))

    # Bind `res` before the try so the `finally` clause can always dump it,
    # even if the run is cut short by something other than an Exception
    # (e.g. KeyboardInterrupt).
    res = None
    try:
        logger.info("Starting validation run.")
        res = vb.run(time_step=1, n_iter=this_num_validation, verbose=True, train_kwargs={'niter': niter})
    except Exception as exc:
        logger.error(repr(exc))
        res = None
    finally:
        logger.info("Saving results (or None).")
        # Pickle streams are binary, so the files are opened in 'wb' mode.
        with open(file_stem + '-validation.pickle', 'wb') as f:
            pickle.dump(res, f)
        with open(file_stem + '-vb_obj.pickle', 'wb') as f:
            pickle.dump(vb, f)