""" scratch data """ location_f = crime_data_fxns.house_break_f('latlng') year_f = crime_data_fxns.house_break_f('year') data_id_iterable = list(itertools.ifilter(lambda id: year_f(id) >= 2003 and year_f(id) <= 2005 and location_f(id) in utils.latlng_grid_region(crime_data.constants.cambridge_min_lat, crime_data.constants.cambridge_max_lat, crime_data.constants.cambridge_min_lng, crime_data.constants.cambridge_max_lng), crime_data_fxns.AllHouseBurglaryIterable())) #data_id_iterable = list(itertools.ifilter(lambda id: location_f(id) in utils.latlng_grid_region(crime_data.constants.cambridge_min_lat, crime_data.constants.cambridge_max_lat, crime_data.constants.cambridge_min_lng, crime_data.constants.cambridge_max_lng), crime_data_fxns.AllHouseBurglaryIterable())) cat_fs = [\ utils.categorical_f(crime_data_fxns.house_break_f('location_of_entry'), [utils.equals_bin('Door: Front'), utils.equals_bin('Window: Ground'), utils.equals_bin('Door: Rear')]),\ utils.categorical_f(crime_data_fxns.house_break_f('means_of_entry'), [utils.equals_bin('Pried'), utils.equals_bin('Unlocked'), utils.equals_bin('Shoved/Forced'), utils.equals_bin('Broke')]),\ # utils.categorical_f(crime_data_fxns.house_break_f('categorization'), [utils.equals_bin('Professional'), utils.equals_bin('Unprofessional'), utils.equals_bin('Attempt')]),\ ] int_cat_fs = [utils.int_f_from_categorical_f(cat_f) for cat_f in cat_fs] x_f = utils.series_f(*int_cat_fs) #x_f = utils.series_f(utils.hard_code_f(0)) time_f = crime_data_fxns.house_break_f('date_num') in_pattern_f = crime_data_fxns.in_pattern_f() pattern_f = crime_data_fxns.house_break_f('pattern') scratch_data = [tensor_scan_fxns.datum(id, time_f(id), location_f(id), x_f(id), in_pattern_f(id), pattern_f(id)) for id in data_id_iterable] """ scratch pattern_finder """ lat_min, lat_max, lng_min, lng_max = crime_data.constants.cambridge_min_lat, crime_data.constants.cambridge_max_lat, crime_data.constants.cambridge_min_lng, crime_data.constants.cambridge_max_lng 
# Grid resolution handed to the region factory (cells per latitude / longitude).
num_lat = 16
num_lng = 16
regions_F = utils.latlng_grid_regions_F(num_lat, num_lng)

# Background counts: built from a binned region count and a joint x
# distribution that is itself built from an independent-categorical factory.
background_count_F = tensor_scan_fxns.region_x_independent_tensor_count_F(
    tensor_scan_fxns.bin_region_count_F(0.00001),
    tensor_scan_fxns.joint_x_distribution_F(
        utils.independent_categorical_joint_distribution_F()
    ),
)

# Foreground counts come from the empirical tensor count factory.
foreground_count_F = tensor_scan_fxns.empirical_tensor_count_F()

# Iterative argmax over subsets: random initial subset, then coordinate cycling.
# NOTE(review): 10 and 0.001 are presumably an iteration cap and a convergence
# tolerance -- confirm against utils.iterative_argmax_F.
optimizer_F = utils.iterative_argmax_F(
    utils.get_initial_subset_x_random(1.0),
    utils.cycle_through_coord_iterative_step(),
    10,
    0.001,
)
import numpy as np
import functools
import pdb

""" simulated data for agglomerative method. distributions for background should be wider """
# Number of simulated datums in the background set and in each pattern set.
background_agg_N = 300
pattern_agg_N = 300

# Zero-argument samplers. Times are uniform draws; locations are normal draws
# around a 2-d mean with a single scalar scale.
agg_background_time_f = functools.partial(np.random.uniform, 0.0, 10.0)
agg_pattern_time_f = functools.partial(np.random.uniform, 4.9, 5.1)
agg_background_location_f = functools.partial(np.random.normal, (1.0, 1.0), 10.0)
agg_pattern_location_f = functools.partial(np.random.normal, (-1.0, -1.0), 0.2)

# Categorical feature samplers, two features per datum.
# NOTE(review): the pattern sampler below uses exactly the same probabilities
# as the background sampler, so pattern_x_diff_agg_data may not actually differ
# from the background in x -- confirm whether that is intended.
agg_background_x_f = utils.series_f(
    functools.partial(utils.random_categorical, [0.1, 0.1, 0.8]),
    functools.partial(utils.random_categorical, [0.1, 0.8, 0.1]),
)
agg_pattern_x_f = utils.series_f(
    functools.partial(utils.random_categorical, [0.1, 0.1, 0.8]),
    functools.partial(utils.random_categorical, [0.1, 0.8, 0.1]),
)


def _draw_agg_data(count, time_f, location_f, x_f, label):
    """Build `count` datums with ids 0..count-1 and a constant `label`.

    For every datum the samplers are invoked in the order time, location, x,
    so the sequence of random draws matches the inline comprehensions this
    helper replaces.
    """
    return [
        fxns.datum(i, time_f(), location_f(), x_f(), label)
        for i in xrange(count)
    ]


# Label 0 marks the background set; label 1 marks the pattern sets. Each
# pattern set swaps exactly one background sampler for its pattern counterpart.
background_agg_data = _draw_agg_data(
    background_agg_N, agg_background_time_f, agg_background_location_f, agg_background_x_f, 0)
pattern_time_diff_agg_data = _draw_agg_data(
    pattern_agg_N, agg_pattern_time_f, agg_background_location_f, agg_background_x_f, 1)
pattern_location_diff_agg_data = _draw_agg_data(
    pattern_agg_N, agg_background_time_f, agg_pattern_location_f, agg_background_x_f, 1)
pattern_x_diff_agg_data = _draw_agg_data(
    pattern_agg_N, agg_background_time_f, agg_background_location_f, agg_pattern_x_f, 1)

""" simulated data for subsetscan. have 2 different time distributions for back/foreground. should be mostly disjoint. location_f should be quite concentrated for foreground, so that it's higher at its mode than background. """