# Scratch dataset: Cambridge house burglaries from 2003-2005 inside the
# Cambridge bounding box, with categorical entry features, packaged as
# tensor_scan datum objects.
location_f = crime_data_fxns.house_break_f('latlng')
year_f = crime_data_fxns.house_break_f('year')
# Build the bounding-box region once instead of per record inside the filter.
_cambridge_region = utils.latlng_grid_region(crime_data.constants.cambridge_min_lat, crime_data.constants.cambridge_max_lat, crime_data.constants.cambridge_min_lng, crime_data.constants.cambridge_max_lng)
# `crime_id` instead of `id` to avoid shadowing the builtin.
data_id_iterable = list(itertools.ifilter(lambda crime_id: 2003 <= year_f(crime_id) <= 2005 and location_f(crime_id) in _cambridge_region, crime_data_fxns.AllHouseBurglaryIterable()))
# Categorical features over the most common entry locations / means;
# anything outside the listed bins falls through to the default category.
cat_fs = [
    utils.categorical_f(crime_data_fxns.house_break_f('location_of_entry'), [utils.equals_bin('Door: Front'), utils.equals_bin('Window: Ground'), utils.equals_bin('Door: Rear')]),
    utils.categorical_f(crime_data_fxns.house_break_f('means_of_entry'), [utils.equals_bin('Pried'), utils.equals_bin('Unlocked'), utils.equals_bin('Shoved/Forced'), utils.equals_bin('Broke')]),
    # utils.categorical_f(crime_data_fxns.house_break_f('categorization'), [utils.equals_bin('Professional'), utils.equals_bin('Unprofessional'), utils.equals_bin('Attempt')]),
]
int_cat_fs = [utils.int_f_from_categorical_f(cat_f) for cat_f in cat_fs]
# x_f maps a crime id to the tuple of integer feature codes.
x_f = utils.series_f(*int_cat_fs)
time_f = crime_data_fxns.house_break_f('date_num')
in_pattern_f = crime_data_fxns.in_pattern_f()
pattern_f = crime_data_fxns.house_break_f('pattern')
scratch_data = [tensor_scan_fxns.datum(crime_id, time_f(crime_id), location_f(crime_id), x_f(crime_id), in_pattern_f(crime_id), pattern_f(crime_id)) for crime_id in data_id_iterable]

"""
scratch pattern_finder
"""
lat_min, lat_max, lng_min, lng_max = crime_data.constants.cambridge_min_lat, crime_data.constants.cambridge_max_lat, crime_data.constants.cambridge_min_lng, crime_data.constants.cambridge_max_lng
num_lat, num_lng = 16, 16
regions_F = utils.latlng_grid_regions_F(num_lat, num_lng)
background_count_F = tensor_scan_fxns.region_x_independent_tensor_count_F(tensor_scan_fxns.bin_region_count_F(0.00001), tensor_scan_fxns.joint_x_distribution_F(utils.independent_categorical_joint_distribution_F()))
foreground_count_F = tensor_scan_fxns.empirical_tensor_count_F()
optimizer_F = utils.iterative_argmax_F(utils.get_initial_subset_x_random(1.0), utils.cycle_through_coord_iterative_step(), 10, 0.001)
p_value_F = tensor_scan_fxns.identity_test_stat_F()
pattern_F = tensor_scan_fxns.pattern_F(background_count_F, foreground_count_F, optimizer_F, tensor_scan_fxns.pattern_test_stat, p_value_F)
pattern_finder_regions_F = tensor_scan_fxns.background_and_foreground_regions_F(regions_F)
num_windows = 20
raw_pattern_finder_F = tensor_scan_fxns.raw_pattern_finder_F(tensor_scan_fxns.many_windows_iterator(num_windows), pattern_finder_regions_F, pattern_F)
# ---- Beispiel #2 (extraction artifact: separator between two pasted examples) ----
"""
scratch data_iterable
"""

data_id_iterable = crime_data_fxns.AllHouseBurglaryIterable()
cat_fs = [\
          utils.categorical_f(crime_data_fxns.house_break_f('location_of_entry'), [utils.equals_bin('Door: Front'), utils.equals_bin('Window: Ground'), utils.equals_bin('Door: Rear')]),\
          utils.categorical_f(crime_data_fxns.house_break_f('categorization'), [utils.equals_bin('Professional'), utils.equals_bin('Unprofessional'), utils.equals_bin('Attempt')]),\
]
int_cat_fs = [utils.int_f_from_categorical_f(cat_f) for cat_f in cat_fs]
int_cat_fs_set_iterable = utils.get_powerset_iterator(int_cat_fs)
x_f_iterable = itertools.starmap(utils.series_f, int_cat_fs_set_iterable)
location_f = crime_data_fxns.house_break_f('latlng')
time_f = crime_data_fxns.house_break_f('date_num')
in_pattern_f = crime_data_fxns.in_pattern_f()
scratch_data_iterable = itertools.imap(lambda x_f: map(lambda id: tensor_scan_fxns.datum(id, time_f(id), location_f(id), x_f(id), in_pattern_f(id)), data_id_iterable), x_f_iterable)


"""
scratch pattern_finder_iterable
"""
num_lat_iterable = [5, 10, 15, 20, 30]
num_lng_iterable = num_lat_iterable
num_lat_num_lng_iterable = itertools.izip(num_lat_iterable, num_lng_iterable)
regions_F_iterable = itertools.starmap(utils.latlng_grid_regions_F, num_lat_num_lng_iterable)
pseudocounts_iterable = [0.001]
bin_region_count_F_iterable = itertools.imap(tensor_scan_fxns.bin_region_count_F, pseudocounts_iterable)
raw_joint_distribution_F_iterable = [utils.independent_categorical_joint_distribution_F()]
joint_x_distribution_F_iterable = itertools.imap(tensor_scan_fxns.joint_x_distribution_F, raw_joint_distribution_F_iterable)
background_count_F_iterable = itertools.starmap(tensor_scan_fxns.region_x_independent_tensor_count_F, itertools.product(bin_region_count_F_iterable, joint_x_distribution_F_iterable))
foreground_count_F_iterable = [tensor_scan_fxns.empirical_tensor_count_F()]
# Synthetic "aggregate" data: background events uniform in time on [0, 10],
# pattern events concentrated near t = 5; locations are 2-d Gaussians.
background_agg_N = 300
pattern_agg_N = 300
agg_background_time_f = functools.partial(np.random.uniform, 0.0, 10.0)
agg_pattern_time_f = functools.partial(np.random.uniform, 4.9, 5.1)
# np.random.normal with a 2-tuple mean broadcasts to a length-2 sample;
# background locations are diffuse (sd 10), pattern locations tight (sd 0.2).
agg_background_location_f = functools.partial(np.random.normal, (1.0, 1.0), 10.0)
agg_pattern_location_f = functools.partial(np.random.normal, (-1.0, -1.0), 0.2)
agg_background_x_f = utils.series_f(\
                                    functools.partial(utils.random_categorical, [0.1, 0.1, 0.8]),\
                                    functools.partial(utils.random_categorical, [0.1, 0.8, 0.1]),\
                                    )
# NOTE(review): agg_pattern_x_f uses the exact same category probabilities as
# agg_background_x_f, so pattern_x_diff_agg_data below is NOT distributionally
# different in x -- looks like a copy-paste slip; confirm intended weights.
agg_pattern_x_f = utils.series_f(\
                                 functools.partial(utils.random_categorical, [0.1, 0.1, 0.8]),\
                                 functools.partial(utils.random_categorical, [0.1, 0.8, 0.1]),\
)

# NOTE(review): `fxns.datum` here vs `tensor_scan_fxns.datum` earlier --
# presumably `fxns` is an alias for the same module; verify the import.
# Each "pattern" dataset differs from background along exactly one dimension
# (time, location, or x) and carries in_pattern flag 1 (background gets 0).
background_agg_data = [fxns.datum(i, agg_background_time_f(), agg_background_location_f(), agg_background_x_f(), 0) for i in xrange(background_agg_N)]
pattern_time_diff_agg_data = [fxns.datum(i, agg_pattern_time_f(), agg_background_location_f(), agg_background_x_f(), 1) for i in xrange(pattern_agg_N)]
pattern_location_diff_agg_data = [fxns.datum(i, agg_background_time_f(), agg_pattern_location_f(), agg_background_x_f(), 1) for i in xrange(pattern_agg_N)]
pattern_x_diff_agg_data = [fxns.datum(i, agg_background_time_f(), agg_background_location_f(), agg_pattern_x_f(), 1) for i in xrange(pattern_agg_N)]


"""
simulated data for subsetscan.  have 2 different time distributions for back/foreground.  should be mostly disjoint.  location_f should be quite concentrated for foreground, so that it's higher at its mode than background.  
"""
background_ss_N = 1900
pattern_ss_N = 100
ss_background_time_f = utils.generator_f(itertools.chain(iter(xrange(0,1000)), iter(xrange(1100,2000))))
ss_pattern_time_f = utils.generator_f(iter(xrange(1000, 1100)))
ss_background_location_f = functools.partial(utils.multivariate_random_uniform, [(0,10),(0,10)])
ss_pattern_location_f = functools.partial(utils.multivariate_random_uniform, [(-10,-5),(-10,-5)])
ss_background_x_f = utils.series_f(\