"""
scratch data
"""

# Field accessors: each maps an incident id to one attribute of the record.
location_f = crime_data_fxns.house_break_f('latlng')
year_f = crime_data_fxns.house_break_f('year')

# Cambridge bounding box, built once (the original rebuilt it per incident
# inside the filter predicate — it is loop-invariant).
_cambridge_region = utils.latlng_grid_region(
    crime_data.constants.cambridge_min_lat,
    crime_data.constants.cambridge_max_lat,
    crime_data.constants.cambridge_min_lng,
    crime_data.constants.cambridge_max_lng,
)
# Keep burglaries from 2003-2005 that fall inside the Cambridge box.
# (List comprehension replaces list(itertools.ifilter(...)): identical result,
# avoids shadowing the builtin `id`, and works on both Python 2 and 3.)
data_id_iterable = [
    data_id
    for data_id in crime_data_fxns.AllHouseBurglaryIterable()
    if 2003 <= year_f(data_id) <= 2005 and location_f(data_id) in _cambridge_region
]
#data_id_iterable = [data_id for data_id in crime_data_fxns.AllHouseBurglaryIterable() if location_f(data_id) in _cambridge_region]

# Categorical features over raw entry attributes; each bin tests equality
# against one observed string value.
cat_fs = [
    utils.categorical_f(
        crime_data_fxns.house_break_f('location_of_entry'),
        [utils.equals_bin('Door: Front'), utils.equals_bin('Window: Ground'), utils.equals_bin('Door: Rear')],
    ),
    utils.categorical_f(
        crime_data_fxns.house_break_f('means_of_entry'),
        [utils.equals_bin('Pried'), utils.equals_bin('Unlocked'), utils.equals_bin('Shoved/Forced'), utils.equals_bin('Broke')],
    ),
#    utils.categorical_f(crime_data_fxns.house_break_f('categorization'), [utils.equals_bin('Professional'), utils.equals_bin('Unprofessional'), utils.equals_bin('Attempt')]),
]
int_cat_fs = [utils.int_f_from_categorical_f(cat_f) for cat_f in cat_fs]
x_f = utils.series_f(*int_cat_fs)
#x_f = utils.series_f(utils.hard_code_f(0))
time_f = crime_data_fxns.house_break_f('date_num')
in_pattern_f = crime_data_fxns.in_pattern_f()
pattern_f = crime_data_fxns.house_break_f('pattern')
# One datum per incident: (id, time, location, features, in-pattern flag, pattern).
scratch_data = [
    tensor_scan_fxns.datum(
        data_id,
        time_f(data_id),
        location_f(data_id),
        x_f(data_id),
        in_pattern_f(data_id),
        pattern_f(data_id),
    )
    for data_id in data_id_iterable
]

"""
scratch pattern_finder
"""
# Cambridge bounding box, unpacked into scalars for the scan below.
lat_min = crime_data.constants.cambridge_min_lat
lat_max = crime_data.constants.cambridge_max_lat
lng_min = crime_data.constants.cambridge_min_lng
lng_max = crime_data.constants.cambridge_max_lng

# 16 x 16 spatial grid over the bounding box.
num_lat, num_lng = 16, 16
regions_F = utils.latlng_grid_regions_F(num_lat, num_lng)

# Background counts: per-region bins with independent categorical x
# distributions.  NOTE(review): 0.00001 is presumably a smoothing / pseudo-count
# constant for bin_region_count_F — confirm against its definition.
background_count_F = tensor_scan_fxns.region_x_independent_tensor_count_F(
    tensor_scan_fxns.bin_region_count_F(0.00001),
    tensor_scan_fxns.joint_x_distribution_F(
        utils.independent_categorical_joint_distribution_F()
    ),
)

# Foreground counts: raw empirical tensor counts.
foreground_count_F = tensor_scan_fxns.empirical_tensor_count_F()

# Iterative coordinate-wise argmax: random initial subset (argument 1.0),
# at most 10 iterations, 0.001 convergence threshold — TODO confirm the
# parameter meanings against iterative_argmax_F.
optimizer_F = utils.iterative_argmax_F(
    utils.get_initial_subset_x_random(1.0),
    utils.cycle_through_coord_iterative_step(),
    10,
    0.001,
)
import numpy as np
import functools
import pdb


"""
simulated data for the agglomerative method; the background distributions should be wider than the pattern ones
"""
# Sample sizes for the simulated background and pattern sets.
background_agg_N = 300
pattern_agg_N = 300

# Zero-argument samplers.  Background time is uniform over [0, 10); pattern
# time is tightly concentrated around 5.  The 2-tuple loc passed to
# np.random.normal makes each location sample a length-2 array (a 2-d point);
# the background spread (10.0) is much wider than the pattern spread (0.2).
agg_background_time_f = functools.partial(np.random.uniform, 0.0, 10.0)
agg_pattern_time_f = functools.partial(np.random.uniform, 4.9, 5.1)
agg_background_location_f = functools.partial(np.random.normal, (1.0, 1.0), 10.0)
agg_pattern_location_f = functools.partial(np.random.normal, (-1.0, -1.0), 0.2)

# Categorical feature samplers.  NOTE(review): the pattern x distribution is
# identical to the background one, so pattern_x_diff_agg_data differs from
# background data only by its label — confirm this is intended.
agg_background_x_f = utils.series_f(
    functools.partial(utils.random_categorical, [0.1, 0.1, 0.8]),
    functools.partial(utils.random_categorical, [0.1, 0.8, 0.1]),
)
agg_pattern_x_f = utils.series_f(
    functools.partial(utils.random_categorical, [0.1, 0.1, 0.8]),
    functools.partial(utils.random_categorical, [0.1, 0.8, 0.1]),
)

# One dataset per scenario; the final argument is the background(0)/pattern(1)
# label, and each scenario swaps exactly one sampler to its pattern version.
background_agg_data = [
    fxns.datum(i, agg_background_time_f(), agg_background_location_f(), agg_background_x_f(), 0)
    for i in xrange(background_agg_N)
]
pattern_time_diff_agg_data = [
    fxns.datum(i, agg_pattern_time_f(), agg_background_location_f(), agg_background_x_f(), 1)
    for i in xrange(pattern_agg_N)
]
pattern_location_diff_agg_data = [
    fxns.datum(i, agg_background_time_f(), agg_pattern_location_f(), agg_background_x_f(), 1)
    for i in xrange(pattern_agg_N)
]
pattern_x_diff_agg_data = [
    fxns.datum(i, agg_background_time_f(), agg_background_location_f(), agg_pattern_x_f(), 1)
    for i in xrange(pattern_agg_N)
]


"""
simulated data for subset scan: use two different time distributions for background and foreground that are mostly disjoint, and a foreground location distribution concentrated enough that its density at the mode exceeds the background's.
"""