def _geocode_cambridge_address(address):
    """Return the ``(lat, lng)`` pair for a street address in Cambridge, MA.

    Performs a single ``utils.get_lat_lng`` lookup and reads both
    coordinates from its result.  Any exception raised by the lookup
    propagates to the caller.
    """
    location = utils.get_lat_lng('%s cambridge, MA' % address)
    return (location['lat'], location['lng'])


def _parse_date_from(date_string):
    """Build a ``datetime`` from the date part of a ``date_from`` value.

    Only the first whitespace-delimited token is used; it is split on
    ``'/'`` and read as month/day/year.  Raises ``IndexError`` when the
    token has fewer than three ``'/'``-separated fields and ``ValueError``
    when a field is not an integer.
    """
    from datetime import datetime

    fields = date_string.split()[0].split('/')
    return datetime(int(fields[2]), int(fields[0]), int(fields[1]))


def get_processed_burglary_data():
    """
    burglary data with latlngs, pattern info.  index(pincnum) is an int

    Starts from ``get_burglary_raw_data()`` and adds these columns:

    * ``latlng``      - ``(lat, lng)`` tuple geocoded from ``address``
    * ``lat``/``lng`` - the two components of ``latlng``
    * ``date_object`` - ``datetime`` parsed from ``date_from``
    * ``date_num``    - time since the earliest ``date_object``, in units
      of ``seconds_in_year``

    Rows whose ``latlng`` is null are dropped, the ``pattern`` column of
    ``get_patterns()`` is right-joined on the index, and ``in_pattern``
    (whether ``pattern`` is non-null) and ``tong_id`` (from
    ``get_tong_patterns()``) are added.

    Prints ``merged.head()`` before returning.

    Returns:
        The merged ``DataFrame``.
    """
    import python_utils.exceptions
    # The module-level import visible in this file binds the name ``pandas``
    # only, so bind ``pd`` locally instead of relying on an alias defined
    # elsewhere.
    import pandas as pd

    incident_burglary = get_burglary_raw_data()
    patterns = get_patterns()

    # NOTE(review): exception_catcher_fxn_decorator(default, excs) presumably
    # makes the wrapped function return `default` when one of `excs` is
    # raised -- confirm against python_utils.
    safe_geocode = utils.exception_catcher_fxn_decorator(
        np.nan,
        (python_utils.exceptions.TooLazyToComputeException,),
    )(_geocode_cambridge_address)
    incident_burglary['latlng'] = incident_burglary['address'].apply(safe_geocode)

    # A latlng that is not a tuple (e.g. np.nan) cannot be subscripted and
    # raises TypeError, which is the exception handed to the catcher here.
    first_or_nan = utils.exception_catcher_fxn_decorator(
        np.nan, (TypeError,))(lambda x: x[0])
    second_or_nan = utils.exception_catcher_fxn_decorator(
        np.nan, (TypeError,))(lambda x: x[1])
    incident_burglary['lat'] = incident_burglary['latlng'].apply(first_or_nan)
    incident_burglary['lng'] = incident_burglary['latlng'].apply(second_or_nan)

    incident_burglary['date_object'] = (
        incident_burglary['date_from'].apply(_parse_date_from))

    min_date = min(incident_burglary['date_object'])
    # 366 days * 24 h * 3600 s.  Kept unchanged so date_num keeps its
    # existing scale.
    seconds_in_year = 31622400.0
    incident_burglary['date_num'] = incident_burglary['date_object'].apply(
        lambda a_date: (a_date - min_date).total_seconds() / seconds_in_year)

    no_latlng_nas = incident_burglary[pd.notnull(incident_burglary['latlng'])]

    # Right join on the index: every geocoded incident is kept, and
    # `pattern` is null for incidents with no row in `patterns`.
    merged = pd.merge(patterns[['pattern']], no_latlng_nas, how='right',
                      right_index=True, left_index=True)
    merged['in_pattern'] = pd.notnull(merged['pattern'])
    merged['tong_id'] = get_tong_patterns()

    # Single-argument call form prints the same text on Python 2 and 3.
    print(merged.head())
    return merged
import crime_data.constants as constants import python_utils.python_utils.utils as utils import pandas import pdb import numpy as np """ prepares data i will give to visualization, whatever it is """ incident = pandas.read_csv(constants.incidents_raw_file, index_col = 0) burglary = pandas.read_csv(constants.burglary_raw_file, index_col = 0) incident_burglary = pandas.concat([incident, burglary], axis=1, join='inner') lat_lngs = incident_burglary['address'].apply(lambda s: utils.get_lat_lng('%s cambridge, MA' % s)) years = incident_burglary['date_from'].apply(utils.date_string_to_year) patterns = pandas.read_csv(constants.merged_pattern_raw_file, index_col = 0) def fix_pincnum(s): try: import re if pandas.isnull(s): return s s = re.sub('\-', '0', s) s = str(int(float(s))) if len(s) == 9: return int('%s0%s' % (s[0:4], s[5:])) else: return int(s)