Ejemplo n.º 1
0
def get_processed_burglary_data():
    """
    Burglary data with latlngs and pattern info.  Per the original author's
    note, the index (pincnum) is an int.

    Starts from ``get_burglary_raw_data()`` and adds these columns:

    * ``latlng``      -- ``(lat, lng)`` tuple taken from
                         ``utils.get_lat_lng('<address> cambridge, MA')``.
    * ``lat``/``lng`` -- the two components of ``latlng``.
    * ``date_object`` -- ``datetime`` built from the leading ``M/D/Y`` token
                         of ``date_from``.
    * ``date_num``    -- time elapsed since the earliest ``date_object``,
                         expressed in 366-day years.

    Rows whose ``latlng`` is null are dropped, and the result is right-merged
    (on the index) with the ``pattern`` column of ``get_patterns()``.  Two
    more columns are then added: ``in_pattern`` (True where ``pattern`` is not
    null) and ``tong_id`` (whatever ``get_tong_patterns()`` returns).

    The head of the merged frame is printed before it is returned.
    """
    incident_burglary = get_burglary_raw_data()
    patterns = get_patterns()
    import python_utils.exceptions
    from datetime import datetime

    def _geocode(address):
        # Look the address up once and reuse the result; the original issued
        # two identical lookups per address (one for 'lat', one for 'lng').
        location = utils.get_lat_lng('%s cambridge, MA' % address)
        return (location['lat'], location['lng'])

    # NOTE(review): exception_catcher_fxn_decorator(default, exceptions)
    # presumably returns `default` when one of `exceptions` is raised --
    # confirm against python_utils.
    nan_if_not_geocoded = utils.exception_catcher_fxn_decorator(
        np.nan, (python_utils.exceptions.TooLazyToComputeException,))
    incident_burglary['latlng'] = incident_burglary['address'].apply(
        nan_if_not_geocoded(_geocode))

    # Indexing a non-tuple latlng (e.g. a float NaN) raises TypeError, which
    # the wrapper is asked to catch.
    nan_if_not_indexable = utils.exception_catcher_fxn_decorator(np.nan, (TypeError,))
    incident_burglary['lat'] = incident_burglary['latlng'].apply(
        nan_if_not_indexable(lambda latlng: latlng[0]))
    incident_burglary['lng'] = incident_burglary['latlng'].apply(
        nan_if_not_indexable(lambda latlng: latlng[1]))

    def _parse_date(date_string):
        # The first whitespace-separated token holds month/day/year; split it
        # once instead of three times.  str.split replaces the Python-2-only
        # string.split function with identical results.
        fields = date_string.split()[0].split('/')
        return datetime(int(fields[2]), int(fields[0]), int(fields[1]))

    incident_burglary['date_object'] = incident_burglary['date_from'].apply(_parse_date)
    min_date = min(incident_burglary['date_object'])
    # 366 days * 24 h * 3600 s: the year length used to scale date_num.
    seconds_in_year = 31622400.0
    incident_burglary['date_num'] = incident_burglary['date_object'].apply(
        lambda a_date: (a_date - min_date).total_seconds() / seconds_in_year)

    # Keep only the incidents that have a latlng value.
    no_latlng_nas = incident_burglary[pd.notnull(incident_burglary['latlng'])]

    # how='right' keeps every remaining incident, with or without a pattern.
    merged = pd.merge(patterns[['pattern']], no_latlng_nas, how='right',
                      right_index=True, left_index=True)
    merged['in_pattern'] = pd.notnull(merged['pattern'])
    merged['tong_id'] = get_tong_patterns()
    # Parenthesised single-argument print behaves the same on Python 2 and 3.
    print(merged.head())
    return merged
Ejemplo n.º 2
0
import crime_data.constants as constants
import python_utils.python_utils.utils as utils
import pandas
import pdb
import numpy as np

# Prepares the data that will be handed to the visualization, whatever that
# ends up being.

# Load the two raw tables, each indexed by its first CSV column.
incident = pandas.read_csv(constants.incidents_raw_file, index_col=0)
burglary = pandas.read_csv(constants.burglary_raw_file, index_col=0)

# Column-wise concatenation; join='inner' keeps only the index labels that
# appear in both tables.
incident_burglary = pandas.concat(
    [incident, burglary],
    axis=1,
    join='inner',
)

# One utils.get_lat_lng lookup per address, qualified with the city and state.
lat_lngs = incident_burglary['address'].apply(
    lambda street: utils.get_lat_lng('%s cambridge, MA' % street)
)
# NOTE(review): presumably yields the year of each 'date_from' string --
# confirm against utils.date_string_to_year.
years = incident_burglary['date_from'].apply(utils.date_string_to_year)

patterns = pandas.read_csv(constants.merged_pattern_raw_file, index_col=0)

def fix_pincnum(s):
    try:
        import re
        if pandas.isnull(s):
            return s
        s = re.sub('\-', '0', s)
        s = str(int(float(s)))
        if len(s) == 9:
            return int('%s0%s' % (s[0:4], s[5:]))
        else:
            return int(s)