Ejemplo n.º 1
0
def assign_target(crime_data, grid_2d, cellsize_3d, mask=None, num_chunks=None, labeling=True, class_label=(0, 1)):
    """
    Build the target vector for the 3-D (x, y, t) grid of examples.

    Each element corresponds to one grid cell in one time chunk. For binary
    classification (``labeling=True``) a cell gets ``class_label[1]`` when it
    contains at least one crime incident and ``class_label[0]`` otherwise.
    For regression, the raw counts in ``target`` can be used directly.

    Returns
    -------
    target : 1-D array of per-cell incident counts (masked-out cells removed)
    label : 1-D array of class labels, or None when ``labeling`` is False
    """
    pts = crime_data[['X_COORD', 'Y_COORD', 'GROUP']]
    groups = pts['GROUP'].values
    # time axis spans every group in [min, max], including groups with no incidents
    grd_t = np.arange(np.min(groups), np.max(groups) + 1)
    grd_x, grd_y = grid_2d

    binned_pts = ks.bin_point_data_3d(pts.values, (grd_x, grd_y, grd_t),
                                      cellsize_3d, stat='count', geoIm=False)

    if num_chunks is None:
        num_chunks = len(np.unique(crime_data['GROUP']))
    if mask is None:
        # default: keep every cell of the 2-D grid
        mask = np.ones(len(grd_x) * len(grd_y)).astype('bool')

    # flatten column-major and drop masked-out cells in every time chunk
    target = binned_pts.ravel(order='F')[np.tile(mask, reps=num_chunks)]

    label = None
    if labeling:
        label = np.zeros(len(target))
        has_crime = target != 0
        label[~has_crime] = class_label[0]
        label[has_crime] = class_label[1]

    return target, label
Ejemplo n.º 2
0
def incident_feature(data,
                     grid_2d,
                     cellsize_3d,
                     group_seq,
                     mask=None,
                     num_chunks=None,
                     binary=True):
    """
    Per-cell crime-incident feature for every time group in ``group_seq``.

    Bins incidents into the 3-D (x, y, t) grid and returns a (k, 1) column
    of counts (or 0/1 presence flags when ``binary`` is True) for the cells
    selected by ``mask``, stacked over ``num_chunks`` time groups.
    """
    # .loc replaces the long-deprecated .ix indexer (removed in pandas 1.0);
    # semantics are identical for a boolean row mask plus a column list
    IncPts = data.loc[(data['GROUP'] >= group_seq[0]) &
                      (data['GROUP'] <= group_seq[-1]),
                      ['X_COORD', 'Y_COORD', 'GROUP']]
    grd_t = group_seq
    IncPts = IncPts.values
    grd_x, grd_y = grid_2d
    grid_3d = (grd_x, grd_y, grd_t)

    if num_chunks is None:
        num_chunks = len(group_seq)
    if mask is None:
        # default: keep every cell of the 2-D grid
        mask = np.ones(len(grd_x) * len(grd_y)).astype('bool')

    binned_pts = ks.bin_point_data_3d(IncPts,
                                      grid_3d,
                                      cellsize_3d,
                                      stat='count',
                                      geoIm=False)
    if binary:
        # crime present/absent instead of raw counts
        binned_pts = (binned_pts > 0).astype(int)

    # flatten column-major, drop masked-out cells, keep a (k, 1) column shape
    inc_cnt = binned_pts.ravel(order='F')[np.tile(mask, reps=num_chunks),
                                          np.newaxis]
    return inc_cnt
Ejemplo n.º 3
0
def long_term_intensity_feature_subgroup(timeIdx, crime_data, group_seq, period, grid_2d, filter_2d, mask=None, density=True):
    """
    Long-term crime-intensity feature for one time group.

    Kernel-smooths the (x, y) locations of incidents that fall in the window
    [group - period[0], group - period[1]], flattens the surface column-major,
    keeps only the cells selected by ``mask`` as a (k, 1) column and, when
    ``density`` is True, normalizes the values to sum to 1.
    """
    group = group_seq[timeIdx]
    # .loc replaces the long-deprecated .ix indexer (removed in pandas 1.0)
    crimepts = crime_data.loc[(crime_data['GROUP'] >= group - period[0]) &
                              (crime_data['GROUP'] <= group - period[1]),
                              ['X_COORD', 'Y_COORD']].values
    KS_LT = ks.kernel_smooth_2d_conv(crimepts, grid_2d, filter_2d, flatten=False)
    # flatten; np.newaxis changes shape from (k,) to (k,1); np.squeeze would do the opposite
    KS_LT = KS_LT.ravel(order='F')[mask, np.newaxis]
    if density:
        KS_LT = KS_LT / np.sum(KS_LT)
    return KS_LT
Ejemplo n.º 4
0
def short_term_intensity_feature_subgroup(timeIdx, crime_data, group_seq, period, grid_2d, filter_3d, mask=None, density=True):
    """
    Short-term crime-intensity feature for one time group.

    Incidents in the window [group - period[0], group - period[1]] are
    smoothed with a separable 3-D (space-time) kernel; only the most recent
    time slice is kept, flattened column-major, masked to a (k, 1) column
    and, when ``density`` is True, normalized to sum to 1.
    """
    group = group_seq[timeIdx]
    # .loc replaces the long-deprecated .ix indexer (removed in pandas 1.0)
    crimepts = crime_data.loc[(crime_data['GROUP'] >= group - period[0]) &
                              (crime_data['GROUP'] <= group - period[1]),
                              ['X_COORD', 'Y_COORD', 'GROUP']]
    # time axis spans the full window, including groups with no incidents
    grd_t = np.arange(group - period[0], group - period[1] + 1)
    crimepts = crimepts.values
    grd_x, grd_y = grid_2d
    grid_3d = (grd_x, grd_y, grd_t)

    KS_ST = ks.kernel_smooth_separable_3d_conv(crimepts, grid_3d, filter_3d, flatten=False)
    KS_ST = KS_ST[:, :, -1]  # take out last (most recent) time slice
    # flatten; np.newaxis changes shape from (k,) to (k,1); np.squeeze would do the opposite
    KS_ST = KS_ST.ravel(order='F')[mask, np.newaxis]
    if density:
        KS_ST = KS_ST / np.sum(KS_ST)
    return KS_ST
def intensity_model_subgroup(timeIdx,
                             crime_data,
                             group_seq,
                             period,
                             grid_2d,
                             filter_2d,
                             mask=None,
                             density=True):
    """
    Kernel-smoothed crime-intensity surface for one time group.

    Smooths the (x, y) locations of incidents in the window
    [group - period[0], group - period[1]] with a 2-D kernel, flattens the
    surface column-major, keeps only the cells selected by ``mask`` as a
    (k, 1) column and, when ``density`` is True, normalizes it to sum to 1.
    """
    group = group_seq[timeIdx]
    # .loc replaces the long-deprecated .ix indexer (removed in pandas 1.0)
    CrimePts = crime_data.loc[(crime_data['GROUP'] >= group - period[0]) &
                              (crime_data['GROUP'] <= group - period[1]),
                              ['X_COORD', 'Y_COORD']].values
    KS = ks.kernel_smooth_2d_conv(CrimePts, grid_2d, filter_2d, flatten=False)
    # flatten; np.newaxis changes shape from (k,) to (k,1)
    KS = KS.ravel(order='F')[mask, np.newaxis]
    if density:
        KS = KS / np.sum(KS)
    return KS
Ejemplo n.º 6
0
def consec_presence_feature(data, grid_2d, cellsize_3d, group_seq, buffer_period=0, mask=None, num_chunks=None, presence=True, truncate=True):
    """
    Count the time groups (e.g. weeks) of a consecutive presence/absence. For example,
    the 11 groups of crime count for a certain cell is as follows
    [1, 0, 0, 1, 0, 0, 0, 2, 0, 1, 1].
    The corresponding consecutive zeros are
    [ 0, 0, 1, 2, 0, 1, 2, 3, 0, 1, 0]

    Returns a (k, 1) column of per-cell consecutive counts for the cells
    selected by ``mask``, stacked over ``num_chunks`` time groups.
    """
    # .loc replaces the long-deprecated .ix indexer (removed in pandas 1.0).
    # The buffer period extends the window backwards so the earliest groups
    # in group_seq still get meaningful consecutive counts.
    IncPts = data.loc[(data['GROUP'] >= group_seq[0] - buffer_period) &
                      (data['GROUP'] <= group_seq[-1]),
                      ['X_COORD', 'Y_COORD', 'GROUP']]
    # time axis spans buffer + requested groups, including empty ones
    grd_t = np.arange(group_seq[0] - buffer_period, group_seq[-1] + 1)
    IncPts = IncPts.values
    grd_x, grd_y = grid_2d
    grid_3d = (grd_x, grd_y, grd_t)

    if num_chunks is None:
        num_chunks = len(group_seq)
    if mask is None:
        # default: keep every cell of the 2-D grid
        mask = np.ones(len(grd_x) * len(grd_y)).astype('bool')

    binned_pts = ks.bin_point_data_3d(IncPts, grid_3d, cellsize_3d, stat='count', geoIm=False)
    if presence:
        # count consecutive presence
        binned_pts = (binned_pts > 0).astype(int)
    else:
        # count consecutive absence
        binned_pts = (binned_pts == 0).astype(int)

    # run-length count of 1s along the time axis, per cell
    consec_cnt = np.apply_along_axis(count_consec_val, 2, binned_pts, val=1)
    consec_cnt = consec_cnt[:, :, -len(group_seq):]  # truncate buffer groups
    consec_cnt = consec_cnt.ravel(order='F')[np.tile(mask, reps=num_chunks), np.newaxis]

    if truncate:
        # Since consecutive numbers are unbounded, some will be affected by the buffer_period.
        consec_cnt[consec_cnt > buffer_period] = buffer_period

    return consec_cnt
Ejemplo n.º 7
0
      
 # NOTE(review): fragment of a larger routine — SpatialFeature, filePath_spfeature,
 # POD_data, grid_pkl, sigma, lam, period_ST and the varName_* lists are defined
 # outside this visible span.
 if 'geometry' in SpatialFeature.columns:
     SpatialFeature.drop('geometry', axis=1, inplace=True) # remove 'geometry' column
 
 # load POD proximity data
 pod_dist_pkl = filePath_spfeature+'PODdist_dataframe.pkl'
 with open(pod_dist_pkl,'rb') as input_file:
     POD_data['dist'] = pickle.load(input_file)   
                                     
 # Set up parameters 
 _, grd_x, grd_y, _, mask_grdInCity, _ = load_grid(grid_pkl)
 grid_2d = (grd_x,grd_y)
 cellsize_2d = (grd_x[1]-grd_x[0],grd_y[1]-grd_y[0]) 
 cellsize_3d = cellsize_2d+(1,) # (size_x, size_y,size_t)
 
 # 2-D Gaussian filter; window is 4*2*sigma + 1 cells per side
 gauss_filter = ks.gaussian_filter_2d(bandwidth=sigma, window_size=(4*2*sigma[0]+1,4*2*sigma[0]+1))
 # separable space-time filter: Gaussian in space, exponential decay (lam) in time
 gauss_exp_filter = ks.gaussian_exponential_filter_3d(bandwidth=(sigma[0],sigma[1],lam),\
                                                      window_size=(4*2*sigma[0]+1,4*2*sigma[1]+1,period_ST[0]-period_ST[1]))['space-time']               
 
 # assemble full feature-name vector from the per-family name lists
 varName_space = SpatialFeature.columns.values.tolist()
 varName = np.array(varName_space + varName_time + varName_pod + varName_weather + varName_STcrime + varName_LTcrime +\
                    varName_311 + varName_crime_pres + varName_crime_abs)
     
 # boolean membership mask per feature family, aligned with varName
 var_type_idx = {}
 keys = ['space','time','POD','weather','LT','ST','311','pres','abs']
 subnames = [varName_space, varName_time, varName_pod, varName_weather, varName_LTcrime, varName_STcrime,\
             varName_311, varName_crime_pres, varName_crime_abs]
 for key,subname in zip(keys,subnames):
     var_type_idx[key]=np.in1d(varName,subname)
             
 #------------------------------
Ejemplo n.º 8
0
        # NOTE(review): fragment of a larger routine — SpatialFeature,
        # filePath_spfeature, POD_data, grid_pkl, sigma and the varName_*
        # lists are defined outside this visible span.
        SpatialFeature.drop('geometry', axis=1,
                            inplace=True)  # remove 'geometry' column

    # load POD proximity data
    pod_dist_pkl = filePath_spfeature + 'PODdist_dataframe.pkl'
    with open(pod_dist_pkl, 'rb') as input_file:
        POD_data['dist'] = pickle.load(input_file)

    # Set up parameters
    _, grd_x, grd_y, _, mask_grdInCity, _ = load_grid(grid_pkl)
    grid_2d = (grd_x, grd_y)
    cellsize_2d = (grd_x[1] - grd_x[0], grd_y[1] - grd_y[0])
    cellsize_3d = cellsize_2d + (1, )  # (size_x, size_y,size_t)

    # 2-D Gaussian filter; window is 4*2*sigma + 1 cells per side
    gauss_filter = ks.gaussian_filter_2d(bandwidth=sigma,
                                         window_size=(4 * 2 * sigma[0] + 1,
                                                      4 * 2 * sigma[0] + 1))

    # assemble full feature-name vector from the per-family name lists
    varName_space = SpatialFeature.columns.values.tolist()
    varName = np.array(varName_space + varName_time + varName_pod + varName_weather + varName_LTcrime +\
                       varName_311 + varName_crime_inc)

    # boolean membership mask per feature family, aligned with varName
    var_type_idx = {}
    keys = ['space', 'time', 'POD', 'weather', 'LT', '311', 'crimeInc']
    subnames = [
        varName_space, varName_time, varName_pod, varName_weather,
        varName_LTcrime, varName_311, varName_crime_inc
    ]
    for key, subname in zip(keys, subnames):
        var_type_idx[key] = np.in1d(varName, subname)