# Example #1 (원문: 예제 #1), score: 0
def ordinary_kriging_evaluation(year):
    """
    Under leave-one-out setting, use only crime rate.

    Fits an Ordinary Kriging model (linear variogram) on the crime rate of
    all regions but one, predicts the held-out region from its centroid
    coordinates, and accumulates absolute errors.

    Input:
    year - which year's crime counts to evaluate

    Output:
    errors - list of absolute prediction errors, one per held-out region.
             Also prints the mean absolute error and the relative error
             (MAE divided by the mean crime rate).
    """
    from pykrige.ok import OrdinaryKriging
    from sklearn.model_selection import LeaveOneOut

    y_cnt = retrieve_crime_count(year)
    demo = generate_corina_features()
    # First demographic column is population; reshape to a (n, 1) column
    # so the division below broadcasts row-wise.
    population = demo[1][:,0].reshape(demo[1].shape[0], 1)
    # Crime rate per 10,000 residents.
    Y = y_cnt / population * 10000
    
    coords = get_centroid_ca()
    
    # Each row of `data` is [x, y, crime_rate] for one region.
    data = np.concatenate((coords, Y), axis=1)
    loo = LeaveOneOut()
    
    errors = []
    for train_idx, test_idx in loo.split(data):
        x_train = data[train_idx,:]
        # NOTE: fancy indexing with test_idx (length 1) broadcast against
        # [0,1] yields a flat array of 2 scalars: (x, y) of the held-out row.
        coords_test = data[test_idx, [0,1]]
        y_test = data[test_idx, 2]
        
        OK = OrdinaryKriging(x_train[:,0], x_train[:,1], x_train[:,2], variogram_model="linear")
        # Predict at the single held-out point; `z` is the kriged estimate,
        # `var` the kriging variance (unused here).
        z, var = OK.execute("points", coords_test[0], coords_test[1])
        errors.append(abs(z[0] - y_test[0]))
    print np.mean(errors), np.mean(errors) / np.mean(Y)  # Python 2 print statement
    return errors
def predict_crime_with_embedding():
    ge = get_graph_embedding_features("taxi_all.txt")
    
    y_cnt = retrieve_crime_count(2010)
    demo = generate_corina_features()
    population = demo[1][:,0].reshape(demo[1].shape[0], 1)
    y = y_cnt / population * 10000
    
    er = leaveOneOut_error(y, ge)
    print er
    return er
# Example #3 (원문: 예제 #3), score: 0
def predict_crime_with_embedding():
    ge = get_graph_embedding_features("taxi_all.txt")

    y_cnt = retrieve_crime_count(2010)
    demo = generate_corina_features()
    population = demo[1][:, 0].reshape(demo[1].shape[0], 1)
    y = y_cnt / population * 10000

    er = leaveOneOut_error(y, ge)
    print er
    return er
# Example #4 (원문: 예제 #4), score: 0
def extract_raw_samples(year=2010, crime_t=['total'], crime_rate=True):
    """
    Extract all samples with raw labels and features. Return None if the 
    corresponding feature is not selected.
    
    This function is called once only to avoid unnecessary disk I/O.
    
    Input:
    year        - which year to study
    crime_t     - crime types of interest, e.g. 'total'
                  (mutable default is safe here: the list is never mutated)
    crime_rate  - predict crime_rate or not (count)
    
    Output:
    Y - crime rate / count
    D - demo feature
    P - POI feature
    Tf - taxi flow matrix (count)
    Gd - geo weight matrix
    """
    # Crime count
    y_cnt = retrieve_crime_count(year, col = crime_t)
    
    # Crime rate / count: rate is per 10,000 residents, population being
    # the first demographic column.
    demo = generate_corina_features()
    population = demo[1][:,0].reshape(demo[1].shape[0], 1)
    Y = y_cnt / population * 10000 if crime_rate else y_cnt
    # BUG FIX: the original asserted Y.shape == (N, 1) where `N` is not
    # defined anywhere in view; the sibling duplicate of this function
    # asserts (77, 1) -- the 77 Chicago community areas -- so use that.
    assert Y.shape == (77, 1)
    
    # Demo features
    D = demo[1]
    
    # POI features
    P = getFourSquarePOIDistribution(useRatio=False)
    
    # Taxi flow matrix
    Tf = getTaxiFlow(normalization="none")
    
    # Geo weight matrix
    Gd = generate_geographical_SpatialLag_ca()
    
    return Y, D, P, Tf, Gd
def extract_raw_samples(year=2010, crime_t=['total'], crime_rate=True):
    """
    Extract all samples with raw labels and features. Return None if the 
    corresponding feature is not selected.
    
    This function is called once only to avoid unnecessary disk I/O.
    
    Input:
    year        - which year to study
    crime_t     - crime types of interest, e.g. 'total'
    crime_rate  - predict crime_rate or not (count)
    
    Output:
    Y - crime rate / count
    D - demo feature
    P - POI feature
    Tf - taxi flow matrix (count)
    Gd - geo weight matrix
    """
    # Raw crime counts for the requested year and crime types.
    counts = retrieve_crime_count(year, col = crime_t)

    # Demographic matrix; column 0 holds population, used to normalize
    # counts into a rate per 10,000 residents when requested.
    demographics = generate_corina_features()
    D = demographics[1]
    pop = D[:, 0].reshape(D.shape[0], 1)
    if crime_rate:
        Y = counts / pop * 10000
    else:
        Y = counts
    # Expect one label per each of the 77 community areas.
    assert(Y.shape == (77,1))

    # Remaining feature groups: POI distribution, raw taxi-flow counts,
    # and the geographic spatial-lag weight matrix.
    P = getFourSquarePOIDistribution(useRatio=False)
    Tf = getTaxiFlow(normalization="none")
    Gd = generate_geographical_SpatialLag_ca()

    return Y, D, P, Tf, Gd
        # NOTE(review): the enclosing `with open(...) as fin:` statement is
        # outside this view. Five objects are unpickled in a fixed order,
        # which must match the order in which they were dumped; the names
        # suggest different embedding variants (e.g. hdge = hourly dynamic
        # graph embeddings) -- confirm against the writer of this file.
        mf = pickle.load(fin)
        line = pickle.load(fin)
        dwt = pickle.load(fin)
        dws = pickle.load(fin)
        hdge = pickle.load(fin)

    # One scatter plot per hour of the day; hdge is indexed 0..23.
    # NOTE(review): Fn, xp, yp, lp are computed but unused below.
    for h in range(24):
        Fn = similarityMatrix(hdge[h])
        x, y, xp, yp, lp = generate_point(Fn, Y)
        f = plt.figure()
        plt.scatter(x, y, color='red')
        plt.show()


# Top-level script: scatter-plot point pairs generated from the raw taxi
# flow matrix against the 2013 crime rate, with a manually drawn guide line.
demo = generate_corina_features()
y_cnt = retrieve_crime_count(2013)

# Population is column 0 of the demographic matrix; crime rate is per
# 10,000 residents.
population = demo[1][:, 0].reshape(demo[1].shape[0], 1)
Y = y_cnt / population * 10000

# Raw (unnormalized) taxi flow counts between regions.
F = getTaxiFlow(normalization="none")

# NOTE(review): xp, yp, lp are unused here -- presumably labeled points
# for an annotated variant of this plot; confirm against generate_point.
x, y, xp, yp, lp = generate_point(F, Y)

plt.rc("axes", linewidth=2)
f = plt.figure(figsize=(8, 6))
plt.scatter(x, y, s=16)

# Hand-picked triangle/guide line overlaid on the scatter; the coordinates
# are hard-coded for this figure's data range.
plt.plot([-100, -100, 3500, -100], [3000, -3000, 0, 3000],
         linewidth=2,
         color='blue')