# Example no. 1
def get_centroid_ca():
    """
    Return the centroid coordinates of the community areas (CAs).

    The CA objects are obtained from Tract.createAllCAObjects() and are
    looked up by the integer ids 1 through 77, in that order.

    Returns a list of [x, y] pairs, one per CA id.
    """
    ca_by_id = Tract.createAllCAObjects()
    centroids = (ca_by_id[ca_id].polygon.centroid for ca_id in range(1, 78))
    return [[point.x, point.y] for point in centroids]
# Example no. 2
def generate_geographical_SpatialLag_ca(knearest=True, leaveOut=-1):
    """
    Generate the inverse-distance weight matrix for CA pairs.

    Entry (i, j) is 1 / distance between the centroids of the i-th and j-th
    retained CA; entries for identical centroids (the diagonal) stay 0.

    If knearest is true, then select the 6-nearest neighboring CAs: in each
    row only the weights not smaller than the 6th largest weight are kept,
    everything else is set to 0 (ties at the threshold are all kept).
    Else, return the weights to all other CAs.

    leaveOut will select the CA and remove it. Take value from 1 to 77;
    any value <= 0 keeps all CAs.

    Returns a square numpy array with one row/column per retained CA, in
    increasing CA id order.
    """
    cas = Tract.createAllCAObjects()

    # An explicit list (not a bare range) so that .remove works on both
    # Python 2 and Python 3.
    iset = list(range(1, 78))
    if leaveOut > 0:
        # NOTE(review): this statement and the centroid collection below were
        # missing from the block and are restored from how `iset` and
        # `centers` are used further down -- confirm against the upstream file.
        iset.remove(leaveOut)
    centers = [cas[i].polygon.centroid for i in iset]

    W = np.zeros((len(iset), len(iset)))
    for i, src in enumerate(centers):
        for j, dst in enumerate(centers):
            if src != dst:
                W[i][j] = 1 / src.distance(dst)
        # find n-largest (n=6): a larger weight means a closer neighbor
        if knearest:
            threshold = heapq.nlargest(6, W[i, :])[-1]
            W[i, W[i, :] < threshold] = 0
    return W
def CA_clustering_with_embedding():
    """
    Cluster the CAs by their graph-embedding features and draw the result.

    The features are read with get_graph_embedding_features("geo_all.txt")
    and clustered with KMeans into 6 clusters. For every feature row the
    1-based row index and its cluster label are printed. Each CA polygon is
    then added to a new matplotlib figure, filled with the color of its
    cluster and annotated with its CA id at the polygon centroid.

    The figure is only built here; it is neither shown nor saved.

    Returns the tuple (kmeans, cas): the fitted KMeans model and the CA
    objects keyed by CA id.
    """
    from sklearn.cluster import KMeans
    import matplotlib.pyplot as plt
    import descartes

    ge = get_graph_embedding_features("geo_all.txt")
    kmeans = KMeans(n_clusters=6, max_iter=100).fit(ge)
    for idx, lab in enumerate(kmeans.labels_):
        # Formatted as one string so the output is the same "index label"
        # text under both Python 2 and Python 3.
        print("%s %s" % (idx + 1, lab))

    colorMaps = ['blue', 'red', 'g', 'c', 'y', 'm', 'k', 'w']
    cas = Tract.createAllCAObjects()

    fig = plt.figure()
    ax = fig.add_subplot(111)
    for k, ca in cas.items():
        cak = ca.polygon
        # assumes CA ids are 1-based and feature row k-1 belongs to CA k
        # -- TODO confirm against get_graph_embedding_features
        ax.add_patch(descartes.PolygonPatch(cak, fc=colorMaps[kmeans.labels_[k - 1]]))
        ax.annotate(str(k), [cak.centroid.x, cak.centroid.y])
    return kmeans, cas
# Example no. 11
def generate_transition_SocialLag(year=2010, lehd_type=0, region='ca',
                                  leaveOut=-1, normalization='source'):
    """
    Generate the spatial lag matrix from the transition flow connected CAs.

    NOTE(review): only `year=2010` of the parameter list is certain. The
    other parameter names are the ones the body uses; their order and
    default values are assumptions -- confirm against the callers.

    year          -- year inserted into the flow file name.
    lehd_type     -- which count column of the flow file to use:
        0 - #total jobs
        1 - #jobs age under 29,
        2 - #jobs age from 30 to 54,
        3 - #jobs above 55,
        4 - #jobs earning under $1250/month,
        5 - #jobs earnings from $1251 to $3333/month,
        6 - #jobs above $3333/month,
        7 - #jobs in goods producing,
        8 - #jobs in trade transportation,
        9 - #jobs in other services
    region        -- 'ca' or 'tract'; selects the region objects and the
                     flow file.
    leaveOut      -- id of a region to drop from the matrix; values <= 0
                     keep every region.
    normalization -- 'source' (transpose, then make each row sum to 1),
                     'destination' (make each row sum to 1),
                     'pair' (divide by the sum of W + W^T);
                     any other value returns the raw counts.

    Returns a square float64 numpy array with one row/column per retained
    region, ordered by increasing region id.

    Raises ValueError for an unknown region and KeyError when the flow file
    references a destination id that is not among the retained regions.
    """
    if region == 'ca':
        ts = Tract.createAllCAObjects()
        fn = here + '/../data/chicago_ca_od_{0}.csv'.format(year)
    elif region == 'tract':
        ts = Tract.createAllTractObjects()
        fn = here + '/../data/chicago_od_tract_{0}.csv'.format(year)
    else:
        raise ValueError("region must be 'ca' or 'tract', got %r" % (region,))
    ordkey = sorted(ts.keys())

    # listIdx[srcid][dstid] -> count taken from the selected column
    listIdx = {}
    with open(fn) as fin:
        for line in fin:
            ls = line.split(",")
            # the first five characters of both id fields are dropped before
            # the integer conversion
            srcid = int(ls[0][5:])
            dstid = int(ls[1][5:])
            val = int(ls[2 + lehd_type])
            listIdx.setdefault(srcid, {})[dstid] = val

    if leaveOut > 0:
        # NOTE(review): the body of this branch was missing; dropping the
        # region from both `ts` and `ordkey` is what the matrix size and the
        # position lookups below require -- confirm against the upstream file.
        ts.pop(leaveOut, None)
        ordkey = [key for key in ordkey if key != leaveOut]

    # one dictionary lookup per cell instead of a linear ordkey.index scan
    position = dict((key, idx) for idx, key in enumerate(ordkey))

    W = np.zeros((len(ordkey), len(ordkey)))
    for srcid in ordkey:
        # sources without any recorded flow keep their all-zero row
        for dstid, val in listIdx.get(srcid, {}).items():
            if dstid == leaveOut:
                continue
            W[position[srcid], position[dstid]] = val

    # The diagonal (self flow) is not zeroed here.
    assert W.dtype == "float64"

    # normalization section
    if normalization == 'source':
        # source mean the residence
        W = np.transpose(W)
        sW = np.sum(W, axis=1, keepdims=True)
        W = W / sW
        assert abs(np.sum(W[1, ]) - 1) < 0.0000000001 and W.dtype == "float64"
    elif normalization == 'destination':
        # destination mean workplace
        sW = np.sum(W, axis=1, keepdims=True)
        W = W / sW
    elif normalization == 'pair':
        sW = np.sum(W + np.transpose(W))
        W = W / sW

    # by default, the output is the workplace-to-residence count matrix
    return W
# Example no. 12
def generatePOIfeature(gridLevel='ca'):
    """
    Generate POI features and write them out to files.

    gridLevel could be "ca" or "tract".

    Category names listed for the POI hierarchy:
    ['Food', 'Residence', 'Travel', 'Arts & Entertainment',
    'Outdoors & Recreation', 'College & Education', 'Nightlife',
    'Professional', 'Shops', 'Event']

    Two matrices are built, with one row per region in increasing region id
    order:
      * count matrix (3 columns): check-in count, user count and POI count;
      * category matrix: number of POIs per category, one column per
        category that was observed in at least one region.

    Files written next to this module (`here`):
      * 'ca':    POI_dist.csv, POI_cnt.csv
      * 'tract': POI_dist_tract.csv, POI_cnt_tract.csv and POI_tract.pickle
                 (the ordered region ids followed by the per-region category
                 dictionary).

    Raises ValueError for an unknown gridLevel. Returns nothing.
    """
    if gridLevel == 'ca':
        cas = Tract.createAllCAObjects()
    elif gridLevel == 'tract':
        cas = Tract.createAllTractObjects()
    else:
        raise ValueError("gridLevel must be 'ca' or 'tract', got %r" % (gridLevel,))

    ordKey = sorted(cas.keys())
    # region id -> matrix row, replacing repeated linear ordKey.index scans
    rowOf = dict((key, idx) for idx, key in enumerate(ordKey))

    gcn = np.zeros((len(cas), 3))  # check-in count, user count, and POI count
    gcat = {}

    # NOTE(security): pickle.load can execute arbitrary code; these files
    # must come from a trusted source.
    # Pickle streams are binary data, so the files are opened in binary mode.
    with open('../data/all_POIs_chicago', 'rb') as fin:
        POIs = pickle.load(fin)

    with open('category_hierarchy.pickle', 'rb') as f2:
        poi_cat = pickle.load(f2)

    for poi in POIs.values():
        if poi.cat not in poi_cat:
            # NOTE(review): POIs without a mapped category are skipped
            # entirely, so they are never counted under a stale or undefined
            # category -- confirm this is the intended handling.
            continue
        cat = poi_cat[poi.cat]
        loc = Point(poi.location.lon, poi.location.lat)

        for key, grid in cas.items():
            if grid.polygon.contains(loc):
                row = rowOf[key]
                gcn[row, 0] += poi.checkin_count
                gcn[row, 1] += poi.user_count
                gcn[row, 2] += 1

                # Build a two-level dictionary,
                # first index by region id,
                # then index by category id,
                # finally, the value is number of POI under the category.
                catCount = gcat.setdefault(key, {})
                catCount[cat] = catCount.get(cat, 0) + 1

                # break the polygon loop
                break

    # every category that occurs in at least one region, without duplicates
    hi_catgy = []
    for catdict in gcat.values():
        hi_catgy += catdict.keys()
    hi_catgy = list(set(hi_catgy))
    print(hi_catgy)

    gdist = np.zeros((len(cas), len(hi_catgy)))
    for key, distDict in gcat.items():
        row = rowOf[key]
        for idx, cate in enumerate(hi_catgy):
            gdist[row, idx] = distDict.get(cate, 0)

    if gridLevel == 'ca':
        np.savetxt(here + "/POI_dist.csv", gdist, delimiter=",")
        np.savetxt(here + "/POI_cnt.csv", gcn, delimiter=",")
    elif gridLevel == 'tract':
        np.savetxt(here + "/POI_dist_tract.csv", gdist, delimiter=",")
        np.savetxt(here + "/POI_cnt_tract.csv", gcn, delimiter=",")
        with open(here + "/POI_tract.pickle", 'wb') as fout:
            pickle.dump(ordKey, fout)
            pickle.dump(gcat, fout)
