Example #1
0
def main():
    """Load POI coordinates for one data set and time quadtree neighbour lookups.

    Usage: ``python findNearPlace.py -d N`` where N is 0, 1 or 2 and selects
    which pair of ``settings`` entries (source CSV / near-place output) to use.

    Steps:
      1. Read the location CSV; columns 0, 2 and 3 of every row are parsed as
         the integer place id and the float latitude / longitude.
      2. Insert every place into a ``Quadtree`` covering (-90, -180, 90, 180).
      3. For each place, call ``findNearPlaceByQuadtree`` with
         ``settings["DISTANCE_THRESHOLD"]``, print the elapsed time and the
         size of the returned collection, waiting for Enter after each print.

    Raises:
        SystemExit: with code 1 when the argument count is wrong, the data set
            number is not 0/1/2, or the location file cannot be read/parsed.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-d', type=int, action='store',
            dest='data_num', help='choose which data set to use')
    # Exactly "<script> -d <n>" is required; anything else gets the usage hint.
    if len(sys.argv) != 3:
        print('Command e.g.: python findNearPlace.py -d 0(1,2)')
        sys.exit(1)

    para = parser.parse_args()

    # data set number -> (source-file settings key, output-file settings key)
    dataset_keys = {
        0: ("SRC_DATA_FILE1_1", "NEAR_PLACE_FILE1"),
        1: ("SRC_DATA_FILE2_1", "NEAR_PLACE_FILE2"),
        # NOTE(review): this key ends in "_3" while the other two end in "_1";
        # kept as-is -- confirm against the settings file.
        2: ("SRC_DATA_FILE3_3", "NEAR_PLACE_FILE3"),
    }
    if para.data_num not in dataset_keys:
        print('Invalid choice of data set')
        sys.exit(1)
    src_key, out_key = dataset_keys[para.data_num]
    location_infile = settings["ROOT_PATH"] + settings[src_key]
    # Only the (removed) brute-force writer used this path; it is still
    # resolved so a missing settings key is reported up front.
    nearplace_outfile = settings["ROOT_PATH"] + settings[out_key]

    loc_latlng = {}
    # Last row read from the CSV. Pre-bound so the error handler below can
    # always print it, even if the file cannot be opened or is empty.
    entry = None
    try:
        with open(location_infile, 'rU') as infile:
            for entry in csv.reader(infile):
                pid, lat, lng = int(entry[0]), float(entry[2]), float(entry[3])
                loc_latlng[pid] = (lat, lng)
    except (IOError, OSError, csv.Error, IndexError, ValueError) as err:
        # Report what went wrong plus the offending row, then abort.
        print('Failed to load %s: %s' % (location_infile, err))
        print(entry)
        sys.exit(1)

    # A brute-force variant that compared every pair of POIs with
    # distance.distance(...).miles and wrote the neighbours of each POI to
    # nearplace_outfile was dropped here because it was too slow.

    # quad tree
    index_extent = (-90, -180, 90, 180)
    index = Quadtree(index_extent)
    for pid in loc_latlng:
        index.add(pid, loc_latlng[pid])

    for pid in loc_latlng:
        start_time = time.clock()
        pid_set = findNearPlaceByQuadtree(loc_latlng,
                                          loc_latlng[pid],
                                          index.struct(),
                                          settings["DISTANCE_THRESHOLD"])
        end_time = time.clock()
        print("Time Cost: %f(s)" % (end_time - start_time))
        # Pause until the user presses Enter so each result can be inspected.
        raw_input()
        print(len(pid_set))
        raw_input()