def main():
    """Time quadtree-based nearby-place lookups for one of the data sets.

    Command line: ``python findNearPlace.py -d N`` with N in {0, 1, 2}.
    N selects which location input file (and near-place output path) is
    looked up in ``settings``.

    The location file is read as CSV: column 0 is parsed as the integer
    place id, columns 2 and 3 as float latitude and longitude.  Every place
    is added to a ``Quadtree`` covering (-90, -180, 90, 180); then
    ``findNearPlaceByQuadtree`` is called once per place with
    ``settings["DISTANCE_THRESHOLD"]``.  After each query the elapsed time
    and the number of results are printed, each followed by a pause that
    waits for Enter.

    Exits with status 1 on a wrong argument count, an unknown data set,
    an input file that cannot be opened, or a row that cannot be parsed.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-d', type=int, action='store', dest='data_num',
                        help='choose which data set to use')

    # Exactly "<script> -d <n>" is expected; anything else gets the usage hint.
    if len(sys.argv) != 3:
        print('Command e.g.: python findNearPlace.py -d 0(1,2)')
        sys.exit(1)
    para = parser.parse_args()

    # data set number -> (settings key of input file, settings key of output)
    dataset_keys = {
        0: ("SRC_DATA_FILE1_1", "NEAR_PLACE_FILE1"),
        1: ("SRC_DATA_FILE2_1", "NEAR_PLACE_FILE2"),
        2: ("SRC_DATA_FILE3_3", "NEAR_PLACE_FILE3"),
    }
    if para.data_num not in dataset_keys:
        print('Invalid choice of data set')
        sys.exit(1)
    src_key, out_key = dataset_keys[para.data_num]
    location_infile = settings["ROOT_PATH"] + settings[src_key]
    # Only consumed by the disabled brute-force variant further down.
    nearplace_outfile = settings["ROOT_PATH"] + settings[out_key]

    # Load place id -> (lat, lng).
    try:
        infile = open(location_infile, 'rU')
    except IOError as err:
        # Report the real cause instead of failing on an unbound row variable.
        print('Cannot open %s: %s' % (location_infile, err))
        sys.exit(1)

    loc_latlng = {}
    with infile:  # make sure the handle is closed, also on early exit
        for entry in csv.reader(infile):
            try:
                pid, lat, lng = int(entry[0]), float(entry[2]), float(entry[3])
            except (IndexError, ValueError):
                # Show the offending row, then give up.
                print(entry)
                sys.exit(1)
            loc_latlng[pid] = (lat, lng)

    # Disabled alternative: directly scanning all POIs to get the answer,
    # which is too slow.
    #
    # writer = csv.writer(open(nearplace_outfile, "w"), lineterminator="\r\n")
    # pids = loc_latlng.keys()
    # for i in xrange(len(pids)):
    #     pid1 = pids[i]
    #     near_place = []
    #     for j in xrange(len(pids)):
    #         pid2 = pids[j]
    #         dis = distance.distance(loc_latlng[pid1], loc_latlng[pid2]).miles
    #         if dis < settings["DISTANCE_THRESHOLD"]:
    #             near_place.append(pid2)
    #     writer.writerow([pid1] + near_place)
    #     print i

    # Quadtree index over the full lat/lng extent.
    index_extent = (-90, -180, 90, 180)
    index = Quadtree(index_extent)
    for pid, latlng in loc_latlng.items():
        index.add(pid, latlng)

    # Query and time the neighbourhood of every place; each printout is
    # followed by an interactive pause (press Enter to continue).
    for latlng in loc_latlng.values():
        start_time = time.clock()
        pid_set = findNearPlaceByQuadtree(loc_latlng, latlng, index.struct(),
                                          settings["DISTANCE_THRESHOLD"])
        end_time = time.clock()
        print("Time Cost: %f(s)" % (end_time - start_time))
        raw_input()
        print(len(pid_set))
        raw_input()