def test_clusterizer(self):
    dao = Dao(DAO_URL, sql_logging=SQL_LOG)
    dao.load_gtfs(DUMMY_GTFS)
    # Merge stops closer than 300m together
    sc = SpatialClusterizer(300.0)
    for stop in dao.stops():
        sc.add_point(stop)
    sc.clusterize()
    # for cluster in sc.clusters():
    #     print("---CLUSTER: %d stops" % (len(cluster)))
    #     for stop in cluster:
    #         print("%s %s" % (stop.stop_id, stop.stop_name))
    gare1 = dao.stop("GBSJT")
    gare2 = dao.stop("GBSJ")
    gare3 = dao.stop("GBSJB")
    self.assertTrue(sc.in_same_cluster(gare1, gare2))
    self.assertTrue(sc.in_same_cluster(gare1, gare3))
    self.assertTrue(sc.in_same_cluster(gare2, gare3))
    bq = dao.stop("BQ")
    bq1 = dao.stop("BQA")
    bq2 = dao.stop("BQD")
    self.assertTrue(sc.in_same_cluster(bq, bq1))
    self.assertTrue(sc.in_same_cluster(bq, bq2))
    bs = dao.stop("BS")
    bs1 = dao.stop("BS1")
    bs2 = dao.stop("BS2")
    self.assertTrue(sc.in_same_cluster(bs, bs1))
    self.assertTrue(sc.in_same_cluster(bs, bs2))
    self.assertFalse(sc.in_same_cluster(gare1, bq))
    self.assertFalse(sc.in_same_cluster(gare1, bs))
    self.assertFalse(sc.in_same_cluster(gare3, bs2))
    bjb = dao.stop("BJB")
    self.assertFalse(sc.in_same_cluster(bjb, gare1))
    self.assertFalse(sc.in_same_cluster(bjb, bs))
    self.assertFalse(sc.in_same_cluster(bjb, bq))
def test_clusterizer(self):
    # One arc-minute of latitude is one nautical mile: p2 lies just
    # over a mile north of p1, p3 just under a mile south of it.
    p1 = SimplePoint(45, 0)
    p2 = SimplePoint(45 + 1.001 / 60, 0)
    p3 = SimplePoint(45 - 0.999 / 60, 0)
    sc = SpatialClusterizer(self._NAUTICAL_MILE)
    sc.add_points((p1, p2, p3))
    sc.clusterize()
    self.assertFalse(sc.in_same_cluster(p1, p2))
    self.assertTrue(sc.in_same_cluster(p1, p3))
    self.assertFalse(sc.in_same_cluster(p2, p3))
    self.assertEqual(len(sc.clusters()), 2)
    # Clusters chain: p1-p3 and p3-p2 are each 0.8 mile apart, so all
    # three points fall into a single cluster even though p1 and p2
    # are 1.6 miles from each other.
    p1 = SimplePoint(45, 0)
    p2 = SimplePoint(45 + 2 * 0.8 / 60, 0)
    p3 = SimplePoint(45 + 1 * 0.8 / 60, 0)
    sc = SpatialClusterizer(self._NAUTICAL_MILE)
    sc.add_points((p1, p2, p3))
    sc.clusterize()
    self.assertTrue(sc.in_same_cluster(p1, p2))
    self.assertTrue(sc.in_same_cluster(p1, p3))
    self.assertTrue(sc.in_same_cluster(p2, p3))
    self.assertEqual(len(sc.clusters()), 1)
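# Standalone sketch of the clusterizer semantics exercised by the test
# above. The import path is an assumption (adjust to wherever this
# codebase defines SpatialClusterizer and SimplePoint), as is the value
# of the nautical-mile constant; one arc-minute of latitude is one
# nautical mile (~1852 m), which is what the test thresholds rely on.
from gtfslib.spatialclusterizer import SpatialClusterizer, SimplePoint  # assumed path

NAUTICAL_MILE = 1852.0  # meters; assumed value of self._NAUTICAL_MILE
sc = SpatialClusterizer(NAUTICAL_MILE)
# Two points 0.5 arc-minute (~926 m) apart, under the 1-mile threshold:
sc.add_points((SimplePoint(45, 0), SimplePoint(45 + 0.5 / 60, 0)))
sc.clusterize()
assert len(sc.clusters()) == 1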
def run(self, context, stopshp=None, hopshp=None, cluster=0, **kwargs):
    cluster_meters = float(cluster)
    if stopshp is None and hopshp is None:
        print("Nothing to generate! Bailing out")
        return
    print("Loading stops...")
    stops = set()
    sc = SpatialClusterizer(cluster_meters)
    for stop in context.dao().stops(fltr=context.args.filter):
        sc.add_point(stop)
        stops.add(stop)
    print("Loaded %d stops. Clusterizing..." % (len(stops)))
    sc.clusterize()
    print("Aggregated in %d clusters" % (len(sc.clusters())))
    print("Loading calendar dates...")
    dates = set(context.dao().calendar_dates_date(fltr=context.args.filter))
    print("Loaded %d dates" % (len(dates)))
    print("Computing stop and hop trip counts...")
    # Each value is a [trip_count, trip_x_day_count] pair.
    hop_tripcount = defaultdict(lambda: [0, 0])
    clu_tripcount = defaultdict(lambda: [0, 0])
    ntrips = 0
    for trip in context.dao().trips(fltr=context.args.filter,
                                    prefetch_stop_times=True,
                                    prefetch_stops=True,
                                    prefetch_calendars=True):
        # Number of days the trip is running,
        # restricted to the filtered dates.
        ndays = len([date for date in trip.calendar.dates
                     if date.as_date() in dates])
        for st1, st2 in trip.hops():
            cluster1 = sc.cluster_of(st1.stop)
            cluster2 = sc.cluster_of(st2.stop)
            if cluster1 != cluster2:
                # Only count hops between distinct clusters; a hop inside
                # a single cluster would degenerate to a zero-length line.
                key = (cluster1, cluster2)
                hop_tripcount[key][0] += 1
                hop_tripcount[key][1] += ndays
            clu_tripcount[cluster1][0] += 1
            clu_tripcount[cluster1][1] += ndays
        ntrips += 1
        if ntrips % 1000 == 0:
            print("%d trips..." % ntrips)
    if stopshp:
        print("Generating stop cluster shapefile...")
        stopshpwrt = shapefile.Writer(shapefile.POINT)
        stopshpwrt.field("id", "N")
        stopshpwrt.field("ids", "C", 100)
        stopshpwrt.field("name", "C", 200)
        stopshpwrt.field("ndep", "N")
        stopshpwrt.field("ndepday", "N")
        for cluster, (dep_count, depday_count) in clu_tripcount.items():
            # Shapefile points are (x, y), i.e. (lon, lat).
            stopshpwrt.point(cluster.lon(), cluster.lat())
            ids = cluster.aggregate(lambda s: s.stop_id, sep=';')
            names = cluster.aggregate(lambda s: s.stop_name, sep=';')
            stopshpwrt.record(cluster.id, self.remove_accents(ids),
                              self.remove_accents(names),
                              dep_count, depday_count)
        stopshpwrt.save(stopshp)
    if hopshp:
        print("Generating hop shapefile...")
        hopshpwrt = shapefile.Writer(shapefile.POLYLINE)
        hopshpwrt.field("from_id", "N")
        hopshpwrt.field("from_name", "C", 200)
        hopshpwrt.field("to_id", "N")
        hopshpwrt.field("to_name", "C", 200)
        hopshpwrt.field("name", "C", 200)
        hopshpwrt.field("ntrip", "N")
        hopshpwrt.field("ntripday", "N")
        for (c1, c2), (trip_count, tripday_count) in hop_tripcount.items():
            c1name = c1.aggregate(lambda s: s.stop_name, sep=';')
            c2name = c2.aggregate(lambda s: s.stop_name, sep=';')
            hopshpwrt.line(parts=[[[c1.lon(), c1.lat()],
                                   [c2.lon(), c2.lat()]]])
            hopshpwrt.record(c1.id, self.remove_accents(c1name),
                             c2.id, self.remove_accents(c2name),
                             self.remove_accents(c1name + " -> " + c2name),
                             trip_count, tripday_count)
        hopshpwrt.save(hopshp)
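# Minimal standalone sketch of the pyshp Writer calls used above, assuming
# the pyshp 1.x interface this code targets (pyshp 2.x takes the target
# filename in the Writer constructor and replaces save() with close()).
# The file name and record values here are made up for illustration.
import shapefile

w = shapefile.Writer(shapefile.POINT)
w.field("id", "N")
w.field("name", "C", 200)
w.point(2.3522, 48.8566)  # point() takes (x, y), i.e. (lon, lat)
w.record(1, "example stop cluster")
w.save("/tmp/example_stops")  # writes example_stops.shp/.shx/.dbf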
def run(self, context, csv=None, cluster=0, dstp=0.5, samename=False,
        alldates=False, **kwargs):
    cluster_meters = float(cluster)
    dstp = float(dstp)
    print("Loading stops...")
    stops = set()
    sc = SpatialClusterizer(cluster_meters)
    for stop in context.dao().stops(fltr=context.args.filter):
        sc.add_point(stop)
        stops.add(stop)
    print("Loaded %d stops. Clusterizing..." % (len(stops)))
    sc.clusterize(comparator=sc.make_comparator(samename, dstp))
    print("Aggregated in %d clusters" % (len(sc.clusters())))
    print("Loading calendar dates...")
    dates = set(context.dao().calendar_dates_date(fltr=context.args.filter))
    print("Loaded %d dates" % (len(dates)))
    print("Processing trips...")
    departures_by_clusters = defaultdict(lambda: defaultdict(list))
    ntrips = 0
    for trip in context.dao().trips(fltr=context.args.filter,
                                    prefetch_stops=True,
                                    prefetch_stop_times=True,
                                    prefetch_calendars=True):
        for stop_time in trip.stop_times:
            if not stop_time.departure_time:
                continue
            if stop_time.stop not in stops:
                continue
            cluster = sc.cluster_of(stop_time.stop)
            departures_by_dates = departures_by_clusters[cluster]
            for date in trip.calendar.dates:
                if date.as_date() not in dates:
                    continue
                departures_by_dates[date.as_date()].append(stop_time)
        ntrips += 1
        if ntrips % 1000 == 0:
            print("%d trips..." % (ntrips))
    with PrettyCsv(csv, ["cluster", "stop_id", "stop_name", "date",
                         "departures", "min_time", "max_time", "dep_hour"],
                   **kwargs) as csvout:
        for cluster, departures_by_dates in departures_by_clusters.items():
            for stop in cluster.items:
                csvout.writerow([cluster.id, stop.stop_id, stop.stop_name])
            if alldates:
                # Print departure counts for all dates
                dates_to_print = sorted(departures_by_dates.keys())
            else:
                # Keep only the date with the most departures
                date_max = None
                dep_max = 0
                for date, departures in departures_by_dates.items():
                    ndep = len(departures)
                    if ndep >= dep_max:
                        dep_max = ndep
                        date_max = date
                if date_max is None:
                    continue
                dates_to_print = [date_max]
            for date in dates_to_print:
                dep_times = [dep.departure_time
                             for dep in departures_by_dates[date]]
                max_hour = max(dep_times)
                min_hour = min(dep_times)
                delta_hour = max_hour - min_hour
                # Average departures per hour over the active span
                # (departure times are in seconds).
                avg_dep = (float('inf') if delta_hour == 0
                           else len(dep_times) * 3600. / delta_hour)
                csvout.writerow([cluster.id, None, None, date,
                                 len(dep_times), fmttime(min_hour),
                                 fmttime(max_hour), "%.3f" % avg_dep])
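# Quick numeric check of the "dep_hour" column computed above. Departure
# times appear to be seconds since midnight (hence the * 3600 conversion);
# the values below are made up for illustration: 121 departures spaced
# every 6 minutes between 06:00 and 18:00 span 43200 s and average
# 121 * 3600 / 43200 ~= 10.08 departures per hour.
dep_times = list(range(6 * 3600, 18 * 3600 + 1, 360))  # 121 departures
delta_hour = max(dep_times) - min(dep_times)  # 43200 s
avg_dep = len(dep_times) * 3600.0 / delta_hour
assert "%.3f" % avg_dep == "10.083"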