Example #1
0
    def test_clusterizer(self):
        # Load the dummy GTFS feed, cluster its stops, then check which
        # stops end up in the same cluster and which do not.
        dao = Dao(DAO_URL, sql_logging=SQL_LOG)
        dao.load_gtfs(DUMMY_GTFS)

        # Stops closer than 300m to each other are merged together
        clusterizer = SpatialClusterizer(300.0)
        for a_stop in dao.stops():
            clusterizer.add_point(a_stop)
        clusterizer.clusterize()

        def together(first, second):
            # Shorthand for the clusterizer membership query.
            return clusterizer.in_same_cluster(first, second)

        # First group: all three stops are expected in one cluster.
        gare_t = dao.stop("GBSJT")
        gare = dao.stop("GBSJ")
        gare_b = dao.stop("GBSJB")
        for first, second in ((gare_t, gare), (gare_t, gare_b),
                              (gare, gare_b)):
            self.assertTrue(together(first, second))

        # Second group: "BQ" is expected to be clustered with both others.
        bq_main = dao.stop("BQ")
        bq_a = dao.stop("BQA")
        bq_d = dao.stop("BQD")
        for other in (bq_a, bq_d):
            self.assertTrue(together(bq_main, other))

        # Third group: "BS" is expected to be clustered with both others.
        bs_main = dao.stop("BS")
        bs_1 = dao.stop("BS1")
        bs_2 = dao.stop("BS2")
        for other in (bs_1, bs_2):
            self.assertTrue(together(bs_main, other))

        # Stops taken from different groups must not share a cluster.
        for first, second in ((gare_t, bq_main), (gare_t, bs_main),
                              (gare_b, bs_2)):
            self.assertFalse(together(first, second))

        # "BJB" must not be clustered with any of the three groups.
        lone = dao.stop("BJB")
        for other in (gare_t, bs_main, bq_main):
            self.assertFalse(together(lone, other))
Example #2
0
    def test_clusterizer(self):
        # Exercise SpatialClusterizer with a radius of self._NAUTICAL_MILE on
        # points that differ only by their first coordinate, with offsets
        # expressed in sixtieths of a degree (presumably arc-minutes of
        # latitude, i.e. roughly nautical miles -- confirm with SimplePoint).

        # Scenario 1: p2 is just beyond the radius from p1 (1.001/60) while
        # p3 is just within it (0.999/60), on the opposite side.
        p1 = SimplePoint(45, 0)
        p2 = SimplePoint(45 + 1.001 / 60, 0)
        p3 = SimplePoint(45 - 0.999 / 60, 0)
        sc = SpatialClusterizer(self._NAUTICAL_MILE)
        sc.add_points((p1, p2, p3))
        sc.clusterize()
        self.assertFalse(sc.in_same_cluster(p1, p2))
        self.assertTrue(sc.in_same_cluster(p1, p3))
        self.assertFalse(sc.in_same_cluster(p2, p3))
        # assertEqual reports the actual cluster count when the check fails.
        self.assertEqual(len(sc.clusters()), 2)

        # Scenario 2: p1 and p2 are 1.6/60 apart, but p3 sits in between at
        # 0.8/60 from each; all three are expected to end up in one cluster.
        p1 = SimplePoint(45, 0)
        p2 = SimplePoint(45 + 2 * 0.8 / 60, 0)
        p3 = SimplePoint(45 + 1 * 0.8 / 60, 0)
        sc = SpatialClusterizer(self._NAUTICAL_MILE)
        sc.add_points((p1, p2, p3))
        sc.clusterize()
        self.assertTrue(sc.in_same_cluster(p1, p2))
        self.assertTrue(sc.in_same_cluster(p1, p3))
        self.assertTrue(sc.in_same_cluster(p2, p3))
        self.assertEqual(len(sc.clusters()), 1)
Example #3
0
    def run(self, context, stopshp=None, hopshp=None, cluster=0, **kwargs):
        """Export clustered stop and hop trip counts as shapefiles.

        Stops are grouped with a SpatialClusterizer; every hop of every
        filtered trip is then counted per (from cluster, to cluster) pair
        and per departure cluster.

        Args:
            context: plugin context; ``context.dao()`` and
                ``context.args.filter`` are used to load the data.
            stopshp: target handed to the stop-cluster shapefile writer's
                ``save()``; skipped when None or empty.
            hopshp: target handed to the hop shapefile writer's ``save()``;
                skipped when None or empty.
            cluster: clustering distance given to SpatialClusterizer after
                conversion with ``float()`` (presumably meters).
            **kwargs: accepted for interface compatibility, unused here.

        Returns:
            None.
        """
        cluster_meters = float(cluster)
        # Same truthiness test as the generation steps below, so an empty
        # target does not trigger the whole computation for no output.
        if not stopshp and not hopshp:
            print("Nothing to generate! Bailing out")
            return

        print("Loading stops...")
        stops = set()
        sc = SpatialClusterizer(cluster_meters)
        for stop in context.dao().stops(fltr=context.args.filter):
            sc.add_point(stop)
            stops.add(stop)
        print("Loaded %d stops. Clusterize..." % (len(stops)))
        sc.clusterize()
        print("Aggregated in %d clusters" % (len(sc.clusters())))

        print("Loading calendar dates")
        dates = set(context.dao().calendar_dates_date(fltr=context.args.filter))
        print("Loaded %d dates" % (len(dates)))

        print("Computing stop and hop trip count...")
        # Both maps hold a [trip count, trip x day count] accumulator.
        hop_tripcount = defaultdict(lambda: [0, 0])
        clu_tripcount = defaultdict(lambda: [0, 0])
        ntrips = 0
        for trip in context.dao().trips(fltr=context.args.filter,
                                        prefetch_stop_times=True,
                                        prefetch_stops=True,
                                        prefetch_calendars=True):
            # Compute the number of days the trip is running
            # RESTRICTED ON THE FILTERED DATES
            ndays = sum(1 for date in trip.calendar.dates
                        if date.as_date() in dates)
            for st1, st2 in trip.hops():
                cluster1 = sc.cluster_of(st1.stop)
                cluster2 = sc.cluster_of(st2.stop)
                # NOTE: hops starting and ending in the same cluster are
                # counted like any other hop.
                hop_count = hop_tripcount[(cluster1, cluster2)]
                hop_count[0] += 1
                hop_count[1] += ndays
                # Only the departure side of the hop feeds the stop counts.
                dep_count = clu_tripcount[cluster1]
                dep_count[0] += 1
                dep_count[1] += ndays
            ntrips += 1
            if ntrips % 1000 == 0:
                print("%d trips..." % ntrips)

        if stopshp:
            print("Generating stops cluster shapefile...")
            stopshpwrt = shapefile.Writer(shapefile.POINT)
            stopshpwrt.field("id", "N")
            stopshpwrt.field("ids", "C", 100)
            stopshpwrt.field("name", "C", 200)
            stopshpwrt.field("ndep", "N")
            stopshpwrt.field("ndepday", "N")
            # The loop variable is not named `cluster` to avoid shadowing
            # the parameter of the same name.
            for clu, (ndep, ndepday) in clu_tripcount.items():
                # Longitude is passed first, latitude second.
                stopshpwrt.point(clu.lon(), clu.lat())
                ids = clu.aggregate(lambda s: s.stop_id, sep=';')
                names = clu.aggregate(lambda s: s.stop_name, sep=';')
                stopshpwrt.record(clu.id, self.remove_accents(ids),
                                  self.remove_accents(names),
                                  ndep, ndepday)
            stopshpwrt.save(stopshp)

        if hopshp:
            print("Generating hop shapefile...")
            hopshpwrt = shapefile.Writer(shapefile.POLYLINE)
            hopshpwrt.field("from_id", "N")
            hopshpwrt.field("from_name", "C", 200)
            hopshpwrt.field("to_id", "N")
            hopshpwrt.field("to_name", "C", 200)
            hopshpwrt.field("name", "C", 200)
            hopshpwrt.field("ntrip", "N")
            hopshpwrt.field("ntripday", "N")
            for (c1, c2), (trip_count, tripday_count) in hop_tripcount.items():
                c1name = c1.aggregate(lambda s: s.stop_name, sep=';')
                c2name = c2.aggregate(lambda s: s.stop_name, sep=';')
                # One two-point segment from the first to the second cluster.
                hopshpwrt.line(parts=[[[c1.lon(), c1.lat()],
                                       [c2.lon(), c2.lat()]]])
                hopshpwrt.record(c1.id, self.remove_accents(c1name), c2.id,
                                 self.remove_accents(c2name),
                                 self.remove_accents(c1name + " -> " + c2name),
                                 trip_count, tripday_count)
            hopshpwrt.save(hopshp)
Example #4
0
    def run(self, context, stopshp=None, hopshp=None, cluster=0, **kwargs):
        """Export clustered stop and hop trip counts as shapefiles.

        Stops are grouped with a SpatialClusterizer; every hop of every
        filtered trip is then counted per (from cluster, to cluster) pair
        and per departure cluster.

        Args:
            context: plugin context; ``context.dao()`` and
                ``context.args.filter`` are used to load the data.
            stopshp: target handed to the stop-cluster shapefile writer's
                ``save()``; skipped when None or empty.
            hopshp: target handed to the hop shapefile writer's ``save()``;
                skipped when None or empty.
            cluster: clustering distance given to SpatialClusterizer after
                conversion with ``float()`` (presumably meters).
            **kwargs: accepted for interface compatibility, unused here.

        Returns:
            None.
        """
        cluster_meters = float(cluster)
        # Same truthiness test as the generation steps below, so an empty
        # target does not trigger the whole computation for no output.
        if not stopshp and not hopshp:
            print("Nothing to generate! Bailing out")
            return

        print("Loading stops...")
        stops = set()
        sc = SpatialClusterizer(cluster_meters)
        for stop in context.dao().stops(fltr=context.args.filter):
            sc.add_point(stop)
            stops.add(stop)
        print("Loaded %d stops. Clusterize..." % (len(stops)))
        sc.clusterize()
        print("Aggregated in %d clusters" % (len(sc.clusters())))

        print("Loading calendar dates")
        dates = set(
            context.dao().calendar_dates_date(fltr=context.args.filter))
        print("Loaded %d dates" % (len(dates)))

        print("Computing stop and hop trip count...")
        # Both maps hold a [trip count, trip x day count] accumulator.
        hop_tripcount = defaultdict(lambda: [0, 0])
        clu_tripcount = defaultdict(lambda: [0, 0])
        ntrips = 0
        for trip in context.dao().trips(fltr=context.args.filter,
                                        prefetch_stop_times=True,
                                        prefetch_stops=True,
                                        prefetch_calendars=True):
            # Compute the number of days the trip is running
            # RESTRICTED ON THE FILTERED DATES
            ndays = sum(1 for date in trip.calendar.dates
                        if date.as_date() in dates)
            for st1, st2 in trip.hops():
                cluster1 = sc.cluster_of(st1.stop)
                cluster2 = sc.cluster_of(st2.stop)
                # NOTE: hops starting and ending in the same cluster are
                # counted like any other hop.
                hop_count = hop_tripcount[(cluster1, cluster2)]
                hop_count[0] += 1
                hop_count[1] += ndays
                # Only the departure side of the hop feeds the stop counts.
                dep_count = clu_tripcount[cluster1]
                dep_count[0] += 1
                dep_count[1] += ndays
            ntrips += 1
            if ntrips % 1000 == 0:
                print("%d trips..." % ntrips)

        if stopshp:
            print("Generating stops cluster shapefile...")
            stopshpwrt = shapefile.Writer(shapefile.POINT)
            stopshpwrt.field("id", "N")
            stopshpwrt.field("ids", "C", 100)
            stopshpwrt.field("name", "C", 200)
            stopshpwrt.field("ndep", "N")
            stopshpwrt.field("ndepday", "N")
            # The loop variable is not named `cluster` to avoid shadowing
            # the parameter of the same name.
            for clu, (ndep, ndepday) in clu_tripcount.items():
                # Longitude is passed first, latitude second.
                stopshpwrt.point(clu.lon(), clu.lat())
                ids = clu.aggregate(lambda s: s.stop_id, sep=';')
                names = clu.aggregate(lambda s: s.stop_name, sep=';')
                stopshpwrt.record(clu.id, self.remove_accents(ids),
                                  self.remove_accents(names), ndep,
                                  ndepday)
            stopshpwrt.save(stopshp)

        if hopshp:
            print("Generating hop shapefile...")
            hopshpwrt = shapefile.Writer(shapefile.POLYLINE)
            hopshpwrt.field("from_id", "N")
            hopshpwrt.field("from_name", "C", 200)
            hopshpwrt.field("to_id", "N")
            hopshpwrt.field("to_name", "C", 200)
            hopshpwrt.field("name", "C", 200)
            hopshpwrt.field("ntrip", "N")
            hopshpwrt.field("ntripday", "N")
            for (c1, c2), (trip_count, tripday_count) in hop_tripcount.items():
                c1name = c1.aggregate(lambda s: s.stop_name, sep=';')
                c2name = c2.aggregate(lambda s: s.stop_name, sep=';')
                # One two-point segment from the first to the second cluster.
                hopshpwrt.line(parts=[[[c1.lon(), c1.lat()],
                                       [c2.lon(), c2.lat()]]])
                hopshpwrt.record(c1.id, self.remove_accents(c1name), c2.id,
                                 self.remove_accents(c2name),
                                 self.remove_accents(c1name + " -> " + c2name),
                                 trip_count, tripday_count)
            hopshpwrt.save(hopshp)
Example #5
0
    def test_clusterizer(self):
        # Exercise SpatialClusterizer with a radius of self._NAUTICAL_MILE on
        # points that differ only by their first coordinate, with offsets
        # expressed in sixtieths of a degree (presumably arc-minutes of
        # latitude, i.e. roughly nautical miles -- confirm with SimplePoint).

        # Scenario 1: p2 is just beyond the radius from p1 (1.001/60) while
        # p3 is just within it (0.999/60), on the opposite side.
        p1 = SimplePoint(45, 0)
        p2 = SimplePoint(45 + 1.001 / 60, 0)
        p3 = SimplePoint(45 - 0.999 / 60, 0)
        sc = SpatialClusterizer(self._NAUTICAL_MILE)
        sc.add_points((p1, p2, p3))
        sc.clusterize()
        self.assertFalse(sc.in_same_cluster(p1, p2))
        self.assertTrue(sc.in_same_cluster(p1, p3))
        self.assertFalse(sc.in_same_cluster(p2, p3))
        # assertEqual reports the actual cluster count when the check fails.
        self.assertEqual(len(sc.clusters()), 2)

        # Scenario 2: p1 and p2 are 1.6/60 apart, but p3 sits in between at
        # 0.8/60 from each; all three are expected to end up in one cluster.
        p1 = SimplePoint(45, 0)
        p2 = SimplePoint(45 + 2 * 0.8 / 60, 0)
        p3 = SimplePoint(45 + 1 * 0.8 / 60, 0)
        sc = SpatialClusterizer(self._NAUTICAL_MILE)
        sc.add_points((p1, p2, p3))
        sc.clusterize()
        self.assertTrue(sc.in_same_cluster(p1, p2))
        self.assertTrue(sc.in_same_cluster(p1, p3))
        self.assertTrue(sc.in_same_cluster(p2, p3))
        self.assertEqual(len(sc.clusters()), 1)
Example #6
0
    def run(self, context, csv=None, cluster=0, dstp=0.5, samename=False, alldates=False, **kwargs):
        """Write per-cluster departure statistics to a CSV output.

        Stops are clustered, the departures of every filtered trip are
        grouped by cluster and by date, then one row per cluster stop and
        one statistics row per printed date are written.

        Args:
            context: plugin context; ``context.dao()`` and
                ``context.args.filter`` are used to load the data.
            csv: first argument given to PrettyCsv (the output target).
            cluster: clustering distance given to SpatialClusterizer after
                conversion with ``float()`` (presumably meters).
            dstp: converted with ``float()`` and passed, together with
                ``samename``, to ``SpatialClusterizer.make_comparator``.
            samename: see ``dstp``.
            alldates: when true, print a statistics row for every date of
                a cluster; otherwise only for the date having the most
                departures.
            **kwargs: forwarded to PrettyCsv.

        Returns:
            None.
        """
        cluster_meters = float(cluster)
        dstp = float(dstp)

        print("Loading stops...")
        stops = set()
        sc = SpatialClusterizer(cluster_meters)
        for stop in context.dao().stops(fltr=context.args.filter):
            sc.add_point(stop)
            stops.add(stop)
        print("Loaded %d stops. Clusterize..." % (len(stops)))
        sc.clusterize(comparator=sc.make_comparator(samename, dstp))
        print("Aggregated in %d clusters" % (len(sc.clusters())))

        print("Loading calendar dates...")
        dates = set(context.dao().calendar_dates_date(fltr=context.args.filter))
        print("Loaded %d dates" % (len(dates)))

        print("Processing trips...")
        # cluster -> date -> list of stop times departing on that date
        departures_by_clusters = defaultdict(lambda: defaultdict(list))
        ntrips = 0
        for trip in context.dao().trips(fltr=context.args.filter, prefetch_stops=True, prefetch_stop_times=True, prefetch_calendars=True):
            for stop_time in trip.stop_times:
                # Skip only missing departure times: a plain truthiness test
                # would also discard a departure time equal to 0.
                if stop_time.departure_time is None:
                    continue
                if stop_time.stop not in stops:
                    continue
                # Not named `cluster`, to avoid shadowing the parameter.
                stop_cluster = sc.cluster_of(stop_time.stop)
                departures_by_dates = departures_by_clusters[stop_cluster]
                for date in trip.calendar.dates:
                    day = date.as_date()
                    if day not in dates:
                        continue
                    departures_by_dates[day].append(stop_time)
            # Count the trip before reporting, so the progress message gives
            # the number of trips actually processed.
            ntrips += 1
            if ntrips % 1000 == 0:
                print("%d trips..." % (ntrips))

        with PrettyCsv(csv, ["cluster", "stop_id", "stop_name", "date", "departures", "min_time", "max_time", "dep_hour" ], **kwargs) as csvout:
            for stop_cluster, departures_by_dates in departures_by_clusters.items():
                # One row per stop belonging to the cluster.
                for stop in stop_cluster.items:
                    csvout.writerow([ stop_cluster.id, stop.stop_id, stop.stop_name ])
                if alldates:
                    # Print departure count for all dates
                    dates_to_print = sorted(departures_by_dates)
                else:
                    # Compute the max only; on ties the last date iterated
                    # wins because of the >= comparison.
                    date_max = None
                    dep_max = 0
                    for date, departures in departures_by_dates.items():
                        ndep = len(departures)
                        if ndep >= dep_max:
                            dep_max = ndep
                            date_max = date
                    if date_max is None:
                        # No departure on any filtered date for this cluster.
                        continue
                    dates_to_print = [ date_max ]
                for date in dates_to_print:
                    dep_times = [dep.departure_time for dep in departures_by_dates[date]]
                    max_hour = max(dep_times)
                    min_hour = min(dep_times)
                    delta_hour = max_hour - min_hour
                    # Departures per 3600 time units over the service span
                    # (presumably per hour, with times in seconds); infinite
                    # when all departures share the same time.
                    avg_dep = float('inf') if delta_hour == 0 else len(dep_times) * 3600. / delta_hour
                    csvout.writerow([ stop_cluster.id, None, None, date, len(dep_times), fmttime(min_hour), fmttime(max_hour), "%.3f" % avg_dep ])