def bike_trips(con):
    """Plot per-bike CDFs of trip count and total trip time.

    Runs one aggregate query over the ``trips`` table grouped by
    ``bike_id``, feeds each bike's trip count and summed trip duration into
    two separate ``cdf`` accumulators, and saves the resulting curves to
    ``bike_trips.pdf`` and ``bike_time.pdf`` in the current directory.

    :param con: an open database connection; rows must be addressable by
        column name (``row["trip_count"]``), so a name-based row factory
        is presumably configured by the caller -- TODO confirm
    """
    trips_cdf = cdf()
    time_cdf = cdf()

    cur = con.cursor()
    try:
        cur.execute("SELECT COUNT(*) trip_count, SUM(trip_duration) total_time "
                    "FROM trips GROUP BY bike_id")
        # Pull rows in batches so the whole result set is never held at once.
        while True:
            rows = cur.fetchmany(1000)
            if not rows:
                break
            for row in rows:
                trips_cdf.insert(row["trip_count"])
                time_cdf.insert(row["total_time"])
    finally:
        # Release the cursor even when the query or an insert fails.
        cur.close()

    fig = plt.figure()
    try:
        plt.plot(*trips_cdf.getData())
        plt.xscale("log")
        plt.xlabel("number of trips")
        plt.ylabel("CDF of bikes")
        plt.savefig("bike_trips.pdf", bbox_inches="tight")
    finally:
        # pyplot keeps every figure alive until it is closed explicitly.
        plt.close(fig)

    fig = plt.figure()
    try:
        plt.plot(*time_cdf.getData())
        plt.xscale("log")
        plt.xlabel("total trip time (s)")
        plt.ylabel("CDF of bikes")
        plt.savefig("bike_time.pdf", bbox_inches="tight")
    finally:
        plt.close(fig)
def bike_trips(con):
    """Plot per-bike CDFs of trip count and total trip time.

    Runs one aggregate query over the ``trips`` table grouped by
    ``bike_id``, feeds each bike's trip count and summed trip duration into
    two separate ``cdf`` accumulators, and saves the resulting curves to
    ``bike_trips.pdf`` and ``bike_time.pdf`` in the current directory.

    :param con: an open database connection; rows must be addressable by
        column name (``row["trip_count"]``), so a name-based row factory
        is presumably configured by the caller -- TODO confirm
    """
    trips_cdf = cdf()
    time_cdf = cdf()

    cur = con.cursor()
    try:
        cur.execute("SELECT COUNT(*) trip_count, SUM(trip_duration) total_time "
                    "FROM trips GROUP BY bike_id")
        # Pull rows in batches so the whole result set is never held at once.
        while True:
            rows = cur.fetchmany(1000)
            if not rows:
                break
            for row in rows:
                trips_cdf.insert(row["trip_count"])
                time_cdf.insert(row["total_time"])
    finally:
        # Release the cursor even when the query or an insert fails.
        cur.close()

    fig = plt.figure()
    try:
        plt.plot(*trips_cdf.getData())
        plt.xscale("log")
        plt.xlabel("number of trips")
        plt.ylabel("CDF of bikes")
        plt.savefig("bike_trips.pdf", bbox_inches="tight")
    finally:
        # pyplot keeps every figure alive until it is closed explicitly.
        plt.close(fig)

    fig = plt.figure()
    try:
        plt.plot(*time_cdf.getData())
        plt.xscale("log")
        plt.xlabel("total trip time (s)")
        plt.ylabel("CDF of bikes")
        plt.savefig("bike_time.pdf", bbox_inches="tight")
    finally:
        plt.close(fig)
def makeCdf(self, name=None):
    """
    makeCdf: builds a cumulative distribution function from the pmf

    :param (optional) name: a name for the new cdf, defaults to the name
        of this pmf if undefined
    :return: a ``cdf`` constructed from the sorted values, their normalized
        cumulative weights, and the name
    :raises ZeroDivisionError: if the pmf is non-empty but its weights sum
        to zero
    """
    if name is None:
        name = self.name

    xs = []
    counts = []
    running_total = 0
    # NOTE(review): assumes self.values() yields (value, weight) pairs; if
    # this class is a plain mapping, items() is probably what is intended
    # -- confirm against the class definition.
    for value, prob in sorted(self.values()):
        # Accumulate before recording so each entry includes its own weight,
        # i.e. the cumulative weight of everything <= value.
        running_total += prob
        xs.append(value)
        counts.append(running_total)

    # Normalize by the grand total so the last cumulative value is 1.0; an
    # empty pmf yields empty lists and never divides.
    total = float(running_total)
    probs = [count / total for count in counts]
    return cdf(xs, probs, name)
def trip_duration(con):
    """Plot the CDF of individual trip durations and save it as a PDF.

    Reads every ``trip_duration`` value from the ``trips`` table, feeds
    each one into a ``cdf`` accumulator, and writes the curve to
    ``duration.pdf`` in the current directory with a log-scaled x axis
    labelled at 1m, 5m, 15m, 30m, 1h, 2h and 4h.

    :param con: an open database connection; rows must be addressable by
        column name (``row["trip_duration"]``), so a name-based row
        factory is presumably configured by the caller -- TODO confirm
    """
    # NOTE(review): resolution=60 presumably buckets durations per minute;
    # confirm against the cdf class.
    durations = cdf(resolution=60)

    cur = con.cursor()
    try:
        cur.execute("SELECT trip_duration FROM trips")
        # Pull rows in batches so the whole table is never held at once.
        while True:
            rows = cur.fetchmany(1000)
            if not rows:
                break
            for row in rows:
                durations.insert(row["trip_duration"])
    finally:
        # Release the cursor even when the query or an insert fails.
        cur.close()

    fig = plt.figure()
    try:
        plt.plot(*durations.getData())
        plt.xscale("log")
        plt.xlabel("trip duration (s)")
        plt.ylabel("CDF of trips")
        # Tick positions are in seconds; labels show the human-readable span.
        plt.xticks([60, 300, 900, 1800, 3600, 7200, 14400],
                   ["1m", "5m", "15m", "30m", "1h", "2h", "4h"])
        plt.grid(axis="y")
        plt.savefig("duration.pdf", bbox_inches="tight")
    finally:
        # pyplot keeps every figure alive until it is closed explicitly.
        plt.close(fig)
def trip_duration(con):
    """Plot the CDF of individual trip durations and save it as a PDF.

    Reads every ``trip_duration`` value from the ``trips`` table, feeds
    each one into a ``cdf`` accumulator, and writes the curve to
    ``duration.pdf`` in the current directory with a log-scaled x axis
    labelled at 1m, 5m, 15m, 30m, 1h, 2h and 4h.

    :param con: an open database connection; rows must be addressable by
        column name (``row["trip_duration"]``), so a name-based row
        factory is presumably configured by the caller -- TODO confirm
    """
    # NOTE(review): resolution=60 presumably buckets durations per minute;
    # confirm against the cdf class.
    durations = cdf(resolution=60)

    cur = con.cursor()
    try:
        cur.execute("SELECT trip_duration FROM trips")
        # Pull rows in batches so the whole table is never held at once.
        while True:
            rows = cur.fetchmany(1000)
            if not rows:
                break
            for row in rows:
                durations.insert(row["trip_duration"])
    finally:
        # Release the cursor even when the query or an insert fails.
        cur.close()

    fig = plt.figure()
    try:
        plt.plot(*durations.getData())
        plt.xscale("log")
        plt.xlabel("trip duration (s)")
        plt.ylabel("CDF of trips")
        # Tick positions are in seconds; labels show the human-readable span.
        plt.xticks(
            [60, 300, 900, 1800, 3600, 7200, 14400],
            ["1m", "5m", "15m", "30m", "1h", "2h", "4h"],
        )
        plt.grid(axis="y")
        plt.savefig("duration.pdf", bbox_inches="tight")
    finally:
        # pyplot keeps every figure alive until it is closed explicitly.
        plt.close(fig)
def make_put_cdf(res):
    """Build a ``custom`` object backed by a ``cdf`` created with ``res``.

    Passes two callables to ``custom``: a zero-argument factory that
    creates ``cdf(res)``, and a two-argument function that inserts a value
    into a cdf object and hands that object back.

    :param res: forwarded unchanged as the sole argument to ``cdf``
    :return: whatever ``custom`` returns for the two callables
    """
    # Imported locally, as in the original, so the cdf module is only
    # loaded when this function is actually called.
    from cdf import cdf

    def new_cdf():
        return cdf(res)

    def put(cdfObj, val):
        # insert() is expected to return a falsy value, so the ``or`` falls
        # through to the cdf object itself.
        return cdfObj.insert(val) or cdfObj

    return custom(new_cdf, put)