Beispiel #1
0
def bike_trips(con):
    """Plot per-bike CDFs of trip count and of total trip time.

    Runs a single aggregate query over ``trips`` grouped by ``bike_id``,
    feeds each bike's trip count and summed trip duration into two ``cdf``
    accumulators, and saves one log-x plot per accumulator to
    ``bike_trips.pdf`` and ``bike_time.pdf``.

    :param con: open database connection; result rows must be indexable by
        column name (``row["trip_count"]``).
    """
    trips_cdf = cdf()
    time_cdf = cdf()

    cur = con.cursor()
    try:
        cur.execute("SELECT COUNT(*) trip_count, SUM(trip_duration) total_time "
                    "FROM trips GROUP BY bike_id")
        # Pull the result set in batches of up to 1000 rows.
        while True:
            rows = cur.fetchmany(1000)
            if not rows:
                break
            for row in rows:
                trips_cdf.insert(row["trip_count"])
                time_cdf.insert(row["total_time"])
    finally:
        # Release the cursor even when the query or an insert raises.
        cur.close()

    # One figure per accumulator: (data, x-axis label, output file).
    plots = (
        (trips_cdf, "number of trips", "bike_trips.pdf"),
        (time_cdf, "total trip time (s)", "bike_time.pdf"),
    )
    for data, xlabel, outfile in plots:
        plt.figure()
        plt.plot(*data.getData())
        plt.xscale("log")
        plt.xlabel(xlabel)
        plt.ylabel("CDF of bikes")
        plt.savefig(outfile, bbox_inches="tight")
Beispiel #2
0
def bike_trips(con):
    """Save CDF plots of trips-per-bike and total-time-per-bike.

    Executes one ``GROUP BY bike_id`` aggregate over the ``trips`` table,
    inserts every bike's ``trip_count`` and ``total_time`` into separate
    ``cdf`` accumulators, then writes ``bike_trips.pdf`` and
    ``bike_time.pdf`` (both with a logarithmic x axis).

    :param con: open database connection whose rows support lookup by
        column name (``row["total_time"]``).
    """
    count_dist = cdf()
    duration_dist = cdf()

    cur = con.cursor()
    try:
        cur.execute("SELECT COUNT(*) trip_count, SUM(trip_duration) total_time "
            "FROM trips GROUP BY bike_id")
        while True:
            # Each call returns at most 1000 rows; empty means exhausted.
            batch = cur.fetchmany(1000)
            if not batch:
                break
            for row in batch:
                count_dist.insert(row["trip_count"])
                duration_dist.insert(row["total_time"])
    finally:
        # Guarantee the cursor is closed on both success and failure.
        cur.close()

    # Both figures share everything except data, x label and file name.
    for dist, x_label, pdf_name in (
            (count_dist, "number of trips", "bike_trips.pdf"),
            (duration_dist, "total trip time (s)", "bike_time.pdf")):
        plt.figure()
        plt.plot(*dist.getData())
        plt.xscale("log")
        plt.xlabel(x_label)
        plt.ylabel("CDF of bikes")
        plt.savefig(pdf_name, bbox_inches="tight")
Beispiel #3
0
	def makeCdf(self, name=None):
		"""Build a cumulative distribution function from this pmf.

		Walks the pmf entries in sorted order, accumulates their weights into
		a running total, and normalises the cumulative totals by the grand
		total so the last probability is 1.0.

		:param name: (optional) name for the new cdf; defaults to this pmf's
			own name when omitted.
		:returns: the result of ``cdf(xs, probs, name)`` where ``xs`` are the
			sorted values and ``probs`` the matching cumulative probabilities.
		:raises ZeroDivisionError: if the pmf is non-empty but its weights
			sum to zero.
		"""
		if name is None:
			name = self.name
		xs = []
		cumulative = []
		running_total = 0
		# NOTE(review): the unpacking requires self.values() to yield
		# (value, prob) pairs -- confirm against the class definition.
		for value, prob in sorted(self.values()):
			# Accumulate before recording so each entry includes its own weight.
			running_total += prob
			xs.append(value)
			cumulative.append(running_total)
		# Force float division so integer counts also normalise correctly.
		total = float(running_total)
		probs = [partial / total for partial in cumulative]
		return cdf(xs, probs, name)
Beispiel #4
0
def trip_duration(con):
    """Plot the CDF of individual trip durations and save it as a PDF.

    Reads ``trip_duration`` for every row of ``trips``, inserts each value
    into a ``cdf`` created with ``resolution=60``, and writes a log-x plot
    with human-readable duration ticks to ``duration.pdf``.

    :param con: open database connection; result rows must be indexable by
        column name (``row["trip_duration"]``).
    """
    durations = cdf(resolution=60)

    cur = con.cursor()
    try:
        cur.execute("SELECT trip_duration FROM trips")
        # Pull the result set in batches of up to 1000 rows.
        while True:
            rows = cur.fetchmany(1000)
            if not rows:
                break
            for row in rows:
                durations.insert(row["trip_duration"])
    finally:
        # Release the cursor even when the query or an insert raises.
        cur.close()

    plt.figure()
    plt.plot(*durations.getData())
    plt.xscale("log")
    plt.xlabel("trip duration (s)")
    plt.ylabel("CDF of trips")
    # Tick positions are in seconds; labels show the same spans in m/h.
    plt.xticks([60, 300, 900, 1800, 3600, 7200, 14400],
               ["1m", "5m", "15m", "30m", "1h", "2h", "4h"])
    plt.grid(axis="y")
    plt.savefig("duration.pdf", bbox_inches="tight")
Beispiel #5
0
def trip_duration(con):
    """Save a CDF plot of per-trip duration to ``duration.pdf``.

    Streams the ``trip_duration`` column of ``trips`` into a ``cdf``
    accumulator built with ``resolution=60`` and plots the accumulated data
    on a logarithmic x axis with ticks labelled from one minute to four hours.

    :param con: open database connection whose rows support lookup by
        column name (``row["trip_duration"]``).
    """
    duration_dist = cdf(resolution=60)

    cur = con.cursor()
    try:
        cur.execute("SELECT trip_duration FROM trips")
        while True:
            # Each call returns at most 1000 rows; empty means exhausted.
            batch = cur.fetchmany(1000)
            if not batch:
                break
            for row in batch:
                duration_dist.insert(row["trip_duration"])
    finally:
        # Guarantee the cursor is closed on both success and failure.
        cur.close()

    # Tick positions in seconds paired with their display labels.
    tick_seconds = [60, 300, 900, 1800, 3600, 7200, 14400]
    tick_labels = ["1m", "5m", "15m", "30m", "1h", "2h", "4h"]

    plt.figure()
    plt.plot(*duration_dist.getData())
    plt.xscale("log")
    plt.xlabel("trip duration (s)")
    plt.ylabel("CDF of trips")
    plt.xticks(tick_seconds, tick_labels)
    plt.grid(axis="y")
    plt.savefig("duration.pdf", bbox_inches="tight")
Beispiel #6
0
def make_put_cdf(res):
    """Return a ``custom(...)`` object wired to build and fill a cdf.

    The first callable passed to ``custom`` creates a fresh ``cdf(res)``;
    the second inserts a value into an existing cdf object.

    :param res: argument forwarded positionally to the ``cdf`` constructor.
    """
    from cdf import cdf

    def _new_cdf():
        return cdf(res)

    def _insert(cdfObj, val):
        # Hand back insert()'s result when truthy, otherwise the cdf itself.
        return cdfObj.insert(val) or cdfObj

    return custom(_new_cdf, _insert)