def entries_day_line(turnstile_weather):
    """Draw total turnstile entries per day of week as a dashed line chart.

    The frame is passed through ``time_period`` first, then
    ``ENTRIESn_hourly`` is summed per ``day_week`` value with a pandasql
    query. The totals are plotted in millions on a freshly created figure.

    Args:
        turnstile_weather: DataFrame with an ``ENTRIESn_hourly`` column.
            The ``day_week`` column used for grouping is presumably added
            by ``time_period`` -- confirm against that helper.

    Returns:
        The module-level ``plt`` object (presumably ``matplotlib.pyplot``)
        after the chart has been drawn on a new figure.
    """
    turnstile_weather = time_period(turnstile_weather)

    query = """
    select day_week as Day_Week,sum(ENTRIESn_hourly) as ENTRIESn_hourly
    from turnstile_weather
    group by day_week
    """

    # Only the table named in the query needs to be visible to pandasql.
    totals_by_day = pandasql.sqldf(
        query, {'turnstile_weather': turnstile_weather})

    day_index = totals_by_day['Day_Week']
    entries_millions = totals_by_day['ENTRIESn_hourly'] / 1000000

    # NOTE(review): labels are assigned positionally to the rows returned by
    # the query, which has no ORDER BY; this assumes exactly seven rows that
    # come back in Monday..Sunday order -- confirm the day_week encoding.
    day_names = ['Mon', 'Tue', 'Wed', 'Thur', 'Fri', 'Sat', 'Sun']

    plt.figure()
    plt.xticks(day_index, day_names)
    plt.xlim(-1, 7)
    plt.ylim(0, 25)
    plt.plot(day_index, entries_millions, marker='.', linestyle='--')

    plt.title('Turnstile Entries by Day (5/1/2011 - 5/31/2011)')
    plt.xlabel('Day of Week')
    plt.ylabel('Turnstile Entries (in millions)')

    return plt
# Example no. 2
# 0
def entries_day_line(turnstile_weather):
    """Plot summed hourly entries for each day of the week.

    Steps: run ``time_period`` over the input frame, aggregate
    ``ENTRIESn_hourly`` by ``day_week`` through pandasql, and render the
    per-day totals (scaled to millions) as a dashed line with point markers.

    Args:
        turnstile_weather: DataFrame holding ``ENTRIESn_hourly``; the
            ``day_week`` grouping column is presumably produced by
            ``time_period`` -- verify in that helper.

    Returns:
        The module-level ``plt`` object (presumably ``matplotlib.pyplot``),
        with the new figure holding the chart.
    """
    turnstile_weather = time_period(turnstile_weather)

    sql = """
    select day_week as Day_Week,sum(ENTRIESn_hourly) as ENTRIESn_hourly
    from turnstile_weather
    group by day_week
    """

    # Expose just the queried table to pandasql instead of every local.
    tables = {'turnstile_weather': turnstile_weather}
    per_day = pandasql.sqldf(sql, tables)

    weekday_codes = per_day['Day_Week']
    totals_in_millions = per_day['ENTRIESn_hourly'] / 1000000

    # NOTE(review): tick labels are matched to query rows by position and the
    # query has no ORDER BY; assumes seven rows in Mon..Sun order -- confirm.
    weekday_labels = ['Mon', 'Tue', 'Wed', 'Thur', 'Fri', 'Sat', 'Sun']

    plt.figure()
    plt.xticks(weekday_codes, weekday_labels)
    plt.xlim(-1, 7)
    plt.ylim(0, 25)
    plt.plot(weekday_codes, totals_in_millions, marker='.', linestyle='--')

    plt.xlabel('Day of Week')
    plt.ylabel('Turnstile Entries (in millions)')
    plt.title('Turnstile Entries by Day (5/1/2011 - 5/31/2011)')

    return plt
def entries_time_period_bar(turnstile_weather):
    """Draw total turnstile entries per time period as a bar chart.

    The frame is passed through ``time_period`` first, then
    ``ENTRIESn_hourly`` is summed per ``time_period`` value with a pandasql
    query. The totals are plotted in millions on a freshly created figure.

    Args:
        turnstile_weather: DataFrame with an ``ENTRIESn_hourly`` column.
            The ``time_period`` column used for grouping is presumably
            added by the ``time_period`` helper -- confirm there.

    Returns:
        The module-level ``plt`` object (presumably ``matplotlib.pyplot``)
        after the chart has been drawn on a new figure.
    """
    turnstile_weather = time_period(turnstile_weather)

    query = """
    select time_period as Time_Period,sum(ENTRIESn_hourly) as ENTRIESn_hourly
    from turnstile_weather
    group by time_period
    """

    # Only the table named in the query needs to be visible to pandasql.
    totals_by_period = pandasql.sqldf(
        query, {'turnstile_weather': turnstile_weather})

    period_index = totals_by_period['Time_Period']
    entries_millions = totals_by_period['ENTRIESn_hourly'] / 1000000

    # NOTE(review): labels are assigned positionally to the rows returned by
    # the query, which has no ORDER BY; this assumes exactly five rows whose
    # order matches the list below -- confirm the time_period encoding.
    period_names = ['Late Night', 'Weekends', 'Midday', 'Evening', 'Rush Hour']

    plt.figure()
    plt.xticks(period_index, period_names)
    plt.xlim(0, 6)
    plt.ylim(0, 25)
    plt.bar(period_index, entries_millions,
            width=0.25, align='center', color='DodgerBlue')

    plt.title('Turnstile Entries by Time Period')
    plt.xlabel('Time Period')
    plt.ylabel('Turnstile Entries (in millions)')

    return plt
# Example no. 4
# 0
def entries_time_period_bar(turnstile_weather):
    """Plot summed hourly entries for each time period as bars.

    Steps: run ``time_period`` over the input frame, aggregate
    ``ENTRIESn_hourly`` by ``time_period`` through pandasql, and render the
    per-period totals (scaled to millions) as centered bars.

    Args:
        turnstile_weather: DataFrame holding ``ENTRIESn_hourly``; the
            ``time_period`` grouping column is presumably produced by the
            ``time_period`` helper -- verify in that helper.

    Returns:
        The module-level ``plt`` object (presumably ``matplotlib.pyplot``),
        with the new figure holding the chart.
    """
    turnstile_weather = time_period(turnstile_weather)

    sql = """
    select time_period as Time_Period,sum(ENTRIESn_hourly) as ENTRIESn_hourly
    from turnstile_weather
    group by time_period
    """

    # Expose just the queried table to pandasql instead of every local.
    tables = {'turnstile_weather': turnstile_weather}
    per_period = pandasql.sqldf(sql, tables)

    period_codes = per_period['Time_Period']
    totals_in_millions = per_period['ENTRIESn_hourly'] / 1000000

    # NOTE(review): tick labels are matched to query rows by position and the
    # query has no ORDER BY; assumes five rows in the order listed -- confirm.
    period_labels = ['Late Night', 'Weekends', 'Midday', 'Evening', 'Rush Hour']

    bar_style = dict(width=0.25, align='center', color='DodgerBlue')

    plt.figure()
    plt.xticks(period_codes, period_labels)
    plt.xlim(0, 6)
    plt.ylim(0, 25)
    plt.bar(period_codes, totals_in_millions, **bar_style)

    plt.xlabel('Time Period')
    plt.ylabel('Turnstile Entries (in millions)')
    plt.title('Turnstile Entries by Time Period')

    return plt
# Example no. 5
# 0
import numpy as np
import pandas as pd
import pandasql
import statsmodels.api as sm
from time_period import time_period

# Fit an ordinary-least-squares model of hourly turnstile entries on weather
# readings plus per-unit and per-time-period indicator columns, then print
# the statsmodels summary.

# Load data (path is relative to the current working directory).
input_filename = "turnstile_weather_v2.csv"
df = pd.read_csv(input_filename)

# Add Time Period; the 'time_period' column read below is presumably added
# by this helper -- confirm in time_period.py.
df = time_period(df)

# Create one indicator column per turnstile unit and per time period.
dummy_units = pd.get_dummies(df['UNIT'], prefix='unit')
dummy_hour = pd.get_dummies(df['time_period'], prefix='time')
# Cast to float so the design matrix is numeric on every pandas version:
# get_dummies has returned float, uint8 or bool columns over time, and a
# bool/float mix becomes an object array that statsmodels rejects.
dummy_combined = dummy_units.join(dummy_hour).astype(float)

values = df[['ENTRIESn_hourly']]  # response
features = df[['rain', 'tempi', 'wspdi']].join(dummy_combined)  # predictor
features = sm.add_constant(features)  # Adds a constant term to the predictor

# NOTE(review): every UNIT level and every time_period level gets its own
# indicator *and* a constant is added, so each indicator group sums to the
# constant column and the individual coefficients are not uniquely
# identified. Dropping one level per group (e.g. 'unit_R034') would remove
# the redundancy; left unchanged here to keep the printed results as-is.
mod = sm.OLS(values, features)
res = mod.fit()

# Function-call form of print works on both Python 2 and Python 3.
print(res.summary())
import numpy as np
import pandas as pd
import pandasql
import statsmodels.api as sm
from time_period import time_period

# Regress hourly turnstile entries on rain, temperature and wind speed
# together with indicator columns for turnstile unit and time period, and
# print the fitted model's summary table.

# Load data (path is relative to the current working directory).
input_filename = "turnstile_weather_v2.csv"
df = pd.read_csv(input_filename)

# Add Time Period; the 'time_period' column used below is presumably
# created by this helper -- confirm in time_period.py.
df = time_period(df)

# Create dummy units and combine.
dummy_units = pd.get_dummies(df['UNIT'], prefix='unit')
dummy_hour = pd.get_dummies(df['time_period'], prefix='time')
# Force a float dtype: newer pandas releases emit bool indicator columns,
# which turn the joined design matrix into an object array that statsmodels
# refuses to fit. On older releases this cast is a no-op.
dummy_combined = dummy_units.join(dummy_hour).astype(float)

values = df[['ENTRIESn_hourly']]  # response
features = df[['rain', 'tempi', 'wspdi']].join(dummy_combined)  # predictor
features = sm.add_constant(features)  # Adds a constant term to the predictor

# NOTE(review): a full set of indicators per group plus a constant makes the
# design matrix rank deficient (each indicator group sums to the constant),
# so single coefficients should be read with care. Dropping one level per
# group (e.g. 'unit_R034') would fix that; behavior is left unchanged here.
mod = sm.OLS(values, features)
res = mod.fit()

# print() with parentheses is valid on both Python 2 and Python 3.
print(res.summary())