import pandas as pd
from regression import Regression

# Single Regression helper; every section below reconfigures it in place.
x = Regression()

# Load the trip summary and turn the date column into datetimes.
df = pd.read_csv("trips_summary_covid_pub.csv")
df["date"] = pd.to_datetime(df["date"])

# Flag rows dated before the cutoff (1 = before, 0 = otherwise) and keep
# only the rows that are NOT flagged.
_before_cutoff = df["date"] < "12/19/2020"
df["pre_covid"] = _before_cutoff.astype("int")
df = df.loc[~_before_cutoff]

# Columns listed in dummy_cols; they also lead every select_cols list below.
_categorical = ["pickup_community_area", "hour", "week_day"]

# TOTAL REVENUE MODEL
x.select_cols = [*_categorical, "cases", "total_revenue"]
x.dummy_cols = list(_categorical)
x.y_col = "total_revenue"
res_revenue = x.time_split(df)

# TOTAL COUNT MODEL
# dummy_cols is not reassigned here; only the selected columns and the
# target change.
x.select_cols = [*_categorical, "cases", "count"]
x.y_col = "count"
res_count = x.time_split(df)

# TOTAL SECONDS MODEL
# NOTE(review): this section only configures the helper - no time_split
# call follows it in the visible code. Confirm whether a fit is missing.
x.select_cols = [*_categorical, "cases", "trip_seconds_tot"]
x.y_col = "trip_seconds_tot"
# READ DATASET
df = pd.read_csv("trips_summary_covid_pub.csv")
df["date"] = pd.to_datetime(df["date"])

# FILTER PRE AND POST PANDEMIC
# pre_covid is 1 for rows dated before the cutoff and 0 otherwise. Both
# resulting frames keep only rows whose total_revenue is strictly positive.
_is_pre = df["date"] < "12/19/2020"
df["pre_covid"] = _is_pre.astype("int")
_has_revenue = df["total_revenue"] > 0
df_pre_covid = df.loc[_is_pre & _has_revenue]
df = df.loc[~_is_pre & _has_revenue]

# Columns listed in dummy_cols; they also lead every select_cols list below.
_area_hour = ["pickup_community_area", "hour"]

# TOTAL REVENUE MODEL
x.select_cols = [*_area_hour, "total_revenue"]
x.dummy_cols = list(_area_hour)
x.y_col = "total_revenue"
res_revenue = x.poisson_regression(df, 0.7)

# TOTAL REVENUE PRE COVID
# Same configuration as above, re-applied before fitting on the pre-cutoff rows.
x.select_cols = [*_area_hour, "total_revenue"]
x.y_col = "total_revenue"
res_revenue_pre = x.poisson_regression(df_pre_covid, 0.7)

# TOTAL COUNT MODEL
x.select_cols = [*_area_hour, "count"]
x.y_col = "count"
res_count = x.poisson_regression(df, 0.7)