def __init__(
        self,
        start_time,           # Start time of time slice
        end_time,             # End time of time slice
        time_slice_duration,  # Time slice length in real minutes
        time_unit_duration,   # 1 time unit = ? real minutes
        hex_bins):            # List of hexagon bins
    """
    Loads the trips picked up inside a time slice and stores the slice
    settings on the instance.

    Trips are read from monthly tables named `yt-<month>-<yy>`, where
    <month> is the lower-cased full month name and <yy> the two-digit
    year, both derived from the slice boundaries via strftime.

    NOTE: only the tables of the start and the end boundary are queried;
    a slice spanning more than two monthly tables will miss the tables
    in between.

    :param start_time: start of the time slice (must provide strftime)
    :param end_time: end of the time slice (must provide strftime)
    :param time_slice_duration: time slice length in real minutes
    :param time_unit_duration: number of real minutes per time unit
    :param hex_bins: list of hexagon bins
    :return:
    """
    # Get proper month and year to query the right table
    start_month = start_time.strftime("%B").lower()
    start_year = start_time.strftime("%y")
    end_month = end_time.strftime("%B").lower()
    end_year = end_time.strftime("%y")

    # Overflow flag is set if start time and end time fall into different
    # monthly tables. The year is compared as well, so that the same
    # calendar month in two different years is not mistaken for one table.
    month_overflow_flag = (
        (start_month, start_year) != (end_month, end_year))

    # Create the query
    query = """ \
            SELECT \
                *, \
                1 AS weight \
            FROM \
                `yt-{0}-{1}` \
            WHERE \
                tpep_pickup_datetime BETWEEN '{2}' AND '{3}' \
                AND pickup_bin IS NOT NULL \
                AND dropoff_bin IS NOT NULL \
                AND pickup_bin != dropoff_bin; \
            """

    # The table of the start boundary is always needed
    df = DataProvider.read_sql_query(
        query.format(start_month, start_year, start_time, end_time))

    # On overflow, append the rows from the table of the end boundary.
    # pd.concat is called with its defaults, so each frame keeps the row
    # labels it was returned with.
    if month_overflow_flag:
        d1 = DataProvider.read_sql_query(
            query.format(end_month, end_year, start_time, end_time))
        df = pd.concat([df, d1])

    # Assign instance variables
    self.df = df
    self.start_time = start_time
    self.end_time = end_time
    self.time_slice_duration = time_slice_duration
    self.time_unit_duration = time_unit_duration
    self.hex_bins = hex_bins
def get_trips_data(self):
    """
    Fetches the trips of one monthly table that fall on one weekday.

    The table is `yt-<self.month>-<self.year>` and rows are filtered on
    WEEKDAY(tpep_pickup_datetime) matching self.weekday. Rows with a
    missing pickup or dropoff bin, or with identical pickup and dropoff
    bins, are excluded by the query.

    :param:
    :return: the query result restricted to the pickup/dropoff
             timestamps, trip distance, fare amount, duration, pickup
             and dropoff bins and the derived pickup hour
    """
    # SQL template; placeholders are month, year and weekday
    sql_template = """ \
            SELECT \
                *, HOUR(tpep_pickup_datetime) as pickup_hour\
            FROM \
                `yt-{0}-{1}` \
            WHERE \
                WEEKDAY(tpep_pickup_datetime) like {2} \
                AND pickup_bin IS NOT NULL \
                AND dropoff_bin IS NOT NULL \
                AND pickup_bin != dropoff_bin; \
            """

    # Columns handed back to the caller, in this order
    wanted_columns = [
        'tpep_pickup_datetime',
        'tpep_dropoff_datetime',
        'trip_distance',
        'fare_amount',
        'duration_seconds',
        'pickup_bin',
        'dropoff_bin',
        'pickup_hour',
    ]

    # Run the query, then keep only the required columns
    sql = sql_template.format(self.month, self.year, self.weekday)
    trips = DataProvider.read_sql_query(sql)
    return trips[wanted_columns]