Exemple #1
0
    def __init__(
            self,
            start_time,
            end_time,
            time_slice_duration,
            time_unit_duration,
            hex_bins):
        """
        Loads the trips picked up within a time slice and stores the
        slice parameters on the instance

        Trips are read from the monthly table `yt-<month>-<yy>` that
        matches start_time. If end_time falls in a different month or a
        different year, the table matching end_time is read as well and
        both results are concatenated. Only rows whose pickup time lies
        between start_time and end_time and whose pickup and dropoff bins
        are both set and different are kept; a constant `weight` column
        equal to 1 is added to every row.

        NOTE: only the start and end tables are queried, so a slice that
        spans more than two calendar months misses the months in between.
        NOTE(review): the table name relies on strftime("%B"), which is
        locale dependent - confirm the process runs with an English locale.

        :param start_time: Start time of time slice; must provide
            strftime and is formatted into the SQL text as-is
        :param end_time: End time of time slice; same requirements as
            start_time
        :param time_slice_duration: Time slice length in real minutes
        :param time_unit_duration: 1 time unit = ? real minutes
        :param hex_bins: List of hexagon bins
        """
        # Get proper month and year to query the right table
        start_month = start_time.strftime("%B").lower()
        start_year = start_time.strftime("%y")
        end_month = end_time.strftime("%B").lower()
        end_year = end_time.strftime("%y")

        # Month overflow flag is set if start time and end time live in
        # different tables. The year is part of the comparison so that
        # e.g. January 2015 -> January 2016 is not mistaken for one month.
        start_table = (start_month, start_year)
        end_table = (end_month, end_year)
        month_overflow_flag = (start_table != end_table)

        # Create the query
        query = """ \
                SELECT \
                    *, \
                    1 AS weight \
                FROM \
                    `yt-{0}-{1}` \
                WHERE \
                    tpep_pickup_datetime BETWEEN '{2}' AND '{3}' \
                AND pickup_bin IS NOT NULL \
                AND dropoff_bin IS NOT NULL \
                AND pickup_bin != dropoff_bin; \
                """

        # Read the query output
        if not month_overflow_flag:
            df = DataProvider.read_sql_query(
                query.format(start_month, start_year, start_time, end_time))
        else:
            # Same time window against both tables, start table first
            frames = [
                DataProvider.read_sql_query(
                    query.format(month, year, start_time, end_time))
                for month, year in (start_table, end_table)
            ]
            df = pd.concat(frames)

        # Assign instance variables
        self.df = df
        self.start_time = start_time
        self.end_time = end_time
        self.time_slice_duration = time_slice_duration
        self.time_unit_duration = time_unit_duration
        self.hex_bins = hex_bins
    def get_trips_data(self):
        """
        Loads the trips of one weekday from a monthly trips table

        The table `yt-<self.month>-<self.year>` is queried for rows whose
        pickup weekday matches self.weekday and whose pickup and dropoff
        bins are both set and different. The pickup hour is added as the
        extra column `pickup_hour`.
        :param:
        :return: query result restricted to the trip columns listed in
            the method body
        """
        # SQL template; placeholders are month, year and weekday
        sql_template = """ \
                SELECT \
                    *,
                    HOUR(tpep_pickup_datetime) as pickup_hour\
                FROM \
                    `yt-{0}-{1}` \
                WHERE \
                    WEEKDAY(tpep_pickup_datetime) like {2} \
                AND pickup_bin IS NOT NULL \
                AND dropoff_bin IS NOT NULL \
                AND pickup_bin != dropoff_bin; \
                """

        # Fill in the template and run it
        sql = sql_template.format(self.month, self.year, self.weekday)
        trips = DataProvider.read_sql_query(sql)

        # Only these columns are handed back to the caller
        wanted_columns = [
            'tpep_pickup_datetime',
            'tpep_dropoff_datetime',
            'trip_distance',
            'fare_amount',
            'duration_seconds',
            'pickup_bin',
            'dropoff_bin',
            'pickup_hour',
        ]
        return trips[wanted_columns]