コード例 #1
0
ファイル: solids.py プロジェクト: konradmalik/tech-sandbox
def transform_into_traffic_dataset(_, trip_dataset: TripDataFrame) -> TrafficDataFrame:
    """Compute the peak hourly traffic load for each ``interval_date``.

    The trips are grouped by their ``interval_date`` column. For every group,
    24 consecutive one-hour windows are generated starting at the group key,
    the trips touching each window are counted, and the largest of those 24
    counts is kept.

    Args:
        _: Unused first positional argument.
        trip_dataset: Trips with ``start_time``, ``end_time`` and
            ``interval_date`` columns.

    Returns:
        ``TrafficDataFrame`` wrapping a frame with the columns
        ``interval_date`` and ``peak_traffic_load``.
    """

    def max_traffic_load(trips):
        # Column lookups do not depend on the window, so fetch them once.
        starts = trips['start_time']
        ends = trips['end_time']

        hourly_loads = []
        # ``trips.name`` is the group key that groupby.apply attaches to the
        # group; it anchors the first of the 24 hourly windows.
        for window_start in date_range(trips.name, periods=24, freq='h'):
            window_end = window_start + timedelta(hours=1)

            # Trip started within the window.
            started_inside = (window_start <= starts) & (starts < window_end)
            # Trip ended within the window.
            ended_inside = (window_start <= ends) & (ends < window_end)
            # Trip started before the window and ended at or after its end.
            spanned_window = (starts < window_start) & (ends >= window_end)

            in_transit = trips[started_inside | ended_inside | spanned_window]
            hourly_loads.append(len(in_transit))

        return max(hourly_loads)

    traffic_dataset = DataFrame(
        trip_dataset.groupby(['interval_date']).apply(max_traffic_load)
    ).reset_index()
    traffic_dataset.columns = ['interval_date', 'peak_traffic_load']
    return TrafficDataFrame(traffic_dataset)
コード例 #2
0
ファイル: solids.py プロジェクト: konradmalik/tech-sandbox
def preprocess_trip_dataset(_, dataframe: DataFrame) -> TripDataFrame:
    """Select and normalise the trip columns of a raw frame.

    Keeps only ``bike_id``, ``start_time`` and ``end_time``, drops rows where
    all three are missing, casts ``bike_id`` to ``int64``, parses both time
    columns with ``to_datetime`` and adds an ``interval_date`` column holding
    the calendar date of ``start_time``.

    Args:
        _: Unused first positional argument.
        dataframe: Raw frame containing at least the three trip columns.

    Returns:
        ``TripDataFrame`` wrapping the cleaned frame.
    """
    trip_columns = ['bike_id', 'start_time', 'end_time']
    # NOTE(review): reindex() is called without arguments, so the row labels
    # are left as they were after dropna — confirm that is intended.
    trips = dataframe[trip_columns].dropna(how='all').reindex()

    # NOTE(review): only fully-empty rows were dropped above; presumably
    # bike_id is never missing on the remaining rows — verify against the data.
    trips['bike_id'] = trips['bike_id'].astype('int64')

    for column in ('start_time', 'end_time'):
        trips[column] = to_datetime(trips[column])

    trips['interval_date'] = trips['start_time'].apply(lambda started: started.date())
    return TripDataFrame(trips)