Example #1
0
def load_other_bj_taxi_external(timeslots):
    """
    Build holiday, hour and day-of-week features for TaxiBJ time slots.

    args:
        timeslots: sequence of strings; all but the last two characters
            are parsed as a ``%Y%m%d`` date and the last two characters
            as an integer slot number.
    ret:
        vacation: 1 if the slot's date is listed in BJ_Holiday.txt,
            otherwise 0
        hour: the slot number integer-divided by 2
        dayOfWeek: ``datetime.weekday()`` of the date, from 0 (Monday)
            to 6 (Sunday)
    """
    path = os.path.join(utils.get_data_path(), 'TaxiBJ', 'BJ_Holiday.txt')

    # A set gives O(1) membership tests and, unlike counting exact matches,
    # still recognises a holiday that appears more than once in the file.
    with open(path, 'r') as f:
        holidays = {line.strip() for line in f}

    date = [slot[:-2] for slot in timeslots]
    # NOTE(review): the previous docstring promised 0..23, which only holds
    # if slot numbers are 0-based (0..47) -- confirm against the data.
    hour = [int(slot[-2:]) // 2 for slot in timeslots]
    dayOfWeek = [datetime.strptime(day, "%Y%m%d").weekday() for day in date]

    vacation = [1 if day in holidays else 0 for day in date]

    return np.asarray(vacation), np.asarray(hour), np.asarray(dayOfWeek)
Example #2
0
def load_bj_taxi_flow():
    """
    Load, clean and min-max normalise the TaxiBJ flow data.

    Side effects:
        prints the fitted ``_max`` / ``_min`` of the normaliser and pickles
        the fitted normaliser to 'bj_taxi_preprocessing.pkl' inside the
        directory returned by ``utils.get_pkl_path()``.

    ret:
        complete_date: array of time-slot strings (decoded from UTF-8)
        complete_data: array of normalised flow records
        mmn: the fitted MinMaxNormalization object
        index: dict mapping each time-slot string to its position in
            complete_date / complete_data
    """
    path_list = [
        os.path.join(utils.get_data_path(), 'TaxiBJ',
                     'BJ{}_M32x32_T30_InOut.h5'.format(year))
        for year in range(16, 17)
    ]

    complete_date = []
    complete_data = []
    for path in path_list:
        # Read-only: the file is never written, so 'r' avoids needing write
        # permission and cannot modify the dataset by accident.
        with h5py.File(path, 'r') as f:
            date = np.array(f['date'][()])
            data = np.array(f['data'][()])
        # 1. Missing values: drop days that are not complete.
        date, data = utils.remove_incomplete_day(date, data)
        # 2. Outliers: replace every non-positive flow value with 0.
        data = np.where(data > 0, data, 0)
        complete_date.extend(date)
        complete_data.extend(data)

    complete_data = np.asarray(complete_data)
    mmn = MinMaxNormalization()
    mmn.fit(complete_data)
    complete_data = np.asarray([mmn.transform(d) for d in complete_data])
    print(mmn._max, mmn._min)

    # Persist the fitted normaliser; the context manager closes the file
    # even if pickling fails.
    pkl_path = os.path.join(utils.get_pkl_path(), 'bj_taxi_preprocessing.pkl')
    with open(pkl_path, 'wb') as fpkl:
        pickle.dump(mmn, fpkl)

    complete_date = np.array(
        [stamp.decode('utf-8') for stamp in complete_date])

    # create index: date -> position -> data
    index = {stamp: i for i, stamp in enumerate(complete_date)}

    return complete_date, complete_data, mmn, index
Example #3
0
def load_ny_bike_flow():
    """
    Load, clean and min-max normalise the BikeNYC flow data.

    Side effects:
        prints the shape of the cleaned data and pickles the fitted
        normaliser to 'ny_bike_preprocessing.pkl' inside the directory
        returned by ``utils.get_pkl_path()``.

    ret:
        complete_date: array of time-slot strings (decoded from UTF-8)
        complete_data: array of normalised flow records
        mmn: the fitted MinMaxNormalization object
        index: dict mapping each time-slot string to its position in
            complete_date / complete_data
    """
    path = os.path.join(utils.get_data_path(), 'BikeNYC',
                        'NYC14_M16x8_T60_NewEnd.h5')

    # Read-only: the file is never written, so 'r' avoids needing write
    # permission and cannot modify the dataset by accident.
    with h5py.File(path, 'r') as f:
        date = np.array(f['date'][()])
        data = np.array(f['data'][()])

    # 1. Missing values: drop days that are not complete (24 slots a day).
    date, data = utils.remove_incomplete_day(date, data, unit_len=24)
    # 2. Outliers: replace every non-positive flow value with 0.
    data = np.where(data > 0, data, 0)

    complete_date = list(date)
    complete_data = np.asarray(list(data))
    print(complete_data.shape)

    mmn = MinMaxNormalization()
    mmn.fit(complete_data)
    complete_data = np.asarray([mmn.transform(d) for d in complete_data])

    # Persist the fitted normaliser; the context manager closes the file
    # even if pickling fails.
    pkl_path = os.path.join(utils.get_pkl_path(), 'ny_bike_preprocessing.pkl')
    with open(pkl_path, 'wb') as fpkl:
        pickle.dump(mmn, fpkl)

    complete_date = np.array(
        [stamp.decode('utf-8') for stamp in complete_date])

    # create index: date -> position -> data
    index = {stamp: i for i, stamp in enumerate(complete_date)}

    return complete_date, complete_data, mmn, index
Example #4
0
def load_bj_taxi_meteorology(timeslots=None):
    """
    Return the 'Weather' record stored just before each requested time slot.

    args:
        timeslots: iterable of time-slot strings, each of which must occur
            in the 'date' dataset of BJ_Meteorology.h5. Despite the None
            default a value is required; None raises TypeError when it is
            iterated.
    ret:
        array with one 'Weather' row per entry of timeslots
    raises:
        IndexError: if a time slot is not present in the 'date' dataset
    """
    path = os.path.join(utils.get_data_path(), 'TaxiBJ', 'BJ_Meteorology.h5')

    # Read-only: the file is never written, so 'r' avoids needing write
    # permission and cannot modify the dataset by accident.
    with h5py.File(path, 'r') as f:
        Weather = f['Weather'][()]
        date = f['date'][()]

    # bytes -> str
    date = np.array([stamp.decode('utf-8') for stamp in date])

    # Position of the first occurrence of every requested slot.
    index = [np.where(date == slot)[0][0] for slot in timeslots]

    # NOTE(review): for a slot at position 0, ``idx - 1`` is -1 and selects
    # the LAST record rather than a preceding one -- confirm this is intended.
    weather = [Weather[idx - 1] for idx in index]
    return np.asarray(weather)
Example #5
0
def load_bj_taxi_holiday(timeslots):
    """
    Flag which time slots fall on a date listed in BJ_Holiday.txt.

    args:
        timeslots: iterable of strings; all but the last two characters
            of each are compared against the (stripped) lines of the
            holiday file.
    ret:
        array with 1 for slots whose date is a holiday, otherwise 0
    """
    path = os.path.join(utils.get_data_path(), 'TaxiBJ', 'BJ_Holiday.txt')

    # Build the lookup once as a set so each membership test is O(1)
    # instead of a scan over the whole holiday list.
    with open(path, 'r') as f:
        holidays = {line.strip() for line in f}

    # Mark the slots whose date is a holiday with 1.
    feature_holiday = [1 if slot[:-2] in holidays else 0
                       for slot in timeslots]

    return np.asarray(feature_holiday)
Example #6
0
def load_bj_taxi_meteorology(timeslots=None):
    """
    Return weather, temperature and wind speed preceding each time slot.

    args:
        timeslots: iterable of time-slot strings, each of which must occur
            in the 'date' dataset of BJ_Meteorology.h5. Despite the None
            default a value is required; None raises TypeError when it is
            iterated.
    ret (in this order):
        weather: per slot, the position(s) at which the 'Weather' row
            equals 1 (presumably a one-hot row, giving a single index --
            confirm)
        temperature: 'Temperature' values passed through MinMaxScale
        windSpeed: 'WindSpeed' values passed through MinMaxScale
    raises:
        IndexError: if a time slot is not present in the 'date' dataset
    """
    path = os.path.join(utils.get_data_path(), 'TaxiBJ', 'BJ_Meteorology.h5')

    # Read-only: the file is never written, so 'r' avoids needing write
    # permission and cannot modify the dataset by accident.
    with h5py.File(path, 'r') as f:
        Temperature = f['Temperature'][()]
        Weather = f['Weather'][()]
        WindSpeed = f['WindSpeed'][()]
        date = f['date'][()]

    # numpy bytes -> str
    date = np.array([stamp.decode('utf-8') for stamp in date])

    # Position of the first occurrence of every requested slot.
    index = [np.where(date == slot)[0][0] for slot in timeslots]

    # The record just before each slot is used as its external data.
    # NOTE(review): for a slot at position 0, ``idx - 1`` is -1 and selects
    # the LAST record rather than a preceding one -- confirm this is intended.
    previous = [idx - 1 for idx in index]

    temperature = np.asarray([Temperature[p] for p in previous])
    # just one index per slot, for use with an embedding
    weather = np.asarray(
        [np.argwhere(Weather[p] == 1).squeeze() for p in previous])
    windSpeed = np.asarray([WindSpeed[p] for p in previous])

    # min-max-scale wind speed and temperature
    temperature = MinMaxScale(temperature)
    windSpeed = MinMaxScale(windSpeed)

    return weather, temperature, windSpeed