Beispiel #1
0
def get_avg_prop_list():
    """Average the per-slot proportion matrices separately by day type.

    The proportion sequence is fetched from
    ``gen_proportion.get_proportion()``; entries 0-815 cover the first
    17 days, 48 time slots per day, so the entry for a given day and slot
    sits at index ``day * 48 + slot``.  For every slot the entries of the
    weekend days and of the working days are summed into a 59x59 matrix
    and divided by the number of days in that group.

    Returns:
        tuple: ``(weekday_prop_list, weekend_prop_list)``, each a list of
        48 averaged matrices (one per time slot).
    """
    slots_per_day = 48
    # Zero-based day numbers, i.e. (day of the period - 1).
    # Weekend:      days 5, 6, 12, 13
    # Working days: days 1-4, 7-11, 14-17
    weekend_days = (4, 5, 11, 12)
    working_days = (0, 1, 2, 3, 6, 7, 8, 9, 10, 13, 14, 15, 16)

    all_props = gen_proportion.get_proportion()

    def mean_for_slot(days, slot):
        # Accumulate in place into a float matrix, then average over days.
        total = np.zeros((59, 59))
        for day in days:
            total += all_props[day * slots_per_day + slot]
        return total / len(days)

    weekend_prop_list = [
        mean_for_slot(weekend_days, slot) for slot in range(slots_per_day)
    ]
    weekday_prop_list = [
        mean_for_slot(working_days, slot) for slot in range(slots_per_day)
    ]
    return weekday_prop_list, weekend_prop_list
def get_all_data():
    """Load the test set, the feature table and the prediction inputs.

    Returns:
        tuple: ``(df, flow_total, proportion_list, df_test)`` where

        * ``df`` is the weather feature table joined column-wise with the
          time feature table (without its ``date`` and ``time`` columns)
          plus a ``counter`` column running from 1 to 1152;
        * ``flow_total`` is the ``count`` column of the GBRT result CSV
          as a list;
        * ``proportion_list`` is the value returned by
          ``gen_proportion.get_proportion()``;
        * ``df_test`` is the test CSV with a ``counter`` column added.
    """
    # Read the test set and attach its counter:
    # counter = (day - 1) * 48 + time + 1
    df_test = pd.read_csv('E:\\data\\DiDiData\\data_csv\\dataset\\Test.csv')
    test_counter = (df_test['day'] - 1) * 48 + df_test['time'] + 1
    df_test['counter'] = test_counter

    weather_features = pd.read_csv(
        'E:\\data\\DiDiData\\data_csv\\features\\before\\weather_feature.csv')
    time_features = pd.read_csv(
        'E:\\data\\DiDiData\\data_csv\\features\\time_feature.csv'
    ).drop(columns=['date', 'time'])
    df = pd.concat([weather_features, time_features], axis=1)
    df['counter'] = range(1, 1153)

    # Total flow comes from the saved prediction file, not from df['count'].
    predicted_total = pd.read_csv(
        'E:\\data\\DiDiData\\data_csv\\result\\gbrt_toal_result.csv')
    flow_total = list(predicted_total['count'].values)

    return df, flow_total, gen_proportion.get_proportion(), df_test
Beispiel #3
0
def get_all_data():
    """Load the three-week data set, the feature table and derived inputs.

    The combined data set is read from ``data_3_week.csv``.  If that file
    does not exist it is rebuilt from ``Train.csv`` plus rows 0..99123 of
    ``Test.csv``, sorted by date, time, start district and destination
    district, given a ``counter`` column
    (``(day - 1) * 48 + time + 1``) and written to ``data_3_week.csv``.

    Both paths return ``df_data`` with a plain integer row index and the
    four key columns as ordinary columns, so the row-number slice that
    builds ``df_test`` behaves the same on the first run as on later runs.

    Returns:
        tuple: ``(df_data, df_test, df_feature, flow_total, flow_total_m,
        proportion_list)`` where

        * ``df_data`` is the combined data set;
        * ``df_test`` is ``df_data`` from row label 319127 onwards,
          re-indexed from 0;
        * ``df_feature`` is the weather feature table joined column-wise
          with the time feature table (without ``date``/``time``) plus a
          ``counter`` column running from 1 to 1152;
        * ``flow_total`` is ``df_feature['count']`` as a list;
        * ``flow_total_m`` holds, for each row of ``df_test``, the
          ``count`` value of the ``df_feature`` row labelled
          ``counter - 1``;
        * ``proportion_list`` is the value returned by
          ``gen_proportion.get_proportion()``.

    Raises:
        FileNotFoundError: if the cache is missing and ``Train.csv`` or
            ``Test.csv`` cannot be read, or if a feature CSV is missing.
        KeyError: if a ``counter - 1`` label is absent from ``df_feature``.
    """
    cache_path = 'E:\\data\\DiDiData\\data_csv\\dataset\\data_3_week.csv'
    key_columns = ['date', 'time', 'start_district_id', 'dest_district_id']

    try:
        df_data = pd.read_csv(cache_path)
    except FileNotFoundError:
        # Cache miss: concatenate Train and Test, sort by date, time,
        # start_district_id and dest_district_id, then add the counter.
        df_train = pd.read_csv(
            'E:\\data\\DiDiData\\data_csv\\dataset\\Train.csv')
        df_test = pd.read_csv(
            'E:\\data\\DiDiData\\data_csv\\dataset\\Test.csv')
        df_data = (
            pd.concat([df_train, df_test.loc[0:99123]], ignore_index=True)
            .sort_values(by=key_columns, axis=0, ascending=True)
            .reset_index(drop=True)
        )
        df_data['counter'] = (df_data['day'] - 1) * 48 + df_data['time'] + 1

        # Write the cache with the key columns as its leading columns, but
        # keep the in-memory frame on a plain integer index.  Leaving the
        # MultiIndex in place would make the row slice below select by
        # 'date' label and drop the key columns, unlike the cached path.
        indexed = df_data.set_index(keys=key_columns)
        indexed.to_csv(cache_path)
        df_data = indexed.reset_index()

    df_weather = pd.read_csv(
        'E:\\data\\DiDiData\\data_csv\\features\\before\\weather_feature.csv')
    df_time = pd.read_csv(
        'E:\\data\\DiDiData\\data_csv\\features\\time_feature.csv')
    del df_time['date']
    del df_time['time']
    df_feature = pd.concat([df_weather, df_time], axis=1)
    df_feature['counter'] = range(1, 1153)
    flow_total = list(df_feature['count'].values)

    df_test = df_data.loc[319127:].reset_index(drop=True)
    # One label-based lookup for all rows instead of a .loc call per row;
    # counter is 1-based, df_feature's row labels start at 0.
    feature_labels = (df_test['counter'] - 1).values
    flow_total_m = list(df_feature.loc[feature_labels, 'count'].values)
    proportion_list = gen_proportion.get_proportion()

    return (df_data, df_test, df_feature, flow_total, flow_total_m,
            proportion_list)
    x = np.arange(1, len(prop_list))
    y = np.array(dist_avg)
    print(y)
    plt.title('最后一天与之前的平均矩阵距离')
    plt.xlabel('天')
    plt.xlim((0, len(day_list)))
    plt.ylabel('平均比例矩阵距离')
    plt.plot(x, y, color='red', marker='o', linestyle='-', linewidth=2)

    plt.legend(loc='best')
    plt.show()



def distance(prop1, prop2):
    """Return the Euclidean distance between two equally shaped arrays.

    Both arguments are converted with ``np.array``; the result is the
    square root of the sum of squared element-wise differences (for
    matrices this is the Frobenius norm of the difference).

    Args:
        prop1: array-like of numbers.
        prop2: array-like of numbers, broadcastable against ``prop1``.

    Returns:
        The distance as a numpy floating-point scalar.
    """
    gap = np.array(prop1) - np.array(prop2)
    squared_total = np.sum(np.square(gap))
    return np.sqrt(squared_total)


if __name__ == '__main__':
    # Script entry point: print a start timestamp, load the proportion data,
    # run the day-average difference measurement, then print an end timestamp.
    print('start time:', datetime.now().strftime('%Y-%m-%d %H:%M:%S'))
    # Bound at module level. NOTE(review): presumably read as a global by the
    # measure_* functions defined outside this view - confirm before moving
    # or renaming it.
    proportion_list = gp.get_proportion()
    # Alternative measurements, currently disabled:
    # measure_difference_day()
    # measure_difference(24)
    measure_difference_day_avg()
    print('end time:', datetime.now().strftime('%Y-%m-%d %H:%M:%S'))