Example #1
0
def add_help_data(pickle_dir=Path('pickles')):
    """Enrich each station's phase pickles with helper columns.

    For every station directory returned by ``get_file_paths``, reads the
    pickles ``phase1``..``phase3`` and writes enriched copies
    ``h_phase1``..``h_phase3`` containing:

    - ``row_dif``:   row-to-row difference of ``Value`` within a phase
    - ``phase_dif``: per-row maximum absolute difference between the
      three phases' ``Value`` columns

    Parameters
    ----------
    pickle_dir : Path
        Directory containing one sub-directory per station.
    """
    file_paths = get_file_paths(pickle_dir)
    print(file_paths)
    for path in file_paths:
        print(path)
        path = pickle_dir / Path(path)
        df_phases = [
            pd.read_pickle(path / ("phase" + p)) for p in ['1', '2', '3']
        ]
        print("Opened pickle")
        # Collect the three phases' Values side by side (columns p1..p3).
        phase_values = pd.DataFrame()
        for i, df_p in enumerate(df_phases):
            df_p.drop(columns=['Unit', 'AliasName'], inplace=True)
            phase_values['p' + str(i + 1)] = df_p.Value
        for df_p in df_phases:
            df_p['row_dif'] = df_p.Value.diff()
        print("Created help values")
        # Maximum pairwise absolute difference between the three phases.
        # (A dead `np.diff(phase_values.values)` call whose result was
        # discarded has been removed.)
        phase_values['max_dif'] = phase_values.apply(lambda row: max(
            abs(row['p1'] - row['p2']), abs(row['p1'] - row['p3']),
            abs(row['p2'] - row['p3'])),
                                                     axis=1)
        print("Calculated help data")
        for df_p in df_phases:
            df_p['phase_dif'] = phase_values['max_dif']
        print("Assigned help data")
        for i, df_p in enumerate(df_phases):
            print(df_p)
            df_p.to_pickle(path / ("h_phase" + str(i + 1)))
Example #2
0
def update_trafo(pickle_dir=Path('pickles')):
    """Add a ``trafo`` helper column to each station's h_phase pickles.

    Computes each phase's per-second rate of change of ``Value``.  Rows
    where the three phases do NOT all move in the same direction are
    zeroed out; for the remaining rows the minimum absolute rate across
    the three phases becomes ``trafo``.  The frames are written back to
    ``h_phase1``..``h_phase3``.

    Parameters
    ----------
    pickle_dir : Path
        Directory containing one sub-directory per station.
    """
    file_paths = get_file_paths(pickle_dir)
    print(file_paths)
    for path in file_paths:
        print(path)
        path = pickle_dir / Path(path)
        df_phases = [
            pd.read_pickle(path / ("h_phase" + p)) for p in ['1', '2', '3']
        ]
        print("Opened pickle")
        df_row_difs = pd.DataFrame()
        for p, df_p in enumerate(df_phases):
            # Value change per second (the index is a DatetimeIndex).
            df_p['row_dif'] = (df_p.Value.diff() / df_p.Value.index.to_series(
            ).diff().dt.total_seconds())
            df_row_difs[str(p)] = df_p['row_dif']
        all_rising = ((df_row_difs['0'] >= 0) & (df_row_difs['1'] >= 0)
                      & (df_row_difs['2'] >= 0))
        all_falling = ((df_row_difs['0'] < 0) & (df_row_difs['1'] < 0)
                       & (df_row_difs['2'] < 0))
        # `~mask` replaces the original's obscure `True ^ mask` negation.
        df_row_difs.loc[~(all_rising | all_falling)] = 0
        df_row_difs = df_row_difs.abs()
        for df_p in df_phases:
            df_p['trafo'] = df_row_difs.min(axis=1)

        print("Assigned help data")
        for i, df_p in enumerate(df_phases):
            df_p.to_pickle(path / ("h_phase" + str(i + 1)))
Example #3
0
def create_mean_street_pickles(pickle_dir=Path('pickles')):
    """Write a cross-station mean-value series to ``meanStationValues``.

    Per station: joins the three phase ``Value`` series, resamples to
    30-second buckets, and averages across phases.  The per-station
    averages are then joined and averaged across stations; the resulting
    Series is pickled to ``pickle_dir / 'meanStationValues'``.

    Parameters
    ----------
    pickle_dir : Path
        Directory containing one sub-directory per station.
    """
    # (An unused `day = pd.Timedelta('1d')` local has been removed.)
    station_avgs = pd.DataFrame()
    file_paths = get_file_paths(pickle_dir)
    print(file_paths)
    for path in file_paths:
        station_name = path
        print(path)
        path = pickle_dir / Path(path)
        df_phases = pd.DataFrame()
        phase_frames = [
            pd.read_pickle(path / ("phase" + n))[['Value']]
            for n in ['1', '2', '3']
        ]
        for p, df_p in enumerate(phase_frames):
            df_phases = df_phases.join(
                other=df_p.rename(columns={'Value': 'ValueP' + str(p + 1)}),
                how='outer')
        df_phases = df_phases.resample('30s').mean()
        # Station average over the three phases per 30 s bucket.
        df_phases[station_name] = df_phases.mean(axis=1)
        station_avgs = station_avgs.join(df_phases[[station_name]],
                                         how='outer')
    station_avgs = station_avgs.mean(axis=1)
    print(station_avgs)
    station_avgs.to_pickle(pickle_dir / 'meanStationValues')
Example #4
0
def add_cross_station_data(pickle_dir=Path('pickles')):
    """Add a ``StationDif`` column to every h_phase pickle.

    ``StationDif`` is the sample's ``Value`` minus the cross-station mean
    (from ``meanStationValues``, 30-second buckets) at the sample's
    bucket timestamp.

    Bug fix: the mean-station pickle was previously read from an
    undefined global ``pickle_directory``; it now uses the ``pickle_dir``
    parameter.

    Parameters
    ----------
    pickle_dir : Path
        Directory containing one sub-directory per station.
    """
    station_avgs = pd.read_pickle(pickle_dir / "meanStationValues")
    file_paths = get_file_paths(pickle_dir)
    for path in file_paths:
        print(path)
        path = pickle_dir / Path(path)
        df_phases = [
            pd.read_pickle(path / ("h_phase" + p)) for p in ['1', '2', '3']
        ]
        for p, df_p in enumerate(df_phases):
            print(p)
            print(df_p)
            v1s = []
            for index, row in df_p.iterrows():
                # Snap the sample timestamp down to its 30 s bucket so it
                # matches the resampled station-average index.
                bucket = index - datetime.timedelta(
                    seconds=index.second % 30, microseconds=index.microsecond)
                v1s.append(row['Value'] - station_avgs.loc[bucket])
            df_p['StationDif'] = v1s
            print(df_p)
            df_p.to_pickle(path / ("h_phase" + str(p + 1)))
Example #5
0
def add_new_seasonal_data(pickle_dir=Path('pickles')):
    """Add a ``SeasDif`` column to every h_phase pickle.

    ``SeasDif`` is the sample's ``Value`` minus the station's seasonal
    aggregate (read from ``<station>season_aggregation``) at the sample's
    30-second bucket timestamp.

    Bug fix: the stale ``SeasDif`` column was previously "dropped" with
    ``labels='SeasDif'``, which targets the row index (and was silently
    ignored via ``errors='ignore'``); it now drops the column.

    Parameters
    ----------
    pickle_dir : Path
        Directory containing one sub-directory per station.
    """
    file_paths = get_file_paths(pickle_dir)
    for path in file_paths:
        station_season = pd.read_pickle(pickle_dir /
                                        (path + 'season_aggregation'))
        print(path)
        path = pickle_dir / Path(path)
        df_phases = [
            pd.read_pickle(path / ("h_phase" + p)) for p in ['1', '2', '3']
        ]
        for p, df_p in enumerate(df_phases):
            # Remove any previous result before recomputing.
            df_p.drop(columns='SeasDif', inplace=True, errors='ignore')
            print(p)
            print(df_p)
            v1s = []
            print(station_season)
            print(station_season.sort_index())
            for index, row in df_p.iterrows():
                print(row['Value'])
                print(index)
                # Snap the sample timestamp down to its 30 s bucket
                # (computed once instead of three times).
                bucket = index - datetime.timedelta(
                    seconds=index.second % 30, microseconds=index.microsecond)
                print(bucket)
                print(station_season.loc[bucket])
                v1 = row['Value'] - station_season.loc[bucket]
                print(v1)
                v1s.append(v1)
            df_p['SeasDif'] = v1s
            print(df_p)
            df_p.to_pickle(path / ("h_phase" + str(p + 1)))
Example #6
0
def drop_useless_labels(pickle_dir=Path('pickles')):
    """Remove the 'Unit' and 'AliasName' columns from every h_phase pickle.

    Parameters
    ----------
    pickle_dir : Path
        Directory containing one sub-directory per station.
    """
    # (An unused `day = pd.Timedelta('1d')` local has been removed.)
    file_paths = get_file_paths(pickle_dir)
    print(file_paths)
    for path in file_paths:
        path = pickle_dir / Path(path)
        df_phases_h = [
            pd.read_pickle(path / ("h_phase" + p)) for p in ['1', '2', '3']
        ]
        for p, df_p in enumerate(df_phases_h):
            df_p.drop(columns=['Unit', 'AliasName'], inplace=True)
            df_p.to_pickle(path / ("h_phase" + str(p + 1)))
Example #7
0
def add_seasonal_data(pickle_dir=Path('pickles')):
    """Add a ``SeasDif`` column to every h_phase pickle.

    For each station and phase, builds per-weekday "typical day" profiles:
    every calendar day is resampled to 30-second medians, days are grouped
    by weekday (Mon..Sun), and the median across all same-weekday columns
    becomes the profile ('med').  ``SeasDif`` is then each sample's Value
    minus the profile value at the sample's time-of-day bucket.

    Parameters
    ----------
    pickle_dir : Path
        Directory containing one sub-directory per station.
    """
    seasonal_data = pd.DataFrame()  # NOTE(review): never used afterwards
    file_paths = get_file_paths(pickle_dir)
    print(file_paths)
    day = pd.Timedelta('1d')
    for path in file_paths:
        print(path)
        path = pickle_dir / Path(path)
        # Raw per-phase Value series (phase1..phase3).
        df_phases = list(
            map(lambda p: pd.read_pickle(path / ("phase" + p))[['Value']],
                ['1', '2', '3']))
        # weekday_dfs_phases[phase][weekday] accumulates one column per
        # calendar day that fell on that weekday.
        weekday_dfs_phases = [[None for x in range(7)] for y in range(3)]
        min_date = min(list(map(lambda df: df.index.min(), df_phases))).date()
        max_date = max(list(map(lambda df: df.index.max(), df_phases))).date()
        for p, df_p in enumerate(df_phases):
            for start_time in pd.date_range(min_date, max_date, freq='d'):
                end_time = start_time + day
                df_p_day = df_p.loc[start_time:end_time]
                # 30 s medians for this day; column named after the date.
                df_p_day_med = df_p_day.resample('30s').median().rename(
                    columns={'Value': str(start_time.date())})
                # Index by time-of-day so same-weekday days can be joined.
                df_p_day_med.index = df_p_day_med.index.time
                weekday = start_time.date().weekday()
                # print(weekday_dfs_phases[p][weekday])
                if weekday_dfs_phases[p][weekday] is None:
                    weekday_df = df_p_day_med
                    weekday_dfs_phases[p][weekday] = weekday_df
                else:
                    weekday_df = weekday_dfs_phases[p][weekday]
                    weekday_df = weekday_df.join(df_p_day_med, how='outer')
                    weekday_dfs_phases[p][weekday] = weekday_df
        print("Split DF")
        # Median across all same-weekday day-columns -> the 'med' profile.
        # NOTE(review): assumes every weekday occurs in the data range;
        # a None entry here would raise — confirm with real inputs.
        for p, df_weekdays in enumerate(weekday_dfs_phases):
            for w, df in enumerate(df_weekdays):
                df['med'] = df.median(axis=1)
                #  print(df)
        df_phases_h = list(
            map(lambda p: pd.read_pickle(path / ("h_phase" + p)),
                ['1', '2', '3']))
        print(df_phases_h)
        for p, df_p in enumerate(df_phases_h):
            print(p)
            df_weekdays = weekday_dfs_phases[p]
            # Value minus the weekday profile at the row's 30 s
            # time-of-day bucket (timestamp snapped down to :00/:30).
            df_p['SeasDif'] = df_p.apply(
                lambda row:
                (row['Value'] - df_weekdays[row.name.weekday()].loc[
                    (row.name - datetime.timedelta(
                        seconds=row.name.second % 30,
                        microseconds=row.name.microsecond)).time()]['med']),
                axis=1)
            print(df_p)
            df_p.to_pickle(path / ("h_phase" + str(p + 1)))
Example #8
0
def add_time_gaps(pickle_dir=Path('pickles')):
    """Add a 'time_passed' column (seconds since the previous sample)
    to every h_phase pickle.

    Parameters
    ----------
    pickle_dir : Path
        Directory containing one sub-directory per station.
    """
    # (An unused `day = pd.Timedelta('1d')` local has been removed.)
    file_paths = get_file_paths(pickle_dir)
    print(file_paths)
    for path in file_paths:
        print(path)
        path = pickle_dir / Path(path)
        df_phases_h = [
            pd.read_pickle(path / ("h_phase" + p)) for p in ['1', '2', '3']
        ]
        for p, df_p in enumerate(df_phases_h):
            # Gap to the previous row of the DatetimeIndex, in seconds.
            df_p['time_passed'] = df_p.index.to_series().diff(
            ).dt.total_seconds()
            df_p.to_pickle(path / ("h_phase" + str(p + 1)))
Example #9
0
def create_season_pickle(pickle_dir=Path('pickles')):
    """Build a seasonal baseline ('season_aggregation') per station.

    Reads each station's mean pickle ('<station>aggregation'), splits it
    into rest days (weekends and NRW 2017 public holidays) and work days,
    and for every sample computes the mean of all same-time-of-day samples
    within a +/- 3-week window of the same day type.  The two windowed-mean
    series are concatenated and pickled as '<station>season_aggregation'.

    Parameters
    ----------
    pickle_dir : Path
        Directory containing one mean pickle per station.
    """
    file_paths = get_file_paths(pickle_dir)
    print(file_paths)
    for path in file_paths:
        print(path)
        station_name = path
        # NOTE(review): dtype-less pd.Series() defaults to object dtype in
        # newer pandas (with a warning) — consider pd.Series(dtype=float).
        df_mean_season = pd.Series()
        df_mean_pickle = pd.read_pickle(pickle_dir /
                                        (str(path) + 'aggregation'))
        print('len mean_pickle: ' + str(len(df_mean_pickle)))
        # df_mean_pickle = df_mean_pickle.iloc[:100800]
        print(df_mean_pickle)
        column_name = 'windowed_means'
        # df_mean_pickle = lf.generators.add_daytypes(df_mean_pickle)

        # df_mean_pickle = lf.generators.add_holidays(df_mean_pickle, 'NW')
        # North Rhine-Westphalia public holidays for 2017.
        holidays_nrw = list(holidays.DE(years=2017, state='NW').keys())
        # df_mean_pickle_restday = df_mean_pickle[
        #     ((df_mean_pickle.is_saturday == 1) | (df_mean_pickle.is_sunday == 1) | (df_mean_pickle.is_holiday == True))]
        # df_mean_pickle_workday = df_mean_pickle[
        #     True ^ ((df_mean_pickle.is_saturday == 1) | (df_mean_pickle.is_sunday == 1) | (
        #                 df_mean_pickle.is_holiday == True))]

        print(holidays_nrw)
        # test = df_mean_pickle[df_mean_pickle.index.isin(holidays_nrw)]
        # print(test)

        # Rest days: Saturday/Sunday (dayofweek >= 5) or a holiday.
        df_mean_pickle_restday = df_mean_pickle[(
            (df_mean_pickle.index.dayofweek >= 5) |
            (df_mean_pickle.index).isin(holidays_nrw))]
        # Work days: the complement (`True ^ mask` negates the mask).
        df_mean_pickle_workday = df_mean_pickle[True ^ (
            (df_mean_pickle.index.dayofweek >= 5)
            | (df_mean_pickle.index).isin(holidays_nrw))]
        print('Split_dataframe')
        for i, df_mean_pickle_typeday in enumerate(
            [df_mean_pickle_restday, df_mean_pickle_workday]):
            # Keep only this station's column, without missing values.
            df_mean_pickle_typeday = df_mean_pickle_typeday[[station_name
                                                             ]].dropna()
            v1s = []
            min_date = df_mean_pickle_typeday.index.min()
            max_date = df_mean_pickle_typeday.index.max()
            three_w_timedelta = pd.Timedelta('3w')
            # Track window edges only to log when the window shifts.
            old_window_min_date = min_date.date()
            old_window_max_date = max_date.date()
            print(min_date)

            for index, row in df_mean_pickle_typeday.iterrows():
                # +/- 3-week window clamped to the data range.
                window_min_date = max(min_date, index - three_w_timedelta)
                window_max_date = min(max_date, index + three_w_timedelta)
                window_slice = df_mean_pickle_typeday.loc[
                    window_min_date:window_max_date]
                # Same time-of-day samples within the window.
                window_slice = window_slice.loc[window_slice.index.time ==
                                                index.time()]
                v1 = window_slice[station_name].mean()
                # Log only when the window boundaries actually moved.
                if old_window_min_date != window_min_date.date(
                ) or old_window_max_date != window_max_date.date():
                    print(str(window_min_date) + ' -> ' + str(window_max_date))
                    old_window_min_date = window_min_date.date()
                    old_window_max_date = window_max_date.date()
                    print(window_slice)
                    print(v1)
                v1s.append(v1)
            df_mean_pickle_typeday[column_name] = v1s
            print('len v1s: ' + str(len(v1s)))
            print(df_mean_pickle_typeday[[column_name]])
            print(df_mean_season)
            # Accumulate rest-day and work-day results into one series.
            df_mean_season = pd.concat(
                [df_mean_season, df_mean_pickle_typeday[column_name]],
                sort=True)
        print('len mean_season: ' + str(df_mean_season.size))
        print(df_mean_season)
        df_mean_season.to_pickle(pickle_dir /
                                 (str(path) + 'season_aggregation'))