Beispiel #1
0
    def test_sub_offset(self):
        # freq is DateOffset
        for freq in ['A', '2A', '3A']:
            p = Period('2011', freq=freq)
            assert p - offsets.YearEnd(2) == Period('2009', freq=freq)

            for o in [offsets.YearBegin(2), offsets.MonthBegin(1),
                      offsets.Minute(), np.timedelta64(365, 'D'),
                      timedelta(365)]:
                with pytest.raises(period.IncompatibleFrequency):
                    p - o

        for freq in ['M', '2M', '3M']:
            p = Period('2011-03', freq=freq)
            assert p - offsets.MonthEnd(2) == Period('2011-01', freq=freq)
            assert p - offsets.MonthEnd(12) == Period('2010-03', freq=freq)

            for o in [offsets.YearBegin(2), offsets.MonthBegin(1),
                      offsets.Minute(), np.timedelta64(365, 'D'),
                      timedelta(365)]:
                with pytest.raises(period.IncompatibleFrequency):
                    p - o

        # freq is Tick
        for freq in ['D', '2D', '3D']:
            p = Period('2011-04-01', freq=freq)
            assert p - offsets.Day(5) == Period('2011-03-27', freq=freq)
            assert p - offsets.Hour(24) == Period('2011-03-31', freq=freq)
            assert p - np.timedelta64(2, 'D') == Period(
                '2011-03-30', freq=freq)
            assert p - np.timedelta64(3600 * 24, 's') == Period(
                '2011-03-31', freq=freq)
            assert p - timedelta(-2) == Period('2011-04-03', freq=freq)
            assert p - timedelta(hours=48) == Period('2011-03-30', freq=freq)

            for o in [offsets.YearBegin(2), offsets.MonthBegin(1),
                      offsets.Minute(), np.timedelta64(4, 'h'),
                      timedelta(hours=23)]:
                with pytest.raises(period.IncompatibleFrequency):
                    p - o

        for freq in ['H', '2H', '3H']:
            p = Period('2011-04-01 09:00', freq=freq)
            assert p - offsets.Day(2) == Period('2011-03-30 09:00', freq=freq)
            assert p - offsets.Hour(3) == Period('2011-04-01 06:00', freq=freq)
            assert p - np.timedelta64(3, 'h') == Period(
                '2011-04-01 06:00', freq=freq)
            assert p - np.timedelta64(3600, 's') == Period(
                '2011-04-01 08:00', freq=freq)
            assert p - timedelta(minutes=120) == Period(
                '2011-04-01 07:00', freq=freq)
            assert p - timedelta(days=4, minutes=180) == Period(
                '2011-03-28 06:00', freq=freq)

            for o in [offsets.YearBegin(2), offsets.MonthBegin(1),
                      offsets.Minute(), np.timedelta64(3200, 's'),
                      timedelta(hours=23, minutes=30)]:
                with pytest.raises(period.IncompatibleFrequency):
                    p - o
Beispiel #2
0
    def test_sub_offset_nat(self):
        # freq is DateOffset
        for freq in ['A', '2A', '3A']:
            p = Period('NaT', freq=freq)
            for o in [offsets.YearEnd(2)]:
                assert p - o is tslib.NaT

            for o in [offsets.YearBegin(2), offsets.MonthBegin(1),
                      offsets.Minute(), np.timedelta64(365, 'D'),
                      timedelta(365)]:
                assert p - o is tslib.NaT

        for freq in ['M', '2M', '3M']:
            p = Period('NaT', freq=freq)
            for o in [offsets.MonthEnd(2), offsets.MonthEnd(12)]:
                assert p - o is tslib.NaT

            for o in [offsets.YearBegin(2), offsets.MonthBegin(1),
                      offsets.Minute(), np.timedelta64(365, 'D'),
                      timedelta(365)]:
                assert p - o is tslib.NaT

        # freq is Tick
        for freq in ['D', '2D', '3D']:
            p = Period('NaT', freq=freq)
            for o in [offsets.Day(5), offsets.Hour(24), np.timedelta64(2, 'D'),
                      np.timedelta64(3600 * 24, 's'), timedelta(-2),
                      timedelta(hours=48)]:
                assert p - o is tslib.NaT

            for o in [offsets.YearBegin(2), offsets.MonthBegin(1),
                      offsets.Minute(), np.timedelta64(4, 'h'),
                      timedelta(hours=23)]:
                assert p - o is tslib.NaT

        for freq in ['H', '2H', '3H']:
            p = Period('NaT', freq=freq)
            for o in [offsets.Day(2), offsets.Hour(3), np.timedelta64(3, 'h'),
                      np.timedelta64(3600, 's'), timedelta(minutes=120),
                      timedelta(days=4, minutes=180)]:
                assert p - o is tslib.NaT

            for o in [offsets.YearBegin(2), offsets.MonthBegin(1),
                      offsets.Minute(), np.timedelta64(3200, 's'),
                      timedelta(hours=23, minutes=30)]:
                assert p - o is tslib.NaT
Beispiel #3
0
    msg = f"'{symbol}' not supported between instances of 'NaTType' and '{other_type}'"
    with pytest.raises(TypeError, match=msg):
        op(NaT, other)

    msg = f"'{symbol}' not supported between instances of '{other_type}' and 'NaTType'"
    with pytest.raises(TypeError, match=msg):
        op(other, NaT)


@pytest.mark.parametrize(
    "obj",
    [
        offsets.YearEnd(2),
        offsets.YearBegin(2),
        offsets.MonthBegin(1),
        offsets.MonthEnd(2),
        offsets.MonthEnd(12),
        offsets.Day(2),
        offsets.Day(5),
        offsets.Hour(24),
        offsets.Hour(3),
        offsets.Minute(),
        np.timedelta64(3, "h"),
        np.timedelta64(4, "h"),
        np.timedelta64(3200, "s"),
        np.timedelta64(3600, "s"),
        np.timedelta64(3600 * 24, "s"),
        np.timedelta64(2, "D"),
        np.timedelta64(365, "D"),
        timedelta(-2),
Beispiel #4
0
    def test_add_offset_nat(self):
        # freq is DateOffset
        for freq in ['A', '2A', '3A']:
            p = Period('NaT', freq=freq)
            for o in [offsets.YearEnd(2)]:
                assert p + o is tslib.NaT
                assert o + p is tslib.NaT

            for o in [offsets.YearBegin(2), offsets.MonthBegin(1),
                      offsets.Minute(), np.timedelta64(365, 'D'),
                      timedelta(365)]:
                assert p + o is tslib.NaT

                if isinstance(o, np.timedelta64):
                    with pytest.raises(TypeError):
                        o + p
                else:
                    assert o + p is tslib.NaT

        for freq in ['M', '2M', '3M']:
            p = Period('NaT', freq=freq)
            for o in [offsets.MonthEnd(2), offsets.MonthEnd(12)]:
                assert p + o is tslib.NaT

                if isinstance(o, np.timedelta64):
                    with pytest.raises(TypeError):
                        o + p
                else:
                    assert o + p is tslib.NaT

            for o in [offsets.YearBegin(2), offsets.MonthBegin(1),
                      offsets.Minute(), np.timedelta64(365, 'D'),
                      timedelta(365)]:
                assert p + o is tslib.NaT

                if isinstance(o, np.timedelta64):
                    with pytest.raises(TypeError):
                        o + p
                else:
                    assert o + p is tslib.NaT

        # freq is Tick
        for freq in ['D', '2D', '3D']:
            p = Period('NaT', freq=freq)
            for o in [offsets.Day(5), offsets.Hour(24), np.timedelta64(2, 'D'),
                      np.timedelta64(3600 * 24, 's'), timedelta(-2),
                      timedelta(hours=48)]:
                assert p + o is tslib.NaT

                if isinstance(o, np.timedelta64):
                    with pytest.raises(TypeError):
                        o + p
                else:
                    assert o + p is tslib.NaT

            for o in [offsets.YearBegin(2), offsets.MonthBegin(1),
                      offsets.Minute(), np.timedelta64(4, 'h'),
                      timedelta(hours=23)]:
                assert p + o is tslib.NaT

                if isinstance(o, np.timedelta64):
                    with pytest.raises(TypeError):
                        o + p
                else:
                    assert o + p is tslib.NaT

        for freq in ['H', '2H', '3H']:
            p = Period('NaT', freq=freq)
            for o in [offsets.Day(2), offsets.Hour(3), np.timedelta64(3, 'h'),
                      np.timedelta64(3600, 's'), timedelta(minutes=120),
                      timedelta(days=4, minutes=180)]:
                assert p + o is tslib.NaT

                if not isinstance(o, np.timedelta64):
                    assert o + p is tslib.NaT

            for o in [offsets.YearBegin(2), offsets.MonthBegin(1),
                      offsets.Minute(), np.timedelta64(3200, 's'),
                      timedelta(hours=23, minutes=30)]:
                assert p + o is tslib.NaT

                if isinstance(o, np.timedelta64):
                    with pytest.raises(TypeError):
                        o + p
                else:
                    assert o + p is tslib.NaT
Beispiel #5
0
    offsets.BQuarterBegin(),
    offsets.BMonthEnd(),
    offsets.BMonthBegin(),
    offsets.CustomBusinessDay(),
    offsets.CustomBusinessDay(calendar=hcal),
    offsets.CustomBusinessMonthBegin(calendar=hcal),
    offsets.CustomBusinessMonthEnd(calendar=hcal),
    offsets.CustomBusinessMonthEnd(calendar=hcal),
]
other_offsets = [
    offsets.YearEnd(),
    offsets.YearBegin(),
    offsets.QuarterEnd(),
    offsets.QuarterBegin(),
    offsets.MonthEnd(),
    offsets.MonthBegin(),
    offsets.DateOffset(months=2, days=2),
    offsets.BusinessDay(),
    offsets.SemiMonthEnd(),
    offsets.SemiMonthBegin(),
]
offset_objs = non_apply + other_offsets


class OnOffset:

    params = offset_objs
    param_names = ["offset"]

    def setup(self, offset):
        self.dates = [
Beispiel #6
0
    def test_add_offset(self):
        # freq is DateOffset
        for freq in ['A', '2A', '3A']:
            p = Period('2011', freq=freq)
            exp = Period('2013', freq=freq)
            assert p + offsets.YearEnd(2) == exp
            assert offsets.YearEnd(2) + p == exp

            for o in [offsets.YearBegin(2), offsets.MonthBegin(1),
                      offsets.Minute(), np.timedelta64(365, 'D'),
                      timedelta(365)]:
                with pytest.raises(period.IncompatibleFrequency):
                    p + o

                if isinstance(o, np.timedelta64):
                    with pytest.raises(TypeError):
                        o + p
                else:
                    with pytest.raises(period.IncompatibleFrequency):
                        o + p

        for freq in ['M', '2M', '3M']:
            p = Period('2011-03', freq=freq)
            exp = Period('2011-05', freq=freq)
            assert p + offsets.MonthEnd(2) == exp
            assert offsets.MonthEnd(2) + p == exp

            exp = Period('2012-03', freq=freq)
            assert p + offsets.MonthEnd(12) == exp
            assert offsets.MonthEnd(12) + p == exp

            for o in [offsets.YearBegin(2), offsets.MonthBegin(1),
                      offsets.Minute(), np.timedelta64(365, 'D'),
                      timedelta(365)]:
                with pytest.raises(period.IncompatibleFrequency):
                    p + o

                if isinstance(o, np.timedelta64):
                    with pytest.raises(TypeError):
                        o + p
                else:
                    with pytest.raises(period.IncompatibleFrequency):
                        o + p

        # freq is Tick
        for freq in ['D', '2D', '3D']:
            p = Period('2011-04-01', freq=freq)

            exp = Period('2011-04-06', freq=freq)
            assert p + offsets.Day(5) == exp
            assert offsets.Day(5) + p == exp

            exp = Period('2011-04-02', freq=freq)
            assert p + offsets.Hour(24) == exp
            assert offsets.Hour(24) + p == exp

            exp = Period('2011-04-03', freq=freq)
            assert p + np.timedelta64(2, 'D') == exp
            with pytest.raises(TypeError):
                np.timedelta64(2, 'D') + p

            exp = Period('2011-04-02', freq=freq)
            assert p + np.timedelta64(3600 * 24, 's') == exp
            with pytest.raises(TypeError):
                np.timedelta64(3600 * 24, 's') + p

            exp = Period('2011-03-30', freq=freq)
            assert p + timedelta(-2) == exp
            assert timedelta(-2) + p == exp

            exp = Period('2011-04-03', freq=freq)
            assert p + timedelta(hours=48) == exp
            assert timedelta(hours=48) + p == exp

            for o in [offsets.YearBegin(2), offsets.MonthBegin(1),
                      offsets.Minute(), np.timedelta64(4, 'h'),
                      timedelta(hours=23)]:
                with pytest.raises(period.IncompatibleFrequency):
                    p + o

                if isinstance(o, np.timedelta64):
                    with pytest.raises(TypeError):
                        o + p
                else:
                    with pytest.raises(period.IncompatibleFrequency):
                        o + p

        for freq in ['H', '2H', '3H']:
            p = Period('2011-04-01 09:00', freq=freq)

            exp = Period('2011-04-03 09:00', freq=freq)
            assert p + offsets.Day(2) == exp
            assert offsets.Day(2) + p == exp

            exp = Period('2011-04-01 12:00', freq=freq)
            assert p + offsets.Hour(3) == exp
            assert offsets.Hour(3) + p == exp

            exp = Period('2011-04-01 12:00', freq=freq)
            assert p + np.timedelta64(3, 'h') == exp
            with pytest.raises(TypeError):
                np.timedelta64(3, 'h') + p

            exp = Period('2011-04-01 10:00', freq=freq)
            assert p + np.timedelta64(3600, 's') == exp
            with pytest.raises(TypeError):
                np.timedelta64(3600, 's') + p

            exp = Period('2011-04-01 11:00', freq=freq)
            assert p + timedelta(minutes=120) == exp
            assert timedelta(minutes=120) + p == exp

            exp = Period('2011-04-05 12:00', freq=freq)
            assert p + timedelta(days=4, minutes=180) == exp
            assert timedelta(days=4, minutes=180) + p == exp

            for o in [offsets.YearBegin(2), offsets.MonthBegin(1),
                      offsets.Minute(), np.timedelta64(3200, 's'),
                      timedelta(hours=23, minutes=30)]:
                with pytest.raises(period.IncompatibleFrequency):
                    p + o

                if isinstance(o, np.timedelta64):
                    with pytest.raises(TypeError):
                        o + p
                else:
                    with pytest.raises(period.IncompatibleFrequency):
                        o + p
Beispiel #7
0
 def offset(self):
     return offsets.MonthBegin()
Beispiel #8
0
from pandas import offsets, date_range, to_datetime, DataFrame
import numpy as np
from math import floor

lag_trim = offsets.MonthBegin(3)


def extrapol_affine(data_init, nom_serie, multi, addi, date_start, date_end):
    """
    Prolonge la base data_init par extrapolation des séries contenues dans nom_serie 
    en les multipliant par multi et en additionnant addi
    entre les dates date_start et date_end (définies comme chaînes de caractères au format YYYYQ)
    """
    data_extrapol = data_init.copy()
    iter_dates = date_range(start=to_datetime(date_start),
                            end=to_datetime(date_end),
                            freq="QS")

    for item in iter_dates:
        data_extrapol.loc[item, nom_serie] = data_extrapol.loc[
            item - lag_trim, nom_serie] * multi + addi

    return data_extrapol


def extrapol_prolonge_taux_moyen(data_init, nom_serie, date_t1, date_t2,
                                 date_start, date_end):
    """
    Prolonge la base data_init par extrapolation en taux de croissance 
    des séries contenues dans nom_serie entre date_start jusqu'à l'horizon voulu date_end, au taux moyen observé
    entre t1+1 et t2 
Beispiel #9
0
    def test_sub_offset(self):
        # freq is DateOffset
        msg = "Input has different freq|Input cannot be converted to Period"
        for freq in ["A", "2A", "3A"]:
            p = Period("2011", freq=freq)
            assert p - offsets.YearEnd(2) == Period("2009", freq=freq)

            for o in [
                offsets.YearBegin(2),
                offsets.MonthBegin(1),
                offsets.Minute(),
                np.timedelta64(365, "D"),
                timedelta(365),
            ]:
                with pytest.raises(IncompatibleFrequency, match=msg):
                    p - o

        for freq in ["M", "2M", "3M"]:
            p = Period("2011-03", freq=freq)
            assert p - offsets.MonthEnd(2) == Period("2011-01", freq=freq)
            assert p - offsets.MonthEnd(12) == Period("2010-03", freq=freq)

            for o in [
                offsets.YearBegin(2),
                offsets.MonthBegin(1),
                offsets.Minute(),
                np.timedelta64(365, "D"),
                timedelta(365),
            ]:
                with pytest.raises(IncompatibleFrequency, match=msg):
                    p - o

        # freq is Tick
        for freq in ["D", "2D", "3D"]:
            p = Period("2011-04-01", freq=freq)
            assert p - offsets.Day(5) == Period("2011-03-27", freq=freq)
            assert p - offsets.Hour(24) == Period("2011-03-31", freq=freq)
            assert p - np.timedelta64(2, "D") == Period("2011-03-30", freq=freq)
            assert p - np.timedelta64(3600 * 24, "s") == Period("2011-03-31", freq=freq)
            assert p - timedelta(-2) == Period("2011-04-03", freq=freq)
            assert p - timedelta(hours=48) == Period("2011-03-30", freq=freq)

            for o in [
                offsets.YearBegin(2),
                offsets.MonthBegin(1),
                offsets.Minute(),
                np.timedelta64(4, "h"),
                timedelta(hours=23),
            ]:
                with pytest.raises(IncompatibleFrequency, match=msg):
                    p - o

        for freq in ["H", "2H", "3H"]:
            p = Period("2011-04-01 09:00", freq=freq)
            assert p - offsets.Day(2) == Period("2011-03-30 09:00", freq=freq)
            assert p - offsets.Hour(3) == Period("2011-04-01 06:00", freq=freq)
            assert p - np.timedelta64(3, "h") == Period("2011-04-01 06:00", freq=freq)
            assert p - np.timedelta64(3600, "s") == Period(
                "2011-04-01 08:00", freq=freq
            )
            assert p - timedelta(minutes=120) == Period("2011-04-01 07:00", freq=freq)
            assert p - timedelta(days=4, minutes=180) == Period(
                "2011-03-28 06:00", freq=freq
            )

            for o in [
                offsets.YearBegin(2),
                offsets.MonthBegin(1),
                offsets.Minute(),
                np.timedelta64(3200, "s"),
                timedelta(hours=23, minutes=30),
            ]:
                with pytest.raises(IncompatibleFrequency, match=msg):
                    p - o
Beispiel #10
0
    def test_add_offset(self):
        # freq is DateOffset
        for freq in ["A", "2A", "3A"]:
            p = Period("2011", freq=freq)
            exp = Period("2013", freq=freq)
            assert p + offsets.YearEnd(2) == exp
            assert offsets.YearEnd(2) + p == exp

            for o in [
                offsets.YearBegin(2),
                offsets.MonthBegin(1),
                offsets.Minute(),
                np.timedelta64(365, "D"),
                timedelta(365),
            ]:
                msg = "Input has different freq|Input cannot be converted to Period"
                with pytest.raises(IncompatibleFrequency, match=msg):
                    p + o

                if isinstance(o, np.timedelta64):
                    msg = "cannot use operands with types"
                    with pytest.raises(TypeError, match=msg):
                        o + p
                else:
                    msg = "|".join(
                        [
                            "Input has different freq",
                            "Input cannot be converted to Period",
                        ]
                    )
                    with pytest.raises(IncompatibleFrequency, match=msg):
                        o + p

        for freq in ["M", "2M", "3M"]:
            p = Period("2011-03", freq=freq)
            exp = Period("2011-05", freq=freq)
            assert p + offsets.MonthEnd(2) == exp
            assert offsets.MonthEnd(2) + p == exp

            exp = Period("2012-03", freq=freq)
            assert p + offsets.MonthEnd(12) == exp
            assert offsets.MonthEnd(12) + p == exp

            for o in [
                offsets.YearBegin(2),
                offsets.MonthBegin(1),
                offsets.Minute(),
                np.timedelta64(365, "D"),
                timedelta(365),
            ]:
                msg = "Input has different freq|Input cannot be converted to Period"
                with pytest.raises(IncompatibleFrequency, match=msg):
                    p + o

                if isinstance(o, np.timedelta64):
                    msg = "cannot use operands with types"
                    with pytest.raises(TypeError, match=msg):
                        o + p
                else:
                    msg = "|".join(
                        [
                            "Input has different freq",
                            "Input cannot be converted to Period",
                        ]
                    )
                    with pytest.raises(IncompatibleFrequency, match=msg):
                        o + p

        # freq is Tick
        for freq in ["D", "2D", "3D"]:
            p = Period("2011-04-01", freq=freq)

            exp = Period("2011-04-06", freq=freq)
            assert p + offsets.Day(5) == exp
            assert offsets.Day(5) + p == exp

            exp = Period("2011-04-02", freq=freq)
            assert p + offsets.Hour(24) == exp
            assert offsets.Hour(24) + p == exp

            exp = Period("2011-04-03", freq=freq)
            assert p + np.timedelta64(2, "D") == exp
            msg = "cannot use operands with types"
            with pytest.raises(TypeError, match=msg):
                np.timedelta64(2, "D") + p

            exp = Period("2011-04-02", freq=freq)
            assert p + np.timedelta64(3600 * 24, "s") == exp
            with pytest.raises(TypeError, match=msg):
                np.timedelta64(3600 * 24, "s") + p

            exp = Period("2011-03-30", freq=freq)
            assert p + timedelta(-2) == exp
            assert timedelta(-2) + p == exp

            exp = Period("2011-04-03", freq=freq)
            assert p + timedelta(hours=48) == exp
            assert timedelta(hours=48) + p == exp

            for o in [
                offsets.YearBegin(2),
                offsets.MonthBegin(1),
                offsets.Minute(),
                np.timedelta64(4, "h"),
                timedelta(hours=23),
            ]:
                msg = "Input has different freq|Input cannot be converted to Period"
                with pytest.raises(IncompatibleFrequency, match=msg):
                    p + o

                if isinstance(o, np.timedelta64):
                    msg = "cannot use operands with types"
                    with pytest.raises(TypeError, match=msg):
                        o + p
                else:
                    msg = "|".join(
                        [
                            "Input has different freq",
                            "Input cannot be converted to Period",
                        ]
                    )
                    with pytest.raises(IncompatibleFrequency, match=msg):
                        o + p

        for freq in ["H", "2H", "3H"]:
            p = Period("2011-04-01 09:00", freq=freq)

            exp = Period("2011-04-03 09:00", freq=freq)
            assert p + offsets.Day(2) == exp
            assert offsets.Day(2) + p == exp

            exp = Period("2011-04-01 12:00", freq=freq)
            assert p + offsets.Hour(3) == exp
            assert offsets.Hour(3) + p == exp

            msg = "cannot use operands with types"
            exp = Period("2011-04-01 12:00", freq=freq)
            assert p + np.timedelta64(3, "h") == exp
            with pytest.raises(TypeError, match=msg):
                np.timedelta64(3, "h") + p

            exp = Period("2011-04-01 10:00", freq=freq)
            assert p + np.timedelta64(3600, "s") == exp
            with pytest.raises(TypeError, match=msg):
                np.timedelta64(3600, "s") + p

            exp = Period("2011-04-01 11:00", freq=freq)
            assert p + timedelta(minutes=120) == exp
            assert timedelta(minutes=120) + p == exp

            exp = Period("2011-04-05 12:00", freq=freq)
            assert p + timedelta(days=4, minutes=180) == exp
            assert timedelta(days=4, minutes=180) + p == exp

            for o in [
                offsets.YearBegin(2),
                offsets.MonthBegin(1),
                offsets.Minute(),
                np.timedelta64(3200, "s"),
                timedelta(hours=23, minutes=30),
            ]:
                msg = "Input has different freq|Input cannot be converted to Period"
                with pytest.raises(IncompatibleFrequency, match=msg):
                    p + o

                if isinstance(o, np.timedelta64):
                    msg = "cannot use operands with types"
                    with pytest.raises(TypeError, match=msg):
                        o + p
                else:
                    msg = "|".join(
                        [
                            "Input has different freq",
                            "Input cannot be converted to Period",
                        ]
                    )
                    with pytest.raises(IncompatibleFrequency, match=msg):
                        o + p
Beispiel #11
0
#---------------------------------------------------------------------------------------------------
# input the data
#---------------------------------------------------------------------------------------------------

with ExcelFile(r'.\inputs\Sales Department Input.xlsx') as xl:
    df = concat([read_excel(xl, s).assign(sheet=s) for s in xl.sheet_names])\
         .rename(columns={'Unnamed: 7' : 'YTD Total'})

#---------------------------------------------------------------------------------------------------
# process the data
#---------------------------------------------------------------------------------------------------

# get the month (for total rows, use the date from the prior row) -- use for YTD calc
df['Month'] = where(
    df['Salesperson'].notna(),
    df['Date'].shift(1) - offsets.MonthBegin(1, normalize=True),
    df['Date']).astype('datetime64[M]')
df['Year'] = df['Month'].dt.year

# fill in the Salesperson names for each row
df['Salesperson'] = df['Salesperson'].fillna(method='bfill')

# reshape the data so all the bike types are in a single column
df_m = df[df['Date'].notna()].drop(columns=['RowID', 'Total', 'YTD Total', 'sheet'])\
         .melt(id_vars=['Salesperson', 'Date', 'Month', 'Year'],
               var_name='Bike Type', value_name='Sales')

# get the monthly totals by salesperson, and merge to get the YTD values for the first month
df_tot = df_m.groupby(['Month', 'Year', 'Salesperson'])['Sales'].sum().reset_index()\
         .merge(df[df['YTD Total'].notna()][['Month', 'Salesperson', 'YTD Total']],
                how='outer', on=['Month', 'Salesperson'])\