Exemple #1
0
    def __init__(self,
                 file_granularity: str,
                 file_path_format: str,
                 is_timezone_variable: bool,
                 timezone: timezone = None,
                 part_prefix: str = None,
                 file_extension='.csv'):
        """Configuration describing how dated data files are laid out.

        Parameters
        ----------
        file_granularity : one of 'daily', 'monthly', 'weekly'
            Cadence at which files are produced; determines the date step
            between consecutive files.
        file_path_format : str
            Template used to build each file's path.
        is_timezone_variable : bool
            Whether the timezone varies across the data.
        timezone : timezone, optional
            Fixed timezone, when applicable.
        part_prefix : str, optional
            Prefix of part files; providing one marks the data as
            part-wise (split into multiple part files).
        file_extension : str, default '.csv'
            Extension of the data files.
        """
        super(ReaderConfig,
              self).__init__('Config for reading data from different mediums')

        assert file_granularity in ['daily', 'monthly', 'weekly']
        self.file_granularity = file_granularity

        # Date step between consecutive files, keyed by granularity.
        step_by_granularity = {
            'daily': timedelta(days=1),
            'monthly': offsets.MonthBegin(),
            'weekly': offsets.Week(weekday=0),
        }
        self.date_offset = step_by_granularity[file_granularity]

        self.file_path_format = file_path_format
        self.file_extension = file_extension

        # A part prefix implies the data is split into part files.
        self.part_prefix = part_prefix
        self.partwise = part_prefix is not None

        self.is_timezone_variable = is_timezone_variable
        self.timezone = timezone
    def test_valid(self):
        """Validation of time-based rolling arguments (no message matching).

        NOTE(review): a second ``test_valid`` defined later in this file
        redefines this name, so under pytest only the later definition runs.
        """

        df = self.regular

        # not a valid freq
        with pytest.raises(ValueError):
            df.rolling(window="foobar")

        # not a datetimelike index
        with pytest.raises(ValueError):
            df.reset_index().rolling(window="foobar")

        # non-fixed freqs
        for freq in ["2MS", offsets.MonthBegin(2)]:
            with pytest.raises(ValueError):
                df.rolling(window=freq)

        # fixed freqs are accepted, whether given as strings or offsets
        for freq in ["1D", offsets.Day(2), "2ms"]:
            df.rolling(window=freq)

        # non-integer min_periods
        for minp in [1.0, "foo", np.array([1, 2, 3])]:
            with pytest.raises(ValueError):
                df.rolling(window="1D", min_periods=minp)

        # center is not implemented
        with pytest.raises(NotImplementedError):
            df.rolling(window="1D", center=True)
    def test_valid(self):
        """Validation of time-based rolling arguments, asserting the exact
        exception messages.

        Same scenarios as the earlier ``test_valid`` in this file, with
        ``match=`` patterns pinned for each error.
        """

        df = self.regular

        # not a valid freq
        msg = "passed window foobar is not compatible with a datetimelike index"
        with pytest.raises(ValueError, match=msg):
            df.rolling(window="foobar")
        # not a datetimelike index
        msg = "window must be an integer"
        with pytest.raises(ValueError, match=msg):
            df.reset_index().rolling(window="foobar")

        # non-fixed freqs
        msg = "\\<2 \\* MonthBegins\\> is a non-fixed frequency"
        for freq in ["2MS", offsets.MonthBegin(2)]:
            with pytest.raises(ValueError, match=msg):
                df.rolling(window=freq)

        # fixed freqs are accepted
        for freq in ["1D", offsets.Day(2), "2ms"]:
            df.rolling(window=freq)

        # non-integer min_periods
        # (message alternation covers two pandas versions of the error)
        msg = (r"local variable 'minp' referenced before assignment|"
               "min_periods must be an integer")
        for minp in [1.0, "foo", np.array([1, 2, 3])]:
            with pytest.raises(ValueError, match=msg):
                df.rolling(window="1D", min_periods=minp)

        # center is not implemented
        msg = "center is not implemented for datetimelike and offset based windows"
        with pytest.raises(NotImplementedError, match=msg):
            df.rolling(window="1D", center=True)
Exemple #4
0
def get_marriage_mask(dates: pd.DataFrame, min_start='1970-01-01',
                      max_end='2017-01-01') -> pd.DataFrame:
    """Build a month-by-person boolean mask of active marriages.

    Parameters
    ----------
    dates : pd.DataFrame
        One row per person with datetime columns 'start' and 'end'.
        NaT is allowed in either column.
    min_start, max_end : str or datetime-like
        Bounds of the monthly grid of month-start dates.

    Returns
    -------
    pd.DataFrame
        Indexed by month starts, one column per row of `dates`; True where
        start <= month < end (interval is <inclusive, exclusive>).
    """
    dates = dates.copy()
    min_start = pd.to_datetime(min_start)
    max_end = pd.to_datetime(max_end)

    # A hack to validate NaT comparisons: NaT never compares True, so fill
    # missing dates with a sentinel one month past the grid's end.  An open
    # 'end' then counts as ongoing; a missing 'start' never matches.
    dates = dates.fillna(max_end + offsets.MonthBegin())
    idx = pd.date_range(min_start, max_end, freq='MS')
    # Column vector of month starts vs. row vectors of start/end dates.
    # BUG FIX: the original used `idx[:, None]` — multi-dimensional indexing
    # on a DatetimeIndex, which is removed in modern pandas; go through a
    # NumPy array instead.
    grid = idx.to_numpy()[:, None]
    marriage = dates['start'].values
    divorce = dates['end'].values
    mask = (grid >= marriage) & (grid < divorce)
    # NOTE: return annotation fixed to pd.DataFrame (a DataFrame was always
    # returned, despite the original `-> np.ndarray` annotation).
    return pd.DataFrame(mask, index=idx, columns=dates.index)
Exemple #5
0
def regress_by_store(df):
    """Roll month by month over 2016-01..2017-05, regressing each next
    month against the trailing quarter and trailing year.

    For every month-end, fits two regressions predicting the following
    month: one trained on the preceding 3 months, one on the preceding 12.
    Months with no training or no target data are skipped.

    Returns a list of per-month DataFrames (indexed like the target month)
    with columns 'quarter_regress_no_dow' and 'year_regress_no_dow'.
    """
    results = []
    for period_end in pd.date_range(start='01/01/2016', end='05/01/2017',
                                    freq='M'):
        # Trailing quarter used as the first training window.
        trailing_quarter = take_df_by_valid_period(
            df, period_end - offsets.MonthBegin(3), period_end)
        if trailing_quarter.empty:
            continue
        # The month immediately following is the prediction target.
        target_df = take_df_by_period(df,
                                      period_end + offsets.MonthBegin(1),
                                      period_end + offsets.MonthEnd(1))
        if target_df.empty:
            continue
        quarter_pred = do_regression(trailing_quarter, target_df)

        # Trailing year as the second, longer training window.
        trailing_year = take_df_by_valid_period(
            df, period_end - offsets.MonthBegin(12), period_end)
        year_pred = do_regression(trailing_year, target_df)

        month_result = pd.DataFrame(index=target_df.index)
        month_result["quarter_regress_no_dow"] = quarter_pred
        month_result["year_regress_no_dow"] = year_pred
        results.append(month_result)
    return results
Exemple #6
0
def test_ms_vs_capital_ms():
    # Frequency aliases are case-sensitive: "ms" means milliseconds while
    # "MS" means month-start; they must resolve to distinct offsets.
    milli = frequencies._get_offset("ms")
    month_begin = frequencies._get_offset("MS")

    assert milli == offsets.Milli()
    assert month_begin == offsets.MonthBegin()
Exemple #7
0
def test_ms_vs_MS():
    # 'ms' (millisecond) and 'MS' (month start) differ only in case but
    # must not be conflated by the alias lookup.
    milli = frequencies.get_offset('ms')
    month_begin = frequencies.get_offset('MS')
    assert milli == offsets.Milli()
    assert month_begin == offsets.MonthBegin()
Exemple #8
0
def constrain_horizon(
    r,
    strict=False,
    cust=None,
    years=0,
    quarters=0,
    months=0,
    days=0,
    weeks=0,
    year=None,
    month=None,
    day=None,
):
    """Constrain a Series/DataFrame to a specified lookback period.

    See the documentation for dateutil.relativedelta:
    dateutil.readthedocs.io/en/stable/relativedelta.html

    Parameters
    ----------
    r : DataFrame or Series
        The target pandas object to constrain
    strict : bool, default False
        If True, raise Error if the implied start date on the horizon predates
        the actual start date of `r`.  If False, just return `r` in this
        situation
    cust : str, default None
        Shorthand lookback spec such as "5y", "18m", "two years ago" or
        "twenty-four months".  Mutually exclusive with nonzero values for
        the other period parameters.
    years, months, weeks, days : int, default 0
        Relative information; specify as positive to subtract periods.  Adding
        or subtracting a relativedelta with relative information performs
        the corresponding arithmetic operation on the original datetime value
        with the information in the relativedelta
    quarters : int, default 0
        Similar to the other plural relative info periods above, but note that
        this param is custom here.  (It is not a standard relativedelta param)
    year, month, day : int, default None
        Absolute information; specify as positive to subtract periods.  Adding
        relativedelta with absolute information does not perform an arithmetic
        operation, but rather REPLACES the corresponding value in the
        original datetime with the value(s) in relativedelta

    Returns
    -------
    Slice of `r` from the implied start date through `r`'s last index value.

    Raises
    ------
    ValueError
        If both `cust` and other nonzero params are given, if `cust` is not
        recognized, or (with strict=True) if the implied start predates `r`.
    """

    # Spelled-out counts accepted inside `cust` phrases; multi-word keys
    # are space-separated ("twenty four").
    textnum = {
        "zero": 0,
        "one": 1,
        "two": 2,
        "three": 3,
        "four": 4,
        "five": 5,
        "six": 6,
        "seven": 7,
        "eight": 8,
        "nine": 9,
        "ten": 10,
        "eleven": 11,
        "twelve": 12,
        "thirteen": 13,
        "fourteen": 14,
        "fifteen": 15,
        "sixteen": 16,
        "seventeen": 17,
        "eighteen": 18,
        "nineteen": 19,
        "twenty": 20,
        "twenty four": 24,
        "thirty six": 36,
    }

    relativedeltas = years, quarters, months, days, weeks, year, month, day
    if cust is not None and any(relativedeltas):
        raise ValueError("Cannot specify competing (nonzero) values for both"
                         " `cust` and other parameters.")
    if cust is not None:
        cust = cust.lower()

        if cust.endswith("y"):
            years = int(re.search(r"\d+", cust).group(0))

        elif cust.endswith("m"):
            months = int(re.search(r"\d+", cust).group(0))

        elif cust.endswith(("years ago", "year ago", "year", "years")):
            pos = cust.find(" year")
            # BUG FIX: hyphenated spellings ("twenty-four") used to collapse
            # to "twentyfour" via replace("-", "") and raise KeyError; map
            # the hyphen to a space to match the `textnum` keys.
            years = textnum[cust[:pos].replace("-", " ")]

        elif cust.endswith(("months ago", "month ago", "month", "months")):
            pos = cust.find(" month")
            months = textnum[cust[:pos].replace("-", " ")]

        else:
            raise ValueError("`cust` not recognized.")

    # Convert quarters to months & combine for MonthOffset
    months += quarters * 3

    # Start date will be computed relative to `end`
    end = r.index[-1]

    # Establish some funky date conventions assumed in finance.  If the end
    # date is 6/30, the date *3 months prior* is 3/31, not 3/30 as would be
    # produced by dateutil.relativedelta.

    if end.is_month_end and days == 0 and weeks == 0:
        # Month-end anchored horizon: express everything in months and let
        # MonthBegin preserve the month-end-to-month-start convention.
        if years != 0:
            years *= 12
            months += years
        start = end - offsets.MonthBegin(months)
    else:
        # `days - 1` makes the window inclusive of the start day.
        start = end - offsets.DateOffset(
            years=years,
            months=months,
            days=days - 1,
            weeks=weeks,
            year=year,
            month=month,
            day=day,
        )
    if strict and start < r.index[0]:
        raise ValueError("`start` pre-dates first element of the Index, %s" %
                         r.index[0])
    return r[start:end]
class TestRollingTS:
    """Tests for time-based rolling windows (offset / freq-string windows).

    Methods rely on pytest fixtures ``regular`` and ``ragged`` defined
    elsewhere — presumably DataFrames with a datetimelike index; the
    ``ragged`` one has irregularly spaced timestamps.  TODO confirm against
    the fixture definitions.
    """

    # rolling time-series friendly
    # xref GH13327

    def test_doc_string(self):

        df = DataFrame(
            {"B": [0, 1, 2, np.nan, 4]},
            index=[
                Timestamp("20130101 09:00:00"),
                Timestamp("20130101 09:00:02"),
                Timestamp("20130101 09:00:03"),
                Timestamp("20130101 09:00:05"),
                Timestamp("20130101 09:00:06"),
            ],
        )
        df
        df.rolling("2s").sum()

    def test_invalid_window_non_int(self, regular):

        # not a valid freq
        msg = "passed window foobar is not compatible with a datetimelike index"
        with pytest.raises(ValueError, match=msg):
            regular.rolling(window="foobar")
        # not a datetimelike index
        msg = "window must be an integer"
        with pytest.raises(ValueError, match=msg):
            regular.reset_index().rolling(window="foobar")

    @pytest.mark.parametrize("freq", ["2MS", offsets.MonthBegin(2)])
    def test_invalid_window_nonfixed(self, freq, regular):

        # non-fixed freqs
        msg = "\\<2 \\* MonthBegins\\> is a non-fixed frequency"
        with pytest.raises(ValueError, match=msg):
            regular.rolling(window=freq)

    @pytest.mark.parametrize("freq", ["1D", offsets.Day(2), "2ms"])
    def test_valid_window(self, freq, regular):
        regular.rolling(window=freq)

    @pytest.mark.parametrize("minp", [1.0, "foo", np.array([1, 2, 3])])
    def test_invalid_minp(self, minp, regular):
        # non-integer min_periods
        msg = (r"local variable 'minp' referenced before assignment|"
               "min_periods must be an integer")
        with pytest.raises(ValueError, match=msg):
            regular.rolling(window="1D", min_periods=minp)

    def test_on(self, regular):

        df = regular

        # not a valid column
        msg = (r"invalid on specified as foobar, must be a column "
               "\\(of DataFrame\\), an Index or None")
        with pytest.raises(ValueError, match=msg):
            df.rolling(window="2s", on="foobar")

        # column is valid
        df = df.copy()
        df["C"] = date_range("20130101", periods=len(df))
        df.rolling(window="2d", on="C").sum()

        # invalid columns
        msg = "window must be an integer"
        with pytest.raises(ValueError, match=msg):
            df.rolling(window="2d", on="B")

        # ok even though on non-selected
        df.rolling(window="2d", on="C").B.sum()

    def test_monotonic_on(self):

        # on/index must be monotonic
        df = DataFrame({
            "A": date_range("20130101", periods=5, freq="s"),
            "B": range(5)
        })

        assert df.A.is_monotonic_increasing
        df.rolling("2s", on="A").sum()

        df = df.set_index("A")
        assert df.index.is_monotonic_increasing
        df.rolling("2s").sum()

    def test_non_monotonic_on(self):
        # GH 19248
        df = DataFrame({
            "A": date_range("20130101", periods=5, freq="s"),
            "B": range(5)
        })
        df = df.set_index("A")
        non_monotonic_index = df.index.to_list()
        non_monotonic_index[0] = non_monotonic_index[3]
        df.index = non_monotonic_index

        assert not df.index.is_monotonic_increasing

        msg = "index values must be monotonic"
        with pytest.raises(ValueError, match=msg):
            df.rolling("2s").sum()

        df = df.reset_index()

        msg = (r"invalid on specified as A, must be a column "
               "\\(of DataFrame\\), an Index or None")
        with pytest.raises(ValueError, match=msg):
            df.rolling("2s", on="A").sum()

    def test_frame_on(self):

        df = DataFrame({
            "B":
            range(5),
            "C":
            date_range("20130101 09:00:00", periods=5, freq="3s")
        })

        df["A"] = [
            Timestamp("20130101 09:00:00"),
            Timestamp("20130101 09:00:02"),
            Timestamp("20130101 09:00:03"),
            Timestamp("20130101 09:00:05"),
            Timestamp("20130101 09:00:06"),
        ]

        # we are doing simulating using 'on'
        expected = df.set_index("A").rolling("2s").B.sum().reset_index(
            drop=True)

        result = df.rolling("2s", on="A").B.sum()
        tm.assert_series_equal(result, expected)

        # test as a frame
        # we should be ignoring the 'on' as an aggregation column
        # note that the expected is setting, computing, and resetting
        # so the columns need to be switched compared
        # to the actual result where they are ordered as in the
        # original
        expected = (df.set_index("A").rolling("2s")[[
            "B"
        ]].sum().reset_index()[["B", "A"]])

        result = df.rolling("2s", on="A")[["B"]].sum()
        tm.assert_frame_equal(result, expected)

    def test_frame_on2(self):

        # using multiple aggregation columns
        df = DataFrame(
            {
                "A": [0, 1, 2, 3, 4],
                "B": [0, 1, 2, np.nan, 4],
                "C":
                Index([
                    Timestamp("20130101 09:00:00"),
                    Timestamp("20130101 09:00:02"),
                    Timestamp("20130101 09:00:03"),
                    Timestamp("20130101 09:00:05"),
                    Timestamp("20130101 09:00:06"),
                ]),
            },
            columns=["A", "C", "B"],
        )

        expected1 = DataFrame(
            {
                "A": [0.0, 1, 3, 3, 7],
                "B": [0, 1, 3, np.nan, 4],
                "C": df["C"]
            },
            columns=["A", "C", "B"],
        )

        result = df.rolling("2s", on="C").sum()
        expected = expected1
        tm.assert_frame_equal(result, expected)

        expected = Series([0, 1, 3, np.nan, 4], name="B")
        result = df.rolling("2s", on="C").B.sum()
        tm.assert_series_equal(result, expected)

        expected = expected1[["A", "B", "C"]]
        result = df.rolling("2s", on="C")[["A", "B", "C"]].sum()
        tm.assert_frame_equal(result, expected)

    def test_basic_regular(self, regular):

        df = regular.copy()

        df.index = date_range("20130101", periods=5, freq="D")
        expected = df.rolling(window=1, min_periods=1).sum()
        result = df.rolling(window="1D").sum()
        tm.assert_frame_equal(result, expected)

        df.index = date_range("20130101", periods=5, freq="2D")
        expected = df.rolling(window=1, min_periods=1).sum()
        result = df.rolling(window="2D", min_periods=1).sum()
        tm.assert_frame_equal(result, expected)

        expected = df.rolling(window=1, min_periods=1).sum()
        result = df.rolling(window="2D", min_periods=1).sum()
        tm.assert_frame_equal(result, expected)

        expected = df.rolling(window=1).sum()
        result = df.rolling(window="2D").sum()
        tm.assert_frame_equal(result, expected)

    def test_min_periods(self, regular):

        # compare for min_periods
        df = regular

        # these slightly different
        expected = df.rolling(2, min_periods=1).sum()
        result = df.rolling("2s").sum()
        tm.assert_frame_equal(result, expected)

        expected = df.rolling(2, min_periods=1).sum()
        result = df.rolling("2s", min_periods=1).sum()
        tm.assert_frame_equal(result, expected)

    def test_closed(self, regular):

        # xref GH13965

        df = DataFrame(
            {"A": [1] * 5},
            index=[
                Timestamp("20130101 09:00:01"),
                Timestamp("20130101 09:00:02"),
                Timestamp("20130101 09:00:03"),
                Timestamp("20130101 09:00:04"),
                Timestamp("20130101 09:00:06"),
            ],
        )

        # closed must be 'right', 'left', 'both', 'neither'
        msg = "closed must be 'right', 'left', 'both' or 'neither'"
        with pytest.raises(ValueError, match=msg):
            regular.rolling(window="2s", closed="blabla")

        expected = df.copy()
        expected["A"] = [1.0, 2, 2, 2, 1]
        result = df.rolling("2s", closed="right").sum()
        tm.assert_frame_equal(result, expected)

        # default should be 'right'
        result = df.rolling("2s").sum()
        tm.assert_frame_equal(result, expected)

        expected = df.copy()
        expected["A"] = [1.0, 2, 3, 3, 2]
        result = df.rolling("2s", closed="both").sum()
        tm.assert_frame_equal(result, expected)

        expected = df.copy()
        expected["A"] = [np.nan, 1.0, 2, 2, 1]
        result = df.rolling("2s", closed="left").sum()
        tm.assert_frame_equal(result, expected)

        expected = df.copy()
        expected["A"] = [np.nan, 1.0, 1, 1, np.nan]
        result = df.rolling("2s", closed="neither").sum()
        tm.assert_frame_equal(result, expected)

    def test_ragged_sum(self, ragged):

        df = ragged
        result = df.rolling(window="1s", min_periods=1).sum()
        expected = df.copy()
        expected["B"] = [0.0, 1, 2, 3, 4]
        tm.assert_frame_equal(result, expected)

        result = df.rolling(window="2s", min_periods=1).sum()
        expected = df.copy()
        expected["B"] = [0.0, 1, 3, 3, 7]
        tm.assert_frame_equal(result, expected)

        result = df.rolling(window="2s", min_periods=2).sum()
        expected = df.copy()
        expected["B"] = [np.nan, np.nan, 3, np.nan, 7]
        tm.assert_frame_equal(result, expected)

        result = df.rolling(window="3s", min_periods=1).sum()
        expected = df.copy()
        expected["B"] = [0.0, 1, 3, 5, 7]
        tm.assert_frame_equal(result, expected)

        result = df.rolling(window="3s").sum()
        expected = df.copy()
        expected["B"] = [0.0, 1, 3, 5, 7]
        tm.assert_frame_equal(result, expected)

        result = df.rolling(window="4s", min_periods=1).sum()
        expected = df.copy()
        expected["B"] = [0.0, 1, 3, 6, 9]
        tm.assert_frame_equal(result, expected)

        result = df.rolling(window="4s", min_periods=3).sum()
        expected = df.copy()
        expected["B"] = [np.nan, np.nan, 3, 6, 9]
        tm.assert_frame_equal(result, expected)

        result = df.rolling(window="5s", min_periods=1).sum()
        expected = df.copy()
        expected["B"] = [0.0, 1, 3, 6, 10]
        tm.assert_frame_equal(result, expected)

    def test_ragged_mean(self, ragged):

        df = ragged
        result = df.rolling(window="1s", min_periods=1).mean()
        expected = df.copy()
        expected["B"] = [0.0, 1, 2, 3, 4]
        tm.assert_frame_equal(result, expected)

        result = df.rolling(window="2s", min_periods=1).mean()
        expected = df.copy()
        expected["B"] = [0.0, 1, 1.5, 3.0, 3.5]
        tm.assert_frame_equal(result, expected)

    def test_ragged_median(self, ragged):

        df = ragged
        result = df.rolling(window="1s", min_periods=1).median()
        expected = df.copy()
        expected["B"] = [0.0, 1, 2, 3, 4]
        tm.assert_frame_equal(result, expected)

        result = df.rolling(window="2s", min_periods=1).median()
        expected = df.copy()
        expected["B"] = [0.0, 1, 1.5, 3.0, 3.5]
        tm.assert_frame_equal(result, expected)

    def test_ragged_quantile(self, ragged):

        df = ragged
        result = df.rolling(window="1s", min_periods=1).quantile(0.5)
        expected = df.copy()
        expected["B"] = [0.0, 1, 2, 3, 4]
        tm.assert_frame_equal(result, expected)

        result = df.rolling(window="2s", min_periods=1).quantile(0.5)
        expected = df.copy()
        expected["B"] = [0.0, 1, 1.5, 3.0, 3.5]
        tm.assert_frame_equal(result, expected)

    def test_ragged_std(self, ragged):

        df = ragged
        result = df.rolling(window="1s", min_periods=1).std(ddof=0)
        expected = df.copy()
        expected["B"] = [0.0] * 5
        tm.assert_frame_equal(result, expected)

        result = df.rolling(window="1s", min_periods=1).std(ddof=1)
        expected = df.copy()
        expected["B"] = [np.nan] * 5
        tm.assert_frame_equal(result, expected)

        result = df.rolling(window="3s", min_periods=1).std(ddof=0)
        expected = df.copy()
        expected["B"] = [0.0] + [0.5] * 4
        tm.assert_frame_equal(result, expected)

        result = df.rolling(window="5s", min_periods=1).std(ddof=1)
        expected = df.copy()
        expected["B"] = [np.nan, 0.707107, 1.0, 1.0, 1.290994]
        tm.assert_frame_equal(result, expected)

    def test_ragged_var(self, ragged):

        df = ragged
        result = df.rolling(window="1s", min_periods=1).var(ddof=0)
        expected = df.copy()
        expected["B"] = [0.0] * 5
        tm.assert_frame_equal(result, expected)

        result = df.rolling(window="1s", min_periods=1).var(ddof=1)
        expected = df.copy()
        expected["B"] = [np.nan] * 5
        tm.assert_frame_equal(result, expected)

        result = df.rolling(window="3s", min_periods=1).var(ddof=0)
        expected = df.copy()
        expected["B"] = [0.0] + [0.25] * 4
        tm.assert_frame_equal(result, expected)

        result = df.rolling(window="5s", min_periods=1).var(ddof=1)
        expected = df.copy()
        expected["B"] = [np.nan, 0.5, 1.0, 1.0, 1 + 2 / 3.0]
        tm.assert_frame_equal(result, expected)

    def test_ragged_skew(self, ragged):

        df = ragged
        result = df.rolling(window="3s", min_periods=1).skew()
        expected = df.copy()
        expected["B"] = [np.nan] * 5
        tm.assert_frame_equal(result, expected)

        result = df.rolling(window="5s", min_periods=1).skew()
        expected = df.copy()
        expected["B"] = [np.nan] * 2 + [0.0, 0.0, 0.0]
        tm.assert_frame_equal(result, expected)

    def test_ragged_kurt(self, ragged):

        df = ragged
        result = df.rolling(window="3s", min_periods=1).kurt()
        expected = df.copy()
        expected["B"] = [np.nan] * 5
        tm.assert_frame_equal(result, expected)

        result = df.rolling(window="5s", min_periods=1).kurt()
        expected = df.copy()
        expected["B"] = [np.nan] * 4 + [-1.2]
        tm.assert_frame_equal(result, expected)

    def test_ragged_count(self, ragged):

        df = ragged
        result = df.rolling(window="1s", min_periods=1).count()
        expected = df.copy()
        expected["B"] = [1.0, 1, 1, 1, 1]
        tm.assert_frame_equal(result, expected)

        df = ragged
        result = df.rolling(window="1s").count()
        tm.assert_frame_equal(result, expected)

        result = df.rolling(window="2s", min_periods=1).count()
        expected = df.copy()
        expected["B"] = [1.0, 1, 2, 1, 2]
        tm.assert_frame_equal(result, expected)

        result = df.rolling(window="2s", min_periods=2).count()
        expected = df.copy()
        expected["B"] = [np.nan, np.nan, 2, np.nan, 2]
        tm.assert_frame_equal(result, expected)

    def test_regular_min(self):

        df = DataFrame({
            "A": date_range("20130101", periods=5, freq="s"),
            "B": [0.0, 1, 2, 3, 4]
        }).set_index("A")
        result = df.rolling("1s").min()
        expected = df.copy()
        expected["B"] = [0.0, 1, 2, 3, 4]
        tm.assert_frame_equal(result, expected)

        df = DataFrame({
            "A": date_range("20130101", periods=5, freq="s"),
            "B": [5, 4, 3, 4, 5]
        }).set_index("A")

        # NOTE(review): this re-asserts the *previous* result/expected pair
        # after df was rebuilt — looks like a stray leftover line.
        tm.assert_frame_equal(result, expected)
        result = df.rolling("2s").min()
        expected = df.copy()
        expected["B"] = [5.0, 4, 3, 3, 4]
        tm.assert_frame_equal(result, expected)

        result = df.rolling("5s").min()
        expected = df.copy()
        expected["B"] = [5.0, 4, 3, 3, 3]
        tm.assert_frame_equal(result, expected)

    def test_ragged_min(self, ragged):

        df = ragged

        result = df.rolling(window="1s", min_periods=1).min()
        expected = df.copy()
        expected["B"] = [0.0, 1, 2, 3, 4]
        tm.assert_frame_equal(result, expected)

        result = df.rolling(window="2s", min_periods=1).min()
        expected = df.copy()
        expected["B"] = [0.0, 1, 1, 3, 3]
        tm.assert_frame_equal(result, expected)

        result = df.rolling(window="5s", min_periods=1).min()
        expected = df.copy()
        expected["B"] = [0.0, 0, 0, 1, 1]
        tm.assert_frame_equal(result, expected)

    def test_perf_min(self):

        N = 10000

        dfp = DataFrame({"B": np.random.randn(N)},
                        index=date_range("20130101", periods=N, freq="s"))
        expected = dfp.rolling(2, min_periods=1).min()
        result = dfp.rolling("2s").min()
        assert ((result - expected) < 0.01).all().bool()

        expected = dfp.rolling(200, min_periods=1).min()
        result = dfp.rolling("200s").min()
        assert ((result - expected) < 0.01).all().bool()

    def test_ragged_max(self, ragged):

        df = ragged

        result = df.rolling(window="1s", min_periods=1).max()
        expected = df.copy()
        expected["B"] = [0.0, 1, 2, 3, 4]
        tm.assert_frame_equal(result, expected)

        result = df.rolling(window="2s", min_periods=1).max()
        expected = df.copy()
        expected["B"] = [0.0, 1, 2, 3, 4]
        tm.assert_frame_equal(result, expected)

        result = df.rolling(window="5s", min_periods=1).max()
        expected = df.copy()
        expected["B"] = [0.0, 1, 2, 3, 4]
        tm.assert_frame_equal(result, expected)

    @pytest.mark.parametrize(
        "freq, op, result_data",
        [
            ("ms", "min", [0.0] * 10),
            ("ms", "mean", [0.0] * 9 + [2.0 / 9]),
            ("ms", "max", [0.0] * 9 + [2.0]),
            ("s", "min", [0.0] * 10),
            ("s", "mean", [0.0] * 9 + [2.0 / 9]),
            ("s", "max", [0.0] * 9 + [2.0]),
            ("min", "min", [0.0] * 10),
            ("min", "mean", [0.0] * 9 + [2.0 / 9]),
            ("min", "max", [0.0] * 9 + [2.0]),
            ("h", "min", [0.0] * 10),
            ("h", "mean", [0.0] * 9 + [2.0 / 9]),
            ("h", "max", [0.0] * 9 + [2.0]),
            ("D", "min", [0.0] * 10),
            ("D", "mean", [0.0] * 9 + [2.0 / 9]),
            ("D", "max", [0.0] * 9 + [2.0]),
        ],
    )
    def test_freqs_ops(self, freq, op, result_data):
        # GH 21096
        index = date_range(start="2018-1-1 01:00:00",
                           freq=f"1{freq}",
                           periods=10)
        s = Series(data=0, index=index)
        s.iloc[1] = np.nan
        s.iloc[-1] = 2
        result = getattr(s.rolling(window=f"10{freq}"), op)()
        expected = Series(data=result_data, index=index)

        tm.assert_series_equal(result, expected)

    @pytest.mark.parametrize(
        "f",
        [
            "sum",
            "mean",
            pytest.param(
                "count",
                marks=pytest.mark.filterwarnings(
                    "ignore:min_periods:FutureWarning"),
            ),
            "median",
            "std",
            "var",
            "kurt",
            "skew",
            "min",
            "max",
        ],
    )
    def test_all(self, f, regular):

        # simple comparison of integer vs time-based windowing
        df = regular * 2
        er = df.rolling(window=1)
        r = df.rolling(window="1s")

        result = getattr(r, f)()
        expected = getattr(er, f)()
        tm.assert_frame_equal(result, expected)

        result = r.quantile(0.5)
        expected = er.quantile(0.5)
        tm.assert_frame_equal(result, expected)

    def test_all2(self, arithmetic_win_operators):
        f = arithmetic_win_operators
        # more sophisticated comparison of integer vs.
        # time-based windowing
        df = DataFrame({"B": np.arange(50)},
                       index=date_range("20130101", periods=50, freq="H"))
        # in-range data
        dft = df.between_time("09:00", "16:00")

        r = dft.rolling(window="5H")

        result = getattr(r, f)()

        # we need to roll the days separately
        # to compare with a time-based roll
        # finally groupby-apply will return a multi-index
        # so we need to drop the day
        def agg_by_day(x):
            x = x.between_time("09:00", "16:00")
            return getattr(x.rolling(5, min_periods=1), f)()

        expected = (df.groupby(df.index.day).apply(agg_by_day).reset_index(
            level=0, drop=True))

        tm.assert_frame_equal(result, expected)

    def test_groupby_monotonic(self):

        # GH 15130
        # we don't need to validate monotonicity when grouping

        # GH 43909 we should raise an error here to match
        # behaviour of non-groupby rolling.

        data = [
            ["David", "1/1/2015", 100],
            ["David", "1/5/2015", 500],
            ["David", "5/30/2015", 50],
            ["David", "7/25/2015", 50],
            ["Ryan", "1/4/2014", 100],
            ["Ryan", "1/19/2015", 500],
            ["Ryan", "3/31/2016", 50],
            ["Joe", "7/1/2015", 100],
            ["Joe", "9/9/2015", 500],
            ["Joe", "10/15/2015", 50],
        ]

        df = DataFrame(data=data, columns=["name", "date", "amount"])
        df["date"] = to_datetime(df["date"])
        df = df.sort_values("date")

        expected = (df.set_index("date").groupby("name").apply(
            lambda x: x.rolling("180D")["amount"].sum()))
        result = df.groupby("name").rolling("180D", on="date")["amount"].sum()
        tm.assert_series_equal(result, expected)

    def test_non_monotonic_raises(self):
        # GH 13966 (similar to #15130, closed by #15175)

        # superseded by 43909

        dates = date_range(start="2016-01-01 09:30:00", periods=20, freq="s")
        df = DataFrame({
            "A": [1] * 20 + [2] * 12 + [3] * 8,
            "B": np.concatenate((dates, dates)),
            "C": np.arange(40),
        })

        expected = (df.set_index("B").groupby("A").apply(
            lambda x: x.rolling("4s")["C"].mean()))
        with pytest.raises(ValueError, match=r".* must be monotonic"):
            df.groupby("A").rolling(
                "4s",
                on="B").C.mean()  # should raise for non-monotonic t series

        df2 = df.sort_values("B")
        result = df2.groupby("A").rolling("4s", on="B").C.mean()
        tm.assert_series_equal(result, expected)

    def test_rolling_cov_offset(self):
        # GH16058

        idx = date_range("2017-01-01", periods=24, freq="1h")
        ss = Series(np.arange(len(idx)), index=idx)

        result = ss.rolling("2h").cov()
        expected = Series([np.nan] + [0.5] * (len(idx) - 1), index=idx)
        tm.assert_series_equal(result, expected)

        expected2 = ss.rolling(2, min_periods=1).cov()
        tm.assert_series_equal(result, expected2)

        result = ss.rolling("3h").cov()
        expected = Series([np.nan, 0.5] + [1.0] * (len(idx) - 2), index=idx)
        tm.assert_series_equal(result, expected)

        expected2 = ss.rolling(3, min_periods=1).cov()
        tm.assert_series_equal(result, expected2)

    def test_rolling_on_decreasing_index(self):
        # GH-19248, GH-32385
        index = [
            Timestamp("20190101 09:00:30"),
            Timestamp("20190101 09:00:27"),
            Timestamp("20190101 09:00:20"),
            Timestamp("20190101 09:00:18"),
            Timestamp("20190101 09:00:10"),
        ]

        df = DataFrame({"column": [3, 4, 4, 5, 6]}, index=index)
        result = df.rolling("5s").min()
        expected = DataFrame({"column": [3.0, 3.0, 4.0, 4.0, 6.0]},
                             index=index)
        tm.assert_frame_equal(result, expected)

    def test_rolling_on_empty(self):
        # GH-32385
        df = DataFrame({"column": []}, index=[])
        result = df.rolling("5s").min()
        expected = DataFrame({"column": []}, index=[])
        tm.assert_frame_equal(result, expected)

    def test_rolling_on_multi_index_level(self):
        # GH-15584
        df = DataFrame(
            {"column": range(6)},
            index=MultiIndex.from_product(
                [date_range("20190101", periods=3),
                 range(2)],
                names=["date", "seq"]),
        )
        result = df.rolling("10d", on=df.index.get_level_values("date")).sum()
        expected = DataFrame({"column": [0.0, 1.0, 3.0, 6.0, 10.0, 15.0]},
                             index=df.index)
        tm.assert_frame_equal(result, expected)
Exemple #10
0
def get_first_fortnight_last_day(ds):
    """Return the Sunday that closes the first fortnight of *ds*'s month."""
    # First business day of the month containing ``ds``: roll forward to the
    # next month's start, then back one business-month-begin anchor.
    month_first_bday = (ds + offsets.MonthBegin(1)) - offsets.BMonthBegin(1)
    # Ten business days later is the Monday opening the second fortnight.
    second_fortnight_open = month_first_bday + offsets.BDay(10)
    # One calendar day earlier is the Sunday ending the first fortnight.
    return second_fortnight_open - offsets.Day(1)
import pandas as pd
import pandas.tseries.offsets as offsets

# Sundays from early January through the start of May 2017.
sunday_index = pd.date_range(start='01/02/2017', end='05/01/2017', freq='W')
print(sunday_index)
print(sunday_index[16] + offsets.Week(1))

exit(0)


# --- Everything below is unreachable: exit(0) above ends the script. ---
month_close_index = pd.date_range(start='01/01/2016', end='05/01/2017', freq='M')
print(month_close_index[15].replace(day=22))
print(month_close_index[15])
if month_close_index[15].month == 4 and month_close_index[15].year == 2017:
    # NOTE(review): Timestamp.replace returns a new value; this result is
    # discarded, so the following print shows the unchanged timestamp.
    month_close_index[15].replace(day=22)
    print(month_close_index[15])
print(month_close_index)
exit(0)
for month_close in month_close_index:
    quarter_open = month_close - offsets.MonthBegin(3)
    next_month_open = month_close + offsets.MonthBegin(1)
    next_month_close = month_close + offsets.MonthEnd(1)
    year_open = month_close - offsets.MonthBegin(12)

    print("-" * 30)
    print(quarter_open)
    print(next_month_open)
    print(next_month_close)
    print(year_open)
# Exemple #12 (scraped snippet-boundary marker; vote count: 0)
def Orders_prediction():
    """Build per-supplier daily order-quantity predictions and write them out.

    Asks the user (via a Tk file dialog) for order-history TSV files, reads an
    ``FC_<code>.csv`` forecast file and a ``nowork_day.csv`` holiday calendar
    from the script directory, derives monthly / weekday / lead-time ratios per
    supplier, and accumulates predicted quantities per base date with
    multiprocessing.  Writes ``<supplier>_prediction_row.tsv`` per supplier and
    a combined ``est_<code>.tsv``.

    NOTE(review): relies on module-level helpers defined elsewhere in this
    file (``getFACI_CD``, ``wrapper``, ``wrapper3``, ``qtyave``, ``qtyratio``)
    and on module imports (``tk``, ``pd``, ``dt``, ``datetime``, ``multi``,
    ``Pool``, ``offsets`` ...); it has no return value.
    """
    # from tqdm import tqdm
    csv.field_size_limit(1000000000)

    # All CSV/TSV files are read and written as UTF-8.
    font = 'utf-8'
    # font='shift_jisx0213'

    # Absolute path of the directory containing this script.
    # NOTE(review): abspath(__name__) resolves the module NAME against the
    # current working directory, not the file's real location — confirm this
    # is intended (os.path.abspath(__file__) is the usual form).
    script_pass = os.path.dirname(os.path.abspath(__name__))
    if __name__ == '__main__':
        local_pass = script_pass + '/'
    else:
        local_pass = script_pass + '/Orders_prediction/'

    # Show the file-selection dialog (hide the empty Tk root window).
    root = tk.Tk()
    root.withdraw()
    fTyp = [("", "*")]
    iDir = local_pass

    # Select the order-history data files.
    # Changed from askopenfilename to askopenfilenames to allow multi-select.
    file = tkinter.filedialog.askopenfilenames(filetypes=fTyp,
                                               initialdir=iDir,
                                               title='受注実績データの取得')

    if len(file) != 0:
        # Build the list of selected history files.
        list_f = list(file)

        # Import the FC (forecast) data.
        # Derive the FC code from the file name FC_<code>.csv.
        fc_pass = glob.glob(local_pass + 'FC_*.csv')
        fc_name = os.path.basename(fc_pass[0])
        fc_cd = fc_name.replace('FC_', '')
        fc_cd = fc_cd.replace('.csv', '')

        # Proceed only when exactly one FC file exists; otherwise warn below.
        if len(fc_pass) == 1:
            FC = pd.read_csv(local_pass + fc_name,
                             encoding=font,
                             dtype='object',
                             index_col=None)

            # Extract the supplier codes present in the FC.
            fc_sup = FC.drop_duplicates(subset=['SUPPLIER_CD'],
                                        keep='first',
                                        inplace=False)  # drop duplicates
            fc_sup = fc_sup.loc[::, ['SUPPLIER_CD']]
            fc_sup = fc_sup.T
            fc_sup_l = fc_sup.values.tolist()
            list_pg = fc_sup_l[0]

            # Show the list-box dialog (period / supplier selection).
            result = getFACI_CD(fc_name, list_pg)
            check_list = [
                result[5], result[6], result[7], result[8], result[9],
                result[10], result[11], result[12]
            ]
            # Zero-pad single-digit month/day components to two digits.
            for c in range(len(check_list)):
                if len(check_list[c]) == 1:
                    check_list[c] = '0' + check_list[c]

            # Target (history) and prediction period bounds as YYYYMMDD strings.
            Tgt_S_M = result[1] + check_list[0] + check_list[4]
            Tgt_E_M = result[2] + check_list[1] + check_list[5]
            Pre_S_M = result[3] + check_list[2] + check_list[6]
            Pre_E_M = result[4] + check_list[3] + check_list[7]

            # Open the first selected file.
            f_name = os.path.basename(list_f[0])
            f_pass = os.path.dirname(list_f[0])

            # Read only the required columns.
            print(f_pass + '/' + f_name)
            order = pd.read_csv(
                f_pass + '/' + f_name,
                sep='\t',
                encoding=font,
                dtype=object,
                engine='python',
                error_bad_lines=False,
                usecols=[
                    '番号', '現法コード', 'グローバル番号', '受注日・見積回答日', '受注時間・見積回答時間',
                    'JST変換受注日・JST変換見積回答日', 'JST変換受注時間・JST変換見積回答時間', '見積有効日',
                    '見積有効時間', 'JST変換見積有効日', 'JST変換見積有効時間', 'アンフィット種別',
                    '得意先コード', '直送先コード', 'MCコード', 'インナーコード', '商品コード', '実績現法コード',
                    '実績仕入先コード', '実績管理単位コード', 'ACE仕入先コード', 'ACE仕入先カテゴリコード',
                    '受注実績SSD', '見積回答SSD', '数量', '納入区分', '顧客希望納期'
                ])

            # Open the remaining files in turn and concatenate them.
            for r in range(1, len(list_f)):
                f_name = os.path.basename(list_f[r])
                print(f_name)
                order_add = pd.read_csv(
                    f_pass + '/' + f_name,
                    sep='\t',
                    encoding=font,
                    dtype=object,
                    engine='python',
                    error_bad_lines=False,
                    usecols=[
                        '番号', '現法コード', 'グローバル番号', '受注日・見積回答日', '受注時間・見積回答時間',
                        'JST変換受注日・JST変換見積回答日', 'JST変換受注時間・JST変換見積回答時間',
                        '見積有効日', '見積有効時間', 'JST変換見積有効日', 'JST変換見積有効時間',
                        'アンフィット種別', '得意先コード', '直送先コード', 'MCコード', 'インナーコード',
                        '商品コード', '実績現法コード', '実績仕入先コード', '実績管理単位コード',
                        'ACE仕入先コード', 'ACE仕入先カテゴリコード', '受注実績SSD', '見積回答SSD',
                        '数量', '納入区分', '顧客希望納期'
                    ])
                # Append this file's rows.
                order = order.append(order_add, sort=False)

            # Load the non-working-day (holiday) data.
            nowork_day = pd.read_csv(local_pass + 'nowork_day.csv',
                                     encoding=font,
                                     dtype='object',
                                     index_col=None)

            # Build a non-working-day list per subsidiary / site.
            # sub_name[i] maps to the calendar code calendar_name[i].
            sub_name = [
                'CHN', 'GRM', 'HKG', 'IND', 'JKT', 'KOR', 'MEX', 'MJP', 'MYS',
                'SGP', 'THA', 'TIW', 'USA', 'VNM', '0143', '7017', '3764',
                '0FCN', '0AIO', 'SPCM'
            ]
            calendar_name = [
                'CAAAA', 'GAAAA', 'NAAAA', 'DAAAA', 'JAAAA', 'KAAAA', 'QAAAA',
                '5AAAA', 'MAAAA', 'SAAAA', 'HAAAA', 'TAAAA', 'UAAAA', 'VAAAA',
                '5AAAA', '5AAAA', '5AAAA', 'C8677', 'C8677', '50SPC'
            ]
            calendar_dict = {}
            for i in range(0, len(sub_name)):
                noworkday_df = nowork_day[nowork_day['CALENDAR_CD'] ==
                                          calendar_name[i]]
                noworkday_df = noworkday_df.loc[::, ['OFF_DATE']]
                noworkday_df = noworkday_df.T
                noworkday_list = noworkday_df.values.tolist()
                calendar_dict[sub_name[i]] = noworkday_list[0]

            # Exclude quotation records (rows with a quote-validity date).
            order = order[order['見積有効日'].isnull()]

            # Rename the combined order-date / quote-reply-date column.
            order = order.rename(columns={'受注日・見積回答日': '受注日'})
            order = order.astype({'受注日': int, '受注実績SSD': int})

            # Restrict to the shipment-SSD usage period.
            # The prediction is fundamentally summarized by order date.
            # Build the filter condition as a query string.
            condition = Tgt_S_M + ' <= 受注日 <= ' + Tgt_E_M
            order = order.query(condition)

            # DataFrame that will accumulate the generated predictions.
            prediction_all = pd.DataFrame(columns=[
                'SUBSIDIARY_CD', 'SUPPLIER_CD', 'FACILITY_CD', 'BASE_DATE',
                'BASE_DATE_ADD_DAYS', 'PREDICTION_QUANTITY', 'UPD_COUNT',
                'DEL_FLG', 'REG_USR', 'REG_TIME', 'UPD_USR', 'UPD_TIME'
            ])

            for pg_name in list_pg:
                # Keep only the history rows for this supplier.
                order_sup = order[order['実績仕入先コード'] == pg_name].copy()
                if len(order_sup) > 0:
                    Tgt_S = copy.copy(Tgt_S_M)
                    Tgt_E = copy.copy(Tgt_E_M)
                    Pre_S = copy.copy(Pre_S_M)
                    Pre_E = copy.copy(Pre_E_M)
                    # Reformat order date and ship date as YYYY-MM-DD strings.
                    order_sup = order_sup.astype({'受注日': str, '受注実績SSD': str})
                    order_sup[
                        '受注日'] = order_sup['受注日'].str[0:4] + '-' + order_sup[
                            '受注日'].str[4:6] + '-' + order_sup['受注日'].str[6:8]
                    order_sup['受注実績SSD'] = order_sup['受注実績SSD'].str[
                        0:4] + '-' + order_sup['受注実績SSD'].str[
                            4:6] + '-' + order_sup['受注実績SSD'].str[6:8]

                    # Add a weekday-of-order column (abbreviated name, e.g. 'Mon').
                    order_sup['weekday'] = [
                        dt.datetime.strptime(x, "%Y-%m-%d").strftime('%a')
                        for x in order_sup['受注日']
                    ]

                    # Parse order date and actual-shipment SSD as datetimes.
                    order_sup['受注日'] = pd.to_datetime(order_sup['受注日'])
                    order_sup['受注実績SSD'] = pd.to_datetime(order_sup['受注実績SSD'])

                    # Index by order date and add year/month index levels.
                    order_sup = order_sup.set_index('受注日')
                    order_sup = order_sup.set_index([
                        order_sup.index.year, order_sup.index.month,
                        order_sup.index
                    ])
                    order_sup.index.names = ['year', 'month', '受注日']
                    order_sup = order_sup.reset_index()

                    # Add the lead-time ("delivery attribute") column.
                    # Pick the non-working-day calendar for this supplier.
                    nowork_day_w = calendar_dict[pg_name][:]

                    # Convert each calendar entry from str to datetime.
                    for i in range(0, len(nowork_day_w)):
                        nowork_day_w[i] = dt.datetime.strptime(
                            nowork_day_w[i], "%Y-%m-%d")

                    # Lead time = calendar days between order date and ship
                    # date minus the non-working days in that interval.
                    order_sup['納期属性'] = [
                        (z - y).days -
                        len(list(filter(lambda x: y <= x <= z, nowork_day_w)))
                        for y, z in zip(order_sup['受注日'], order_sup['受注実績SSD'])
                    ]

                    # Clamp lead time to [0, 73] (dropping the out-of-range
                    # records was considered; acknowledged as not strictly
                    # correct in the original note).
                    order_sup.loc[order_sup['納期属性'] > 73, '納期属性'] = 73
                    order_sup.loc[order_sup['納期属性'] < 0, '納期属性'] = 0
                    # order_sup = order_sup.query('納期属性 >= 0')
                    order_sup = order_sup.astype({'数量': int})

                    # Aggregate actuals and working days per subsidiary.
                    # Aggregate yearly quantity per subsidiary.

                    order_subtotal = order_sup.groupby(
                        ['year', 'month'], as_index=False)['数量'].sum()
                    order_subtotal = order_subtotal.astype({
                        'year': str,
                        'month': str
                    })
                    # Count the plant's working days used to divide order_subtotal.
                    order_subtotal['開始日'] = pd.to_datetime(
                        (order_subtotal['year'] + '/' +
                         order_subtotal['month'] + '/01'),
                        format='%Y/%m/%d')
                    order_subtotal[
                        '終了日'] = order_subtotal['開始日'] + offsets.MonthBegin(1)
                    order_subtotal['月稼働日'] = [
                        (z - y).days -
                        len(list(filter(lambda x: y <= x < z, nowork_day_w)))
                        for y, z in zip(order_subtotal['開始日'],
                                        order_subtotal['終了日'])
                    ]
                    order_subtotal['月平均本数'] = (order_subtotal['数量'] /
                                               order_subtotal['月稼働日']).round(3)

                    # Compute each subsidiary's share of quantity over the period.
                    order_subratio = order_sup.groupby(
                        ['year', '現法コード'], as_index=False)['数量'].sum()
                    order_subratio['合計'] = order_subtotal['数量'].sum()
                    order_subratio['ratio'] = (order_subratio['数量'] /
                                               order_subratio['合計']).round(3)
                    order_subratio.drop(['数量', '合計'], axis=1, inplace=True)

                    # Multiply monthly totals by the subsidiary ratio;
                    # unify the dtype of the year/month keys first.
                    order_subtotal = order_subtotal.astype({
                        'year': int,
                        'month': int
                    })
                    order_subtotal = pd.merge(order_subtotal,
                                              order_subratio,
                                              on=['year'],
                                              how='left')
                    order_subtotal['月平均本数'] = (
                        order_subtotal['月平均本数'] *
                        order_subtotal['ratio']).round(3)

                    # For now the actuals could be used as the FC figures.
                    # When building the FC from actuals the period is bounded
                    # by ship date, but the prediction is bounded by order
                    # date, so Tgt_S/E filtering alone is insufficient; the FC
                    # is therefore supplied externally as a rule.  The column
                    # cleanup for that alternative path is kept below,
                    # commented out.
                    # order_subtotal.drop(['数量', '開始日', '終了日', '月稼働日', 'ratio'], axis=1, inplace=True)
                    # FC = order_subtotal

                    # Rename the header to match the history data.
                    FC = FC.rename(columns={'SUBSIDIARY_CD': '現法コード'})
                    # Filter the FC to this supplier.
                    FC = FC[FC['SUPPLIER_CD'] == pg_name]
                    FC = FC.astype({'year': str, 'month': str, 'FC': float})

                    # Count the plant's working days and convert the FC to a
                    # per-working-day quantity.
                    FC['開始日'] = pd.to_datetime(
                        (FC['year'] + '/' + FC['month'] + '/01'),
                        format='%Y/%m/%d')
                    FC['終了日'] = FC['開始日'] + offsets.MonthBegin(1)
                    FC['月稼働日'] = [
                        (z - y).days -
                        len(list(filter(lambda x: y <= x < z, nowork_day_w)))
                        for y, z in zip(FC['開始日'], FC['終了日'])
                    ]
                    FC['月平均本数'] = (FC['FC'] / FC['月稼働日']).round(3)
                    FC = FC.astype({'year': int, 'month': int})

                    # Count the working days falling on each weekday.
                    Tgt_S = dt.datetime.strptime(Tgt_S, '%Y%m%d')
                    Tgt_E = dt.datetime.strptime(Tgt_E, '%Y%m%d')
                    week_count = pd.DataFrame({'count': [], 'weekday': []})
                    count = 0
                    while Tgt_S <= Tgt_E:
                        if not Tgt_S.strftime(
                                '%Y-%m-%d') in calendar_dict[pg_name]:
                            weekday = Tgt_S.strftime('%a')
                            week_count.loc[count] = [Tgt_S, weekday]
                            count += 1
                        Tgt_S = Tgt_S + dt.timedelta(days=1)
                    week_count = week_count.groupby(['weekday'],
                                                    as_index=False).count()

                    # Weekday-index calculation:
                    # aggregate quantity per date, excluding large-lot orders.
                    order_small = order_sup[order_sup['アンフィット種別'] == '0']
                    order_day = order_small.groupby(
                        ['現法コード', 'weekday', '受注日'],
                        as_index=False)['数量'].sum()
                    order_week = order_day.groupby(['現法コード', 'weekday'],
                                                   as_index=False)['数量'].sum()
                    order_week = pd.merge(order_week,
                                          week_count,
                                          on=['weekday'],
                                          how='left')
                    order_week.loc[
                        order_week['count'] != 0,
                        '数量'] = order_week['数量'] / order_week['count']
                    order_week.loc[order_week['count'] == 0, '数量'] = 0
                    order_week1 = order_week[order_week['weekday'] != 'Sun']
                    order_week1 = order_week1.groupby(['現法コード',
                                                       'weekday'])['数量'].sum()
                    order_week1 = order_week1.groupby(['現法コード'
                                                       ]).transform(qtyave)
                    order_week2 = order_week.groupby(['現法コード',
                                                      'weekday'])['数量'].sum()
                    order_week2 = order_week2.groupby(['現法コード'
                                                       ]).transform(qtyave)
                    order_week1 = order_week1.reset_index()
                    order_week2 = order_week2.reset_index()
                    order_week2 = order_week2[order_week2['weekday'] == 'Sun']
                    order_week1 = order_week1.append(order_week2, sort=False)
                    order_week1 = order_week1.rename(
                        columns={'数量': 'week_ratio'})

                    # Prepare the (subsidiary, weekday, lead-time) skeleton.
                    base_sh = pd.read_csv(local_pass + 'base_sh.csv',
                                          encoding=font,
                                          index_col=None,
                                          dtype={
                                              '数量': int,
                                              '納期属性': int
                                          })

                    # Sum quantity per subsidiary, weekday and lead time.
                    order_A = order_sup[order_sup['アンフィット種別'] == '0']  # exclude large-lot orders
                    order_A = order_A.groupby(['現法コード', 'weekday',
                                               '納期属性'])['数量'].sum()
                    order_A = order_A.groupby(['現法コード',
                                               'weekday']).transform(qtyratio)
                    order_A = order_A.reset_index()

                    # Merge into base_sh, filling missing combinations with 0.
                    base_sh = pd.merge(base_sh,
                                       order_A,
                                       on=['現法コード', 'weekday', '納期属性'],
                                       how='outer')
                    base_sh.loc[base_sh['数量_y'].notnull(),
                                '数量_x'] = base_sh['数量_y']
                    base_sh = base_sh.rename(columns={'数量_x': '数量'})
                    base_sh.drop(['数量_y'], axis=1, inplace=True)
                    base_sh = base_sh.round({'数量': 4})
                    base_sh = base_sh.rename(columns={'数量': 'n_ratio'})

                    # Compute the small-lot ratio per subsidiary.
                    small_ratio = order_sup.groupby(
                        ['現法コード'], as_index=False)['数量'].sum()
                    small_ratio_A = order_sup[order_sup['アンフィット種別'] ==
                                              '0']  # exclude large-lot orders
                    small_ratio_A = small_ratio_A.groupby(
                        ['現法コード'], as_index=False)['数量'].sum()
                    small_ratio = pd.merge(small_ratio_A,
                                           small_ratio,
                                           on=['現法コード'],
                                           how='right')
                    small_ratio.loc[small_ratio['数量_x'].isnull(), '数量_x'] = 0
                    small_ratio['small_ratio'] = small_ratio[
                        '数量_x'] / small_ratio['数量_y']
                    small_ratio.drop(['数量_x', '数量_y'], axis=1, inplace=True)

                    # Parse the prediction start/end dates.
                    Pre_S = dt.datetime.strptime(Pre_S, '%Y%m%d')
                    Pre_E = dt.datetime.strptime(Pre_E, '%Y%m%d')

                    # Build the list of order dates in the prediction period.
                    # NOTE(review): Pre_S is seeded twice (initial element plus
                    # first loop iteration) — confirm the duplicate is intended.
                    day_list = [Pre_S]
                    day_n = Pre_S
                    while day_n <= Pre_E:
                        day_list.append(day_n)
                        day_n = day_n + datetime.timedelta(days=1)
                        # prediction = pd.date_range(start=Pre_S, end=Pre_E, freq='D', name='受注日')
                        # prediction = prediction.to_series()
                        # prediction = pd.DataFrame(prediction)
                    # Build the order-date x ship-date lists (73 lead-time slots).
                    so_day_list = []
                    sd_day_list = []
                    # noukizokusei = list(range(73))
                    noukizokusei_list = []
                    for nouki in range(len(day_list)):
                        noworkday_count = 0
                        for n in range(73):
                            so_day_list.append(day_list[nouki])
                            noukizokusei_list.append(n)
                            sd_day = day_list[nouki] + datetime.timedelta(
                                days=(n + noworkday_count))
                            # Push the ship date forward past non-working days.
                            while sd_day.strftime(
                                    '%Y-%m-%d') in calendar_dict[pg_name]:
                                sd_day = sd_day + dt.timedelta(days=1)
                                noworkday_count = noworkday_count + 1
                            sd_day_list.append(sd_day)

                    # DataFrame to collect the multiprocessing results.
                    prediction_sum = pd.DataFrame({
                        '現法コード': [],
                        '受注日': [],
                        '受注日稼働flg': [],
                        '出荷日': [],
                        '出荷日稼働flg': [],
                        '納期属性': []
                    })
                    # Process each subsidiary in parallel (14 workers).
                    pool = Pool(multi.cpu_count() - 2)
                    list1 = [(x, so_day_list, sd_day_list, noukizokusei_list,
                              calendar_dict, sub_name) for x in range(14)]
                    pre_list = pool.map(wrapper, list1)
                    pool.close()

                    # Results come back as a list, so concatenate in a loop.
                    for x in range(14):
                        prediction_sum = prediction_sum.append(pre_list[x],
                                                               sort=False)

                    # Add a weekday-of-order column.
                    prediction = prediction_sum
                    prediction['weekday'] = [
                        x.strftime('%a') for x in prediction['受注日']
                    ]
                    '''
                    # 受注日をDatetimeIndexとし年、月、曜日のindexを追加
                    prediction = prediction.set_index('受注日')
                    prediction = prediction.set_index([prediction.index.year, prediction.index.month, prediction.index])
                    prediction.index.names = ['year_so', 'month_so', '受注日']
                    prediction = prediction.reset_index()
                    '''
                    # Index by ship date and add year/month index levels.
                    prediction = prediction.set_index('出荷日')
                    prediction = prediction.set_index([
                        prediction.index.year, prediction.index.month,
                        prediction.index
                    ])
                    prediction.index.names = ['year', 'month', '出荷日']
                    prediction = prediction.reset_index()

                    # Join the FC (per-day forecast quantity).
                    prediction = pd.merge(prediction,
                                          FC,
                                          on=['現法コード', 'year', 'month'],
                                          how='left')
                    # Join the weekday ratio.
                    prediction = pd.merge(prediction,
                                          order_week1,
                                          on=['現法コード', 'weekday'],
                                          how='left')
                    # Join the lead-time distribution.
                    prediction = pd.merge(prediction,
                                          base_sh,
                                          on=['現法コード', 'weekday', '納期属性'],
                                          how='left')
                    # Join the small-lot ratio.
                    prediction = pd.merge(prediction,
                                          small_ratio,
                                          on=['現法コード'],
                                          how='left')
                    # Fill blanks with 0.
                    prediction.loc[prediction['月平均本数'].isnull(), '月平均本数'] = 0
                    prediction.loc[prediction['week_ratio'].isnull(),
                                   'week_ratio'] = 0

                    # Per-subsidiary per-day predicted quantity: product of the
                    # working flags and all the derived ratios.
                    prediction['数量'] = prediction['受注日稼働flg'] * prediction[
                        '出荷日稼働flg'] * prediction['月平均本数'] * prediction[
                            'week_ratio'] * prediction['n_ratio'] * prediction[
                                'small_ratio']
                    prediction = prediction.round({'数量': 3})

                    # Write the per order-date x ship-date data.
                    f_name = pg_name + '_prediction_row.tsv'
                    prediction.to_csv(local_pass + f_name,
                                      sep='\t',
                                      encoding=font,
                                      quotechar='"',
                                      line_terminator='\n',
                                      index=False)

                    # Sum quantity per order date x ship date
                    # (the date dtype is fixed just below).
                    prediction = prediction.groupby(
                        ['受注日', '出荷日'], as_index=False)['数量'].sum()
                    prediction['受注日'] = pd.to_datetime(prediction['受注日'])

                    # Split the prediction days into batches of 20 workers.
                    q, mod = divmod(((Pre_E - Pre_S).days + 1), 20)
                    # Supplier code -> (subsidiary, supplier, facility) mapping.
                    FACILITY_DICT = {}
                    FACILITY_DICT['0143'] = ['MJP', '0143', 'AIO']
                    FACILITY_DICT['7017'] = ['MJP', '7017', 'MAL']
                    FACILITY_DICT['3764'] = ['MJP', '3764', 'AAL']
                    FACILITY_DICT['0FCN'] = ['CHN', '0FCN', 'FAL']
                    FACILITY_DICT['0AIO'] = ['CHN', '0AIO', 'F2A']
                    FACILITY_DICT['SPCM'] = ['VNM', 'SPCM', 'SAL']

                    FACILITY_L = FACILITY_DICT[pg_name]

                    # Order prediction (d): quantities accumulated as seen from
                    # each base date, in multiprocessing batches of 20.
                    pre_c = pd.DataFrame({
                        'BASE_DATE': [],
                        'BASE_DATE_ADD_DAYS': [],
                        'PREDICTION_QUANTITY': []
                    })
                    pre_c = pre_c.astype({
                        'BASE_DATE_ADD_DAYS': int,
                        'PREDICTION_QUANTITY': float
                    })
                    # q full batches of 20.
                    for s in range(0, q):
                        pool = Pool(multi.cpu_count() - 2)
                        list3 = [(d, Pre_S, prediction)
                                 for d in range((20 * s), (20 * s + 20))]
                        prediction_sum3 = pool.map(wrapper3, list3)
                        pool.close()
                        for d in range(0, 20):
                            pre_c = pre_c.append(prediction_sum3[d],
                                                 sort=False)
                    # Remainder batch (mod < 20 items).
                    pool = Pool(multi.cpu_count() - 2)
                    list3 = [(d, Pre_S, prediction)
                             for d in range((20 * q), (20 * q + mod))]
                    prediction_sum4 = pool.map(wrapper3, list3)
                    pool.close()
                    for d in range(0, mod):
                        pre_c = pre_c.append(prediction_sum4[d], sort=False)

                    pre_c.reset_index(drop=True, inplace=True)

                    pre_c.loc[:, 'SUBSIDIARY_CD'] = FACILITY_L[0]
                    pre_c.loc[:, 'SUPPLIER_CD'] = FACILITY_L[1]
                    pre_c.loc[:, 'FACILITY_CD'] = FACILITY_L[2]

                    prediction_all = prediction_all.append(pre_c, sort=False)
                else:
                    print(pg_name + 'に該当する実績データがありません!')

            # File output: stamp audit columns and write est_<code>.tsv.
            Today = "'" + dt.datetime.today().strftime("%Y-%m-%d") + "'"
            prediction_all = prediction_all.round({'PREDICTION_QUANTITY': 3})
            prediction_all.loc[:, 'UPD_COUNT'] = '0'
            prediction_all.loc[:, 'DEL_FLG'] = '0'
            prediction_all.loc[:, 'REG_USR'] = fc_cd
            prediction_all.loc[:, 'REG_TIME'] = Today
            prediction_all.loc[:, 'UPD_USR'] = fc_cd
            prediction_all.loc[:, 'UPD_TIME'] = Today

            f_name = 'est_' + fc_cd + '.tsv'
            prediction_all.to_csv(local_pass + f_name,
                                  sep='\t',
                                  encoding=font,
                                  quotechar='"',
                                  line_terminator='\n',
                                  index=False)

            print('FC_' + fc_cd + 'を元に' + f_name + 'を作成しました!')

            print('受注予測作成完了しました!')
        else:
            print('FC_*.csvファイルが複数ある、または存在しません!')
            print('受注予測作成終了します')
def Orders_prediction_for_Supplier():
    """Build per-supplier order predictions from user-selected order history.

    Interactive workflow:
      1. Ask (tkinter dialogs) for one or more order-history TSV files and
         concatenate them.
      2. Restrict records to the target period and the supplier code chosen
         in getFACI_CD(), dropping quotation records (rows with 見積有効日 set).
      3. Per subsidiary, derive monthly daily-average quantities, weekday
         ratios, lead-time ("納期属性") ratios and the small-lot ratio, all
         corrected for non-working days (nowork_day.csv).
      4. Spread the monthly FC forecast (FC.csv) over the prediction period
         and write two TSVs next to the script:
         <supplier>_prediction_row.tsv (long format) and
         <supplier>_prediction_for_Supplier.tsv (lead-time x order-date pivot).

    Returns nothing; progress and problems are reported via print().
    """
    csv.field_size_limit(1000000000)

    font = 'utf-8'
    # font='shift_jisx0213'

    # Absolute path of the directory that contains this script.
    script_pass = os.path.dirname(os.path.abspath(__name__))
    if __name__ == '__main__':
        local_pass = script_pass + '/'
    else:
        local_pass = script_pass + '/Orders_prediction/'

    # File-selection dialog (hidden root window).
    root = tk.Tk()
    root.withdraw()
    fTyp = [("", "*")]
    iDir = local_pass

    # Pick the order-history files
    # (askopenfilename was switched to askopenfilenames to allow multi-select).
    file = tkinter.filedialog.askopenfilenames(filetypes=fTyp, initialdir=iDir, title='受注実績データの取得')

    if len(file) != 0:
        # List of the selected files.
        list_f = list(file)

        # Dialog collecting the supplier code and the period fields.
        result = getFACI_CD()

        pg_name = result[0]
        # Zero-pad single-digit month/day fields so every date is YYYYMMDD.
        check_list = [result[5], result[6], result[7], result[8], result[9], result[10], result[11], result[12]]
        for c in range(len(check_list)):
            if len(check_list[c]) == 1:
                check_list[c] = '0' + check_list[c]

        # Actuals (Tgt) and prediction (Pre) period boundaries as 'YYYYMMDD'.
        Tgt_S = result[1] + check_list[0] + check_list[4]
        Tgt_E = result[2] + check_list[1] + check_list[5]
        Pre_S = result[3] + check_list[2] + check_list[6]
        Pre_E = result[4] + check_list[3] + check_list[7]

        # Open the first file.
        f_name = os.path.basename(list_f[0])
        f_pass = os.path.dirname(list_f[0])

        # Read only the columns that are needed.
        print(f_pass + '/' + f_name)
        order = pd.read_csv(f_pass + '/' + f_name, sep='\t', encoding=font, dtype=object, engine='python',
                            error_bad_lines=False,
                            usecols=['番号', '現法コード', 'グローバル番号', '受注日・見積回答日', '受注時間・見積回答時間', 'JST変換受注日・JST変換見積回答日',
                                     'JST変換受注時間・JST変換見積回答時間', '見積有効日', '見積有効時間', 'JST変換見積有効日', 'JST変換見積有効時間',
                                     'アンフィット種別', '得意先コード', '直送先コード', 'MCコード', 'インナーコード', '商品コード', '実績現法コード', '実績仕入先コード',
                                     '実績管理単位コード', 'ACE仕入先コード', 'ACE仕入先カテゴリコード', '受注実績SSD', '見積回答SSD', '数量', '納入区分',
                                     '顧客希望納期', '置場コード1', '置場コード2'])

        # Open the remaining files one by one and append them.
        for r in range(1, len(list_f)):
            f_name = os.path.basename(list_f[r])
            print(f_name)
            order_add = pd.read_csv(f_pass + '/' + f_name, sep='\t', encoding=font, dtype=object, engine='python',
                                    error_bad_lines=False,
                                    usecols=['番号', '現法コード', 'グローバル番号', '受注日・見積回答日', '受注時間・見積回答時間',
                                             'JST変換受注日・JST変換見積回答日', 'JST変換受注時間・JST変換見積回答時間', '見積有効日', '見積有効時間',
                                             'JST変換見積有効日', 'JST変換見積有効時間', 'アンフィット種別', '得意先コード', '直送先コード', 'MCコード',
                                             'インナーコード', '商品コード', '実績現法コード', '実績仕入先コード', '実績管理単位コード', 'ACE仕入先コード',
                                             'ACE仕入先カテゴリコード', '受注実績SSD', '見積回答SSD', '数量', '納入区分', '顧客希望納期', '置場コード1', '置場コード2'])
            order = order.append(order_add, sort=False)

        # Load the non-working-day data.
        nowork_day = pd.read_csv(local_pass + 'nowork_day.csv', encoding=font, dtype='object', index_col=None)

        # Map each subsidiary / depot code to its list of non-working days.
        sub_name = ['CHN', 'GRM', 'HKG', 'IND', 'JKT', 'KOR', 'MEX', 'MJP', 'MYS', 'SGP', 'THA', 'TIW', 'USA', 'VNM',
                    '0143', '7017', '3764', '0FCN', '0AIO', 'SPCM']
        calendar_name = ['CAAAA', 'GAAAA', 'NAAAA', 'DAAAA', 'JAAAA', 'KAAAA', 'QAAAA', '5AAAA', 'MAAAA', 'SAAAA',
                         'HAAAA', 'TAAAA', 'UAAAA', 'VAAAA', '5AAAA', '5AAAA', '5AAAA', 'C8677', 'C8677', '50SPC']
        calendar_dict = {}
        for i in range(0, len(sub_name)):
            noworkday_df = nowork_day[nowork_day['CALENDAR_CD'] == calendar_name[i]]
            noworkday_df = noworkday_df.loc[::, ['OFF_DATE']]
            noworkday_df = noworkday_df.T
            noworkday_list = noworkday_df.values.tolist()
            calendar_dict[sub_name[i]] = noworkday_list[0]

        # Drop quotation records (they carry a 見積有効日).
        order = order[order['見積有効日'].isnull()]

        # Rename the combined order/quotation date column to just 受注日.
        order = order.rename(columns={'受注日・見積回答日': '受注日'})
        order = order.astype({'受注日': int, '受注実績SSD': int})

        # Limit to the period with usable shipment (SSD) data.
        # Predictions are fundamentally summarised by order date.
        condition = Tgt_S + ' <= 受注日 <= ' + Tgt_E
        order = order.query(condition)
        # Keep only rows for the chosen supplier code.
        order = order[order['実績仕入先コード'] == pg_name]

        if len(order) > 0:
            # Convert order date / shipment date to ISO date strings.
            order = order.astype({'受注日': str, '受注実績SSD': str})
            order['受注日'] = order['受注日'].str[0:4] + '-' + order['受注日'].str[4:6] + '-' + order['受注日'].str[6:8]
            order['受注実績SSD'] = order['受注実績SSD'].str[0:4] + '-' + order['受注実績SSD'].str[4:6] + '-' + order['受注実績SSD'].str[
                                                                                                   6:8]
            # NOTE(review): the original code had `order.loc[order[], '受注実績SSD']`
            # here -- a SyntaxError -- under a comment saying that when run at
            # Nantong, Guangzhou-bound SSDs should be shifted one day earlier.
            # That adjustment was never implemented; the broken no-op line was
            # removed.  TODO: implement the shift if it is still required.

            # Add the order-weekday column.
            order['weekday'] = [dt.datetime.strptime(x, "%Y-%m-%d").strftime('%a') for x in order['受注日']]

            # Parse order date and actual SSD into datetimes.
            order['受注日'] = pd.to_datetime(order['受注日'])
            order['受注実績SSD'] = pd.to_datetime(order['受注実績SSD'])

            # Index by order date and expose year / month index levels.
            order = order.set_index('受注日')
            order = order.set_index([order.index.year, order.index.month, order.index])
            order.index.names = ['year', 'month', '受注日']
            order = order.reset_index()

            # Lead-time ("納期属性") column.
            # Non-working-day calendar of this supplier (copied: mutated below).
            nowork_day_w = calendar_dict[pg_name][:]

            # Convert each calendar entry from str to datetime.
            for i in range(0, len(nowork_day_w)):
                nowork_day_w[i] = dt.datetime.strptime(nowork_day_w[i], "%Y-%m-%d")

            # Lead time = calendar days between order date and SSD minus the
            # non-working days inside that interval.
            order['納期属性'] = [(z - y).days - len(list(filter(lambda x: y <= x <= z, nowork_day_w))) for y, z in
                             zip(order['受注日'], order['受注実績SSD'])]

            # Clamp the lead time: >73 becomes 73, <0 becomes 0
            # (dropping those rows was also considered but not adopted).
            order.loc[order['納期属性'] > 73, '納期属性'] = 73
            order.loc[order['納期属性'] < 0, '納期属性'] = 0
            # order = order.query('納期属性 >= 0')
            order = order.astype({'数量': int})

            # Aggregate actuals and working days per subsidiary:
            # yearly/monthly total quantity first.
            order_subtotal = order.groupby(['year', 'month'], as_index=False)['数量'].sum()
            order_subtotal = order_subtotal.astype({'year': str, 'month': str})
            # Working days of the production site, used as the divisor.
            order_subtotal['開始日'] = pd.to_datetime((order_subtotal['year'] + '/' + order_subtotal['month'] + '/01'),
                                                   format='%Y/%m/%d')
            order_subtotal['終了日'] = order_subtotal['開始日'] + offsets.MonthBegin(1)
            order_subtotal['月稼働日'] = [(z - y).days - len(list(filter(lambda x: y <= x < z, nowork_day_w))) for y, z in
                                      zip(order_subtotal['開始日'], order_subtotal['終了日'])]
            order_subtotal['月平均本数'] = (order_subtotal['数量'] / order_subtotal['月稼働日']).round(3)

            # Each subsidiary's share of the period's total order quantity.
            order_subratio = order.groupby(['year', '現法コード'], as_index=False)['数量'].sum()
            order_subratio['合計'] = order_subtotal['数量'].sum()
            order_subratio['ratio'] = (order_subratio['数量'] / order_subratio['合計']).round(3)
            order_subratio.drop(['数量', '合計'], axis=1, inplace=True)

            # Multiply the monthly totals by the subsidiary ratio
            # (year column types must match for the merge).
            order_subtotal = order_subtotal.astype({'year': int, 'month': int})
            order_subtotal = pd.merge(order_subtotal, order_subratio, on=['year'], how='left')
            order_subtotal['月平均本数'] = (order_subtotal['月平均本数'] * order_subtotal['ratio']).round(3)

            # Using actuals as the FC was considered (see the commented code
            # below), but the FC is expected to be supplied externally.
            # order_subtotal.drop(['数量', '開始日', '終了日', '月稼働日', 'ratio'], axis=1, inplace=True)
            # FC = order_subtotal

            # Load the FC (forecast) data.
            FC = pd.read_csv(local_pass + 'FC.csv', encoding=font, dtype='object', index_col=None)
            # Rename the header to match the order data.
            FC = FC.rename(columns={'SUBSIDIARY_CD': '現法コード'})
            # Keep only this supplier.
            FC = FC[FC['SUPPLIER_CD'] == pg_name]
            FC = FC.astype({'year': str, 'month': str, 'FC': float})

            # Divide the FC by the production site's working days to get a
            # per-working-day quantity.
            FC['開始日'] = pd.to_datetime((FC['year'] + '/' + FC['month'] + '/01'), format='%Y/%m/%d')
            FC['終了日'] = FC['開始日'] + offsets.MonthBegin(1)
            FC['月稼働日'] = [(z - y).days - len(list(filter(lambda x: y <= x < z, nowork_day_w))) for y, z in
                          zip(FC['開始日'], FC['終了日'])]
            FC['月平均本数'] = (FC['FC'] / FC['月稼働日']).round(3)
            FC = FC.astype({'year': int, 'month': int})

            # Count working days per weekday over the target period.
            Tgt_S = dt.datetime.strptime(Tgt_S, '%Y%m%d')
            Tgt_E = dt.datetime.strptime(Tgt_E, '%Y%m%d')
            week_count = pd.DataFrame({'count': [], 'weekday': []})
            count = 0
            while Tgt_S <= Tgt_E:
                if not Tgt_S.strftime('%Y-%m-%d') in calendar_dict[pg_name]:
                    weekday = Tgt_S.strftime('%a')
                    week_count.loc[count] = [Tgt_S, weekday]
                    count += 1
                Tgt_S = Tgt_S + dt.timedelta(days=1)
            week_count = week_count.groupby(['weekday'], as_index=False).count()

            # Weekday index: aggregate quantity per day, excluding large-lot
            # ("アンフィット種別" != '0') orders.
            order_small = order[order['アンフィット種別'] == '0']
            order_day = order_small.groupby(['現法コード', 'weekday', '受注日'], as_index=False)['数量'].sum()
            order_week = order_day.groupby(['現法コード', 'weekday'], as_index=False)['数量'].sum()
            order_week = pd.merge(order_week, week_count, on=['weekday'], how='left')
            order_week.loc[order_week['count'] != 0, '数量'] = order_week['数量'] / order_week['count']
            order_week.loc[order_week['count'] == 0, '数量'] = 0
            order_week1 = order_week[order_week['weekday'] != 'Sun']
            order_week1 = order_week1.groupby(['現法コード', 'weekday'])['数量'].sum()
            order_week1 = order_week1.groupby(['現法コード']).transform(qtyave)
            order_week2 = order_week.groupby(['現法コード', 'weekday'])['数量'].sum()
            order_week2 = order_week2.groupby(['現法コード']).transform(qtyave)
            order_week1 = order_week1.reset_index()
            order_week2 = order_week2.reset_index()
            order_week2 = order_week2[order_week2['weekday'] == 'Sun']
            order_week1 = order_week1.append(order_week2, sort=False)
            order_week1 = order_week1.rename(columns={'数量': 'week_ratio'})

            # Skeleton of subsidiary x weekday x lead-time combinations.
            base_sh = pd.read_csv(local_pass + 'base_sh.csv', encoding=font, index_col=None,
                                  dtype={'数量': int, '納期属性': int})

            # Quantity ratios per subsidiary, weekday and lead time
            # (large-lot orders excluded).
            order_A = order[order['アンフィット種別'] == '0']
            order_A = order_A.groupby(['現法コード', 'weekday', '納期属性'])['数量'].sum()
            order_A = order_A.groupby(['現法コード', 'weekday']).transform(qtyratio)
            order_A = order_A.reset_index()

            # Merge onto the skeleton so missing combinations become 0.
            base_sh = pd.merge(base_sh, order_A, on=['現法コード', 'weekday', '納期属性'], how='outer')
            base_sh.loc[base_sh['数量_y'].notnull(), '数量_x'] = base_sh['数量_y']
            base_sh = base_sh.rename(columns={'数量_x': '数量'})
            base_sh.drop(['数量_y'], axis=1, inplace=True)
            base_sh = base_sh.round({'数量': 4})
            base_sh = base_sh.rename(columns={'数量': 'n_ratio'})

            # Small-lot ratio per subsidiary.
            small_ratio = order.groupby(['現法コード'], as_index=False)['数量'].sum()
            small_ratio_A = order[order['アンフィット種別'] == '0']  # exclude large lots
            small_ratio_A = small_ratio_A.groupby(['現法コード'], as_index=False)['数量'].sum()
            small_ratio = pd.merge(small_ratio_A, small_ratio, on=['現法コード'], how='right')
            small_ratio.loc[small_ratio['数量_x'].isnull(), '数量_x'] = 0
            small_ratio['small_ratio'] = small_ratio['数量_x'] / small_ratio['数量_y']
            small_ratio.drop(['数量_x', '数量_y'], axis=1, inplace=True)

            # Prediction period boundaries as datetimes.
            Pre_S = dt.datetime.strptime(Pre_S, '%Y%m%d')
            Pre_E = dt.datetime.strptime(Pre_E, '%Y%m%d')

            # Order dates in the prediction period.
            # BUGFIX: the list was seeded with [Pre_S] while the loop below
            # appends Pre_S again, so the first day was double-counted in the
            # output; start from an empty list instead.
            day_list = []
            day_n = Pre_S
            while day_n <= Pre_E:
                day_list.append(day_n)
                day_n = day_n + dt.timedelta(days=1)
            # Factory order dates: shift forward to the next working day when
            # a date falls on a non-working day.
            supday_list = day_list.copy()
            for supp in range(len(supday_list)):
                while supday_list[supp].strftime('%Y-%m-%d') in calendar_dict[pg_name]:
                    supday_list[supp] = supday_list[supp] + dt.timedelta(days=1)

            # Build the order-date x ship-date combinations.
            so_day_list = []
            sd_day_list = []
            # Matching factory order dates.
            sosup_day_list = []
            noukizokusei_list = []
            for nouki in range(len(day_list)):
                noworkday_count = 0
                for n in range(73):
                    so_day_list.append(day_list[nouki])
                    sosup_day_list.append(supday_list[nouki])
                    noukizokusei_list.append(n)
                    sd_day = day_list[nouki] + dt.timedelta(days=(n + noworkday_count))
                    # Push the ship date past any non-working days.
                    while sd_day.strftime('%Y-%m-%d') in calendar_dict[pg_name]:
                        sd_day = sd_day + dt.timedelta(days=1)
                        noworkday_count = noworkday_count + 1
                    sd_day_list.append(sd_day)

            # DataFrame collecting the multiprocessing results.
            prediction_sum = pd.DataFrame(
                {'現法コード': [], '受注日': [], '受注日稼働flg': [], '工場受注日': [], '出荷日': [], '出荷日稼働flg': [], '納期属性': []})
            # Process each subsidiary in a worker pool.
            pool = Pool(multi.cpu_count() - 2)
            # range(14): presumably the 14 subsidiary codes at the head of
            # sub_name (the remaining entries are depots) -- TODO confirm.
            list1 = [(x, so_day_list, sd_day_list, sosup_day_list, noukizokusei_list, calendar_dict, sub_name) for x in
                     range(14)]
            pre_list = pool.map(wrapper, list1)
            pool.close()

            # pool.map returns a list of frames; concatenate them.
            for x in range(14):
                prediction_sum = prediction_sum.append(pre_list[x], sort=False)

            # Add the order-weekday column.
            prediction = prediction_sum
            prediction['weekday'] = [x.strftime('%a') for x in prediction['受注日']]

            # Index by ship date and expose year / month index levels.
            prediction = prediction.set_index('出荷日')
            prediction = prediction.set_index([prediction.index.year, prediction.index.month, prediction.index])
            prediction.index.names = ['year', 'month', '出荷日']
            prediction = prediction.reset_index()

            # Join the FC figures.
            prediction = pd.merge(prediction, FC, on=['現法コード', 'year', 'month'], how='left')
            # Join the weekday ratio.
            prediction = pd.merge(prediction, order_week1, on=['現法コード', 'weekday'], how='left')
            # Join the lead-time ratio.
            prediction = pd.merge(prediction, base_sh, on=['現法コード', 'weekday', '納期属性'], how='left')
            # Join the small-lot ratio.
            prediction = pd.merge(prediction, small_ratio, on=['現法コード'], how='left')
            # Fill unmatched rows with 0.
            prediction.loc[prediction['月平均本数'].isnull(), '月平均本数'] = 0
            prediction.loc[prediction['week_ratio'].isnull(), 'week_ratio'] = 0

            # Daily quantity per subsidiary.
            prediction['数量'] = prediction['受注日稼働flg'] * prediction['出荷日稼働flg'] * prediction['月平均本数'] * prediction[
                'week_ratio'] * prediction['n_ratio'] * prediction['small_ratio']
            prediction = prediction.round({'数量': 3})

            # Output the per-order-date / ship-date rows.
            f_name = pg_name + '_prediction_row.tsv'
            prediction.to_csv(local_pass + f_name, sep='\t', encoding=font, quotechar='"', line_terminator='\n',
                              index=False)

            # Aggregate, pivot (lead time x factory order date) and output.
            prediction_sup = prediction.groupby(['納期属性', '工場受注日'], as_index=True)['数量'].sum()
            prediction_sup = prediction_sup.round(3)
            prediction_sup = prediction_sup.unstack()

            f_name = pg_name + '_prediction_for_Supplier.tsv'
            prediction_sup.to_csv(local_pass + f_name, sep='\t', encoding=font, quotechar='"', line_terminator='\n',
                                  index=True)

            print('受注予測作成 Finish!')
        else:
            print('受注予測作成 Finish 該当する実績データがありません!')
Exemple #14
0
 def get_business_days_in_month(*, end_of_month):
     """Count the business days from the month's first day up to *end_of_month*.

     ``MonthBegin()`` rolls *end_of_month* back to the first day of the same
     calendar month, which becomes the range start.
     """
     return ContractCalendar.get_business_days(
         start=end_of_month - pdo.MonthBegin(),
         end=end_of_month,
     )