Beispiel #1
0
def time_for_next_update(last_time, period='D', hour=9):
    """Return the next scheduled update time after *last_time*.

    Periods:
        'D': move to the next business day
        'W': move to next Monday
        'M': move to the first day of the next month
        'Q': move to the first day of the next quarter
    The result is normalized to midnight, then its hour is set to *hour*.

    Returns MARKET_START (module-level constant) when *last_time* is
    missing (None/NaT).

    Raises:
        TypeError: if *period* is not one of 'D', 'W', 'M', 'Q'.
    """
    if pd.isnull(last_time):
        return MARKET_START
    period = period.upper()
    # NOTE: ``DateOffset.apply`` was deprecated and removed in pandas 2.0;
    # ``timestamp + offset`` is the supported equivalent.
    if period == 'D':
        offset = BDay(normalize=True)
    elif period == 'W':
        offset = Week(normalize=True, weekday=0)
    elif period == 'M':
        offset = MonthBegin(normalize=True)
    elif period == 'Q':
        # Default startingMonth=3: quarters anchored on Mar/Jun/Sep/Dec.
        offset = QuarterBegin(normalize=True)
    else:
        raise TypeError('不能识别的周期类型,仅接受{}'.format(('D', 'W', 'M', 'Q')))
    return (last_time + offset).replace(hour=hour)
Beispiel #2
0
 def test_repr(self):
     expected = "<QuarterBegin: startingMonth=3>"
     assert repr(QuarterBegin()) == expected
     expected = "<QuarterBegin: startingMonth=3>"
     assert repr(QuarterBegin(startingMonth=3)) == expected
     expected = "<QuarterBegin: startingMonth=1>"
     assert repr(QuarterBegin(startingMonth=1)) == expected
Beispiel #3
0
def next_update_time(last_updated, freq='D', hour=18, minute=0):
    """Compute the next update time after *last_updated*.

    Frequencies (case-insensitive):
        'D': the next business day
        'W': next Monday
        'M': the first day of the next month
        'Q': the first day of the next quarter
    The result's time-of-day is then pinned to *hour*:*minute*.

    Returns MARKET_START (module-level constant) when *last_updated* is
    missing (None/NaT).

    Raises:
        TypeError: for an unrecognised *freq*.
    """
    if pd.isnull(last_updated):
        return MARKET_START
    # Dispatch table: frequency code -> anchored, normalizing offset.
    offsets = {
        'D': BDay(n=1, normalize=True),
        'W': Week(normalize=True, weekday=0),
        'M': MonthBegin(n=1, normalize=True),
        'Q': QuarterBegin(normalize=True, startingMonth=1),
    }
    key = freq.upper()
    if key not in offsets:
        raise TypeError('不能识别的周期类型,仅接受{}'.format(('D', 'W', 'M', 'Q')))
    shifted = last_updated + offsets[key]
    return shifted.replace(hour=hour, minute=minute)
 def to_offset(self) -> DateOffset:
     """Translate this frequency code (``self.value``) into the
     corresponding pandas ``DateOffset`` instance.

     Raises:
         NotImplementedError: for any unmapped frequency code.
     """
     mapping = {
         "H": Hour(1),
         "D": Day(1),
         "W-MON": Week(1, weekday=0),
         "MS": MonthBegin(1),
         "QS-DEC": QuarterBegin(startingMonth=10),
         "AS": YearBegin(1),
     }
     if self.value not in mapping:
         raise NotImplementedError(self.value)
     return mapping[self.value]
Beispiel #5
0
def next_update_time(last_updated, freq='D', hour=18, minute=0, second=0):
    """Compute the next update time after *last_updated*.

    Frequencies (case-sensitive: 'm' is minute, 'M' is month):
        'S': the next second
        'm': the next minute
        'H': the next hour
        'D': the next business day
        'W': next Monday
        'M': the first day of the next month
        'Q': the first day of the next quarter
    For 'D'/'W'/'M'/'Q' the result's time-of-day is pinned to
    *hour*:*minute*:*second*; sub-daily shifts keep the time-of-day.

    Returns MARKET_START (module-level constant) when *last_updated* is
    missing (None/NaT).

    Raises:
        TypeError: for an unrecognised *freq*.
    """
    if pd.isnull(last_updated):
        return MARKET_START
    # Sub-daily frequencies: plain tick shift, nothing else changes.
    tick_offsets = {'S': Second, 'm': Minute, 'H': Hour}
    if freq in tick_offsets:
        return last_updated + tick_offsets[freq]()
    # Daily-and-above frequencies: shift, then pin the time-of-day.
    anchored_offsets = {
        'D': BDay(n=1, normalize=True),
        'W': Week(normalize=True, weekday=0),
        'M': MonthBegin(n=1, normalize=True),
        'Q': QuarterBegin(normalize=True, startingMonth=1),
    }
    if freq in anchored_offsets:
        shifted = last_updated + anchored_offsets[freq]
        return shifted.replace(hour=hour, minute=minute, second=second)
    raise TypeError('不能识别的周期类型,仅接受{}'.format(
        ('S', 'm', 'H', 'D', 'W', 'M', 'Q')))
Beispiel #6
0
 def ls_returns_sheet(self, cur_day=None):
     """Build a one-row DataFrame of long-short returns over standard
     lookback windows (day / week-to-date / month-to-date / quarter-to-date /
     6 months / year-to-date / 2 years, plus annualized and since-inception).

     When *cur_day* is omitted, the latest trade day from the module-level
     ``data_source`` calendar is used.
     """
     if cur_day is None:
         latest = data_source.trade_calendar.get_latest_trade_days(
             datetime.today().strftime("%Y%m%d"))
         cur_day = pd.to_datetime(latest)
     else:
         cur_day = pd.to_datetime(cur_day)
     # Window start dates, ordered to match the column labels below.
     anchors = [
         cur_day,
         cur_day.to_period('W').start_time,
         cur_day + MonthBegin(-1),
         cur_day + QuarterBegin(-1),
         cur_day + MonthBegin(-6),
         cur_day + YearBegin(-1),
         cur_day + YearBegin(-2),
     ]
     returns = [self.ls_range_pct(start, cur_day) for start in anchors]
     returns += [self.ls_annual_return, self.ls_total_return]
     return pd.DataFrame([returns],
                         columns=[
                             '日回报', '本周以来', '本月以来', '本季以来', '近6个月', '今年以来',
                             '近两年', '年化回报', '成立以来'
                         ])
def create_data():
    """Create the pickle/msgpack fixture data.

    Returns a nested dict of representative pandas objects (series, frames,
    indexes, scalars, categoricals, timestamps, offsets) used to write
    round-trip compatibility files.  Relies on module-level names defined
    elsewhere in this file: ``_loose_version``, ``LooseVersion``,
    ``_create_sp_series``, ``_create_sp_tsseries`` and ``_create_sp_frame``.
    """

    # Base column values reused by the series/frame fixtures below.
    data = {
        "A": [0.0, 1.0, 2.0, 3.0, np.nan],
        "B": [0, 1, 0, 1, 0],
        "C": ["foo1", "foo2", "foo3", "foo4", "foo5"],
        "D": date_range("1/1/2009", periods=5),
        "E": [0.0, 1, Timestamp("20100101"), "foo", 2.0],
    }

    scalars = dict(timestamp=Timestamp("20130101"), period=Period("2012", "M"))

    # Index fixtures covering the common index dtypes.
    index = dict(
        int=Index(np.arange(10)),
        date=date_range("20130101", periods=10),
        period=period_range("2013-01-01", freq="M", periods=10),
        float=Index(np.arange(10, dtype=np.float64)),
        uint=Index(np.arange(10, dtype=np.uint64)),
        timedelta=timedelta_range("00:00:00", freq="30T", periods=10),
    )

    index["range"] = RangeIndex(10)

    # interval_range is gated: it only exists on pandas >= 0.21.
    if _loose_version >= LooseVersion("0.21"):
        from pandas import interval_range

        index["interval"] = interval_range(0, periods=10)

    mi = dict(reg2=MultiIndex.from_tuples(
        tuple(
            zip(*[
                ["bar", "bar", "baz", "baz", "foo", "foo", "qux", "qux"],
                ["one", "two", "one", "two", "one", "two", "one", "two"],
            ])),
        names=["first", "second"],
    ))

    # Series fixtures: plain dtypes, MultiIndex, duplicate labels,
    # categorical, tz-aware datetimes and periods.
    series = dict(
        float=Series(data["A"]),
        int=Series(data["B"]),
        mixed=Series(data["E"]),
        ts=Series(np.arange(10).astype(np.int64),
                  index=date_range("20130101", periods=10)),
        mi=Series(
            np.arange(5).astype(np.float64),
            index=MultiIndex.from_tuples(tuple(
                zip(*[[1, 1, 2, 2, 2], [3, 4, 3, 4, 5]])),
                                         names=["one", "two"]),
        ),
        dup=Series(np.arange(5).astype(np.float64),
                   index=["A", "B", "C", "D", "A"]),
        cat=Series(Categorical(["foo", "bar", "baz"])),
        dt=Series(date_range("20130101", periods=5)),
        dt_tz=Series(date_range("20130101", periods=5, tz="US/Eastern")),
        period=Series([Period("2000Q1")] * 5),
    )

    # Frame with duplicate column labels ("A" appears twice).
    mixed_dup_df = DataFrame(data)
    mixed_dup_df.columns = list("ABCDA")
    frame = dict(
        float=DataFrame({
            "A": series["float"],
            "B": series["float"] + 1
        }),
        int=DataFrame({
            "A": series["int"],
            "B": series["int"] + 1
        }),
        mixed=DataFrame({k: data[k]
                         for k in ["A", "B", "C", "D"]}),
        mi=DataFrame(
            {
                "A": np.arange(5).astype(np.float64),
                "B": np.arange(5).astype(np.int64)
            },
            index=MultiIndex.from_tuples(
                tuple(
                    zip(*[
                        ["bar", "bar", "baz", "baz", "baz"],
                        ["one", "two", "one", "two", "three"],
                    ])),
                names=["first", "second"],
            ),
        ),
        dup=DataFrame(np.arange(15).reshape(5, 3).astype(np.float64),
                      columns=["A", "B", "A"]),
        cat_onecol=DataFrame({"A": Categorical(["foo", "bar"])}),
        cat_and_float=DataFrame({
            "A": Categorical(["foo", "bar", "baz"]),
            "B": np.arange(3).astype(np.int64),
        }),
        mixed_dup=mixed_dup_df,
        dt_mixed_tzs=DataFrame(
            {
                "A": Timestamp("20130102", tz="US/Eastern"),
                "B": Timestamp("20130603", tz="CET"),
            },
            index=range(5),
        ),
        dt_mixed2_tzs=DataFrame(
            {
                "A": Timestamp("20130102", tz="US/Eastern"),
                "B": Timestamp("20130603", tz="CET"),
                "C": Timestamp("20130603", tz="UTC"),
            },
            index=range(5),
        ),
    )

    # Categoricals sized so their codes need int8/int16/int32 storage.
    cat = dict(
        int8=Categorical(list("abcdefg")),
        int16=Categorical(np.arange(1000)),
        int32=Categorical(np.arange(10000)),
    )

    timestamp = dict(
        normal=Timestamp("2011-01-01"),
        nat=NaT,
        tz=Timestamp("2011-01-01", tz="US/Eastern"),
    )

    # NOTE(review): the Timestamp ``freq`` keyword is deprecated/removed in
    # recent pandas, so these fixtures target older versions — confirm.
    timestamp["freq"] = Timestamp("2011-01-01", freq="D")
    timestamp["both"] = Timestamp("2011-01-01", tz="Asia/Tokyo", freq="M")

    # DateOffset fixtures covering the common offset classes.
    off = {
        "DateOffset": DateOffset(years=1),
        "DateOffset_h_ns": DateOffset(hour=6, nanoseconds=5824),
        "BusinessDay": BusinessDay(offset=timedelta(seconds=9)),
        "BusinessHour": BusinessHour(normalize=True, n=6, end="15:14"),
        "CustomBusinessDay": CustomBusinessDay(weekmask="Mon Fri"),
        "SemiMonthBegin": SemiMonthBegin(day_of_month=9),
        "SemiMonthEnd": SemiMonthEnd(day_of_month=24),
        "MonthBegin": MonthBegin(1),
        "MonthEnd": MonthEnd(1),
        "QuarterBegin": QuarterBegin(1),
        "QuarterEnd": QuarterEnd(1),
        "Day": Day(1),
        "YearBegin": YearBegin(1),
        "YearEnd": YearEnd(1),
        "Week": Week(1),
        "Week_Tues": Week(2, normalize=False, weekday=1),
        "WeekOfMonth": WeekOfMonth(week=3, weekday=4),
        "LastWeekOfMonth": LastWeekOfMonth(n=1, weekday=3),
        "FY5253": FY5253(n=2, weekday=6, startingMonth=7, variation="last"),
        "Easter": Easter(),
        "Hour": Hour(1),
        "Minute": Minute(1),
    }

    # The _create_sp_* helpers are defined elsewhere in this module.
    return dict(
        series=series,
        frame=frame,
        index=index,
        scalars=scalars,
        mi=mi,
        sp_series=dict(float=_create_sp_series(), ts=_create_sp_tsseries()),
        sp_frame=dict(float=_create_sp_frame()),
        cat=cat,
        timestamp=timestamp,
        offsets=off,
    )
# this is for saving the results
df_columns = ["equal", "constant", "sample", "lw", "model", "combined"]
df_index = []
frob_rows = []
var_rows = []
sharpe_rows = []

# testing the model
for i in range(test_intervals):

    if mode == "val":
        y = 2016
    if mode == "test":
        y = 2018
    sample_start = datetime(year=y, month=1, day=1) + QuarterBegin(
        startingMonth=1, n=i * time_horizon_quarters)
    sample_stop = sample_start + DateOffset(years=1) - DateOffset(days=1)

    out_of_sample_start = sample_stop + DateOffset(days=1)
    out_of_sample_stop = out_of_sample_start + QuarterEnd(
        startingMonth=3, n=time_horizon_quarters)

    # creating the reports and returns for the test.
    # Includes sample (previous time frame used for empirical estimation) and the test set
    returns_sample = get_returns_for_period(df_returns, sample_start,
                                            sample_stop)
    returns_out_of_sample = get_returns_for_period(df_returns,
                                                   out_of_sample_start,
                                                   out_of_sample_stop)

    reports_sample = get_reports_for_date(df_reports,
Beispiel #9
0
def create_data():
    """Create the pickle/msgpack fixture data.

    Version-gated variant: uses ``_loose_version``/``LooseVersion`` checks
    so the fixtures can also be generated on older pandas releases.
    Relies on module-level names defined elsewhere in this file:
    ``_create_sp_series``, ``_create_sp_tsseries`` and ``_create_sp_frame``.
    """

    # Base column values reused by the series/frame fixtures below.
    data = {
        'A': [0., 1., 2., 3., np.nan],
        'B': [0, 1, 0, 1, 0],
        'C': ['foo1', 'foo2', 'foo3', 'foo4', 'foo5'],
        'D': date_range('1/1/2009', periods=5),
        'E': [0., 1, Timestamp('20100101'), 'foo', 2.]
    }

    scalars = dict(timestamp=Timestamp('20130101'), period=Period('2012', 'M'))

    # Index fixtures covering the common index dtypes.
    index = dict(int=Index(np.arange(10)),
                 date=date_range('20130101', periods=10),
                 period=period_range('2013-01-01', freq='M', periods=10),
                 float=Index(np.arange(10, dtype=np.float64)),
                 uint=Index(np.arange(10, dtype=np.uint64)),
                 timedelta=timedelta_range('00:00:00', freq='30T', periods=10))

    # RangeIndex and interval_range are gated on the pandas version.
    if _loose_version >= LooseVersion('0.18'):
        from pandas import RangeIndex
        index['range'] = RangeIndex(10)

    if _loose_version >= LooseVersion('0.21'):
        from pandas import interval_range
        index['interval'] = interval_range(0, periods=10)

    mi = dict(reg2=MultiIndex.from_tuples(tuple(
        zip(*[['bar', 'bar', 'baz', 'baz', 'foo', 'foo', 'qux', 'qux'],
              ['one', 'two', 'one', 'two', 'one', 'two', 'one', 'two']])),
                                          names=['first', 'second']))

    # Series fixtures: plain dtypes, MultiIndex, duplicate labels,
    # categorical, tz-aware datetimes and periods.
    series = dict(float=Series(data['A']),
                  int=Series(data['B']),
                  mixed=Series(data['E']),
                  ts=Series(np.arange(10).astype(np.int64),
                            index=date_range('20130101', periods=10)),
                  mi=Series(np.arange(5).astype(np.float64),
                            index=MultiIndex.from_tuples(tuple(
                                zip(*[[1, 1, 2, 2, 2], [3, 4, 3, 4, 5]])),
                                                         names=['one',
                                                                'two'])),
                  dup=Series(np.arange(5).astype(np.float64),
                             index=['A', 'B', 'C', 'D', 'A']),
                  cat=Series(Categorical(['foo', 'bar', 'baz'])),
                  dt=Series(date_range('20130101', periods=5)),
                  dt_tz=Series(
                      date_range('20130101', periods=5, tz='US/Eastern')),
                  period=Series([Period('2000Q1')] * 5))

    # Frame with duplicate column labels ("A" appears twice).
    mixed_dup_df = DataFrame(data)
    mixed_dup_df.columns = list("ABCDA")
    frame = dict(float=DataFrame({
        'A': series['float'],
        'B': series['float'] + 1
    }),
                 int=DataFrame({
                     'A': series['int'],
                     'B': series['int'] + 1
                 }),
                 mixed=DataFrame({k: data[k]
                                  for k in ['A', 'B', 'C', 'D']}),
                 mi=DataFrame(
                     {
                         'A': np.arange(5).astype(np.float64),
                         'B': np.arange(5).astype(np.int64)
                     },
                     index=MultiIndex.from_tuples(tuple(
                         zip(*[['bar', 'bar', 'baz', 'baz', 'baz'],
                               ['one', 'two', 'one', 'two', 'three']])),
                                                  names=['first', 'second'])),
                 dup=DataFrame(np.arange(15).reshape(5, 3).astype(np.float64),
                               columns=['A', 'B', 'A']),
                 cat_onecol=DataFrame({'A': Categorical(['foo', 'bar'])}),
                 cat_and_float=DataFrame({
                     'A':
                     Categorical(['foo', 'bar', 'baz']),
                     'B':
                     np.arange(3).astype(np.int64)
                 }),
                 mixed_dup=mixed_dup_df,
                 dt_mixed_tzs=DataFrame(
                     {
                         'A': Timestamp('20130102', tz='US/Eastern'),
                         'B': Timestamp('20130603', tz='CET')
                     },
                     index=range(5)),
                 dt_mixed2_tzs=DataFrame(
                     {
                         'A': Timestamp('20130102', tz='US/Eastern'),
                         'B': Timestamp('20130603', tz='CET'),
                         'C': Timestamp('20130603', tz='UTC')
                     },
                     index=range(5)))

    # Categoricals sized so their codes need int8/int16/int32 storage.
    cat = dict(int8=Categorical(list('abcdefg')),
               int16=Categorical(np.arange(1000)),
               int32=Categorical(np.arange(10000)))

    timestamp = dict(normal=Timestamp('2011-01-01'),
                     nat=NaT,
                     tz=Timestamp('2011-01-01', tz='US/Eastern'))

    # Pandas before 0.19.2 spelled the Timestamp frequency keyword
    # ``offset``; later versions use ``freq`` (per the branch below).
    if _loose_version < LooseVersion('0.19.2'):
        timestamp['freq'] = Timestamp('2011-01-01', offset='D')
        timestamp['both'] = Timestamp('2011-01-01',
                                      tz='Asia/Tokyo',
                                      offset='M')
    else:
        timestamp['freq'] = Timestamp('2011-01-01', freq='D')
        timestamp['both'] = Timestamp('2011-01-01', tz='Asia/Tokyo', freq='M')

    # DateOffset fixtures covering the common offset classes.
    off = {
        'DateOffset': DateOffset(years=1),
        'DateOffset_h_ns': DateOffset(hour=6, nanoseconds=5824),
        'BusinessDay': BusinessDay(offset=timedelta(seconds=9)),
        'BusinessHour': BusinessHour(normalize=True, n=6, end='15:14'),
        'CustomBusinessDay': CustomBusinessDay(weekmask='Mon Fri'),
        'SemiMonthBegin': SemiMonthBegin(day_of_month=9),
        'SemiMonthEnd': SemiMonthEnd(day_of_month=24),
        'MonthBegin': MonthBegin(1),
        'MonthEnd': MonthEnd(1),
        'QuarterBegin': QuarterBegin(1),
        'QuarterEnd': QuarterEnd(1),
        'Day': Day(1),
        'YearBegin': YearBegin(1),
        'YearEnd': YearEnd(1),
        'Week': Week(1),
        'Week_Tues': Week(2, normalize=False, weekday=1),
        'WeekOfMonth': WeekOfMonth(week=3, weekday=4),
        'LastWeekOfMonth': LastWeekOfMonth(n=1, weekday=3),
        'FY5253': FY5253(n=2, weekday=6, startingMonth=7, variation="last"),
        'Easter': Easter(),
        'Hour': Hour(1),
        'Minute': Minute(1)
    }

    # The _create_sp_* helpers are defined elsewhere in this module.
    return dict(series=series,
                frame=frame,
                index=index,
                scalars=scalars,
                mi=mi,
                sp_series=dict(float=_create_sp_series(),
                               ts=_create_sp_tsseries()),
                sp_frame=dict(float=_create_sp_frame()),
                cat=cat,
                timestamp=timestamp,
                offsets=off)
Beispiel #10
0
 def test_isAnchored(self):
     assert QuarterBegin(startingMonth=1).isAnchored()
     assert QuarterBegin().isAnchored()
     assert not QuarterBegin(2, startingMonth=1).isAnchored()
Beispiel #11
0
class TestQuarterBegin(Base):
    """Tests for the ``QuarterBegin`` DateOffset."""

    def test_repr(self):
        # The effective startingMonth (default 3) always shows in the repr.
        expected = "<QuarterBegin: startingMonth=3>"
        assert repr(QuarterBegin()) == expected
        expected = "<QuarterBegin: startingMonth=3>"
        assert repr(QuarterBegin(startingMonth=3)) == expected
        expected = "<QuarterBegin: startingMonth=1>"
        assert repr(QuarterBegin(startingMonth=1)) == expected

    def test_isAnchored(self):
        assert QuarterBegin(startingMonth=1).isAnchored()
        assert QuarterBegin().isAnchored()
        assert not QuarterBegin(2, startingMonth=1).isAnchored()

    def test_offset_corner_case(self):
        # corner
        offset = QuarterBegin(n=-1, startingMonth=1)
        assert datetime(2010, 2, 1) + offset == datetime(2010, 1, 1)

    # (offset, {base_date: expected_date}) pairs consumed by test_offset.
    offset_cases = []
    offset_cases.append((QuarterBegin(startingMonth=1), {
        datetime(2007, 12, 1): datetime(2008, 1, 1),
        datetime(2008, 1, 1): datetime(2008, 4, 1),
        datetime(2008, 2, 15): datetime(2008, 4, 1),
        datetime(2008, 2, 29): datetime(2008, 4, 1),
        datetime(2008, 3, 15): datetime(2008, 4, 1),
        datetime(2008, 3, 31): datetime(2008, 4, 1),
        datetime(2008, 4, 15): datetime(2008, 7, 1),
        datetime(2008, 4, 1): datetime(2008, 7, 1)}))

    offset_cases.append((QuarterBegin(startingMonth=2), {
        datetime(2008, 1, 1): datetime(2008, 2, 1),
        datetime(2008, 1, 31): datetime(2008, 2, 1),
        datetime(2008, 1, 15): datetime(2008, 2, 1),
        datetime(2008, 2, 29): datetime(2008, 5, 1),
        datetime(2008, 3, 15): datetime(2008, 5, 1),
        datetime(2008, 3, 31): datetime(2008, 5, 1),
        datetime(2008, 4, 15): datetime(2008, 5, 1),
        datetime(2008, 4, 30): datetime(2008, 5, 1)}))

    # BUG FIX: this dict previously listed the literal key
    # ``datetime(2008, 1, 1)`` twice; duplicate dict keys silently
    # collapse, so the redundant entry has been removed.
    offset_cases.append((QuarterBegin(startingMonth=1, n=0), {
        datetime(2008, 1, 1): datetime(2008, 1, 1),
        datetime(2008, 12, 1): datetime(2009, 1, 1),
        datetime(2008, 2, 15): datetime(2008, 4, 1),
        datetime(2008, 2, 29): datetime(2008, 4, 1),
        datetime(2008, 3, 15): datetime(2008, 4, 1),
        datetime(2008, 3, 31): datetime(2008, 4, 1),
        datetime(2008, 4, 15): datetime(2008, 7, 1),
        datetime(2008, 4, 30): datetime(2008, 7, 1)}))

    offset_cases.append((QuarterBegin(startingMonth=1, n=-1), {
        datetime(2008, 1, 1): datetime(2007, 10, 1),
        datetime(2008, 1, 31): datetime(2008, 1, 1),
        datetime(2008, 2, 15): datetime(2008, 1, 1),
        datetime(2008, 2, 29): datetime(2008, 1, 1),
        datetime(2008, 3, 15): datetime(2008, 1, 1),
        datetime(2008, 3, 31): datetime(2008, 1, 1),
        datetime(2008, 4, 15): datetime(2008, 4, 1),
        datetime(2008, 4, 30): datetime(2008, 4, 1),
        datetime(2008, 7, 1): datetime(2008, 4, 1)}))

    offset_cases.append((QuarterBegin(startingMonth=1, n=2), {
        datetime(2008, 1, 1): datetime(2008, 7, 1),
        datetime(2008, 2, 15): datetime(2008, 7, 1),
        datetime(2008, 2, 29): datetime(2008, 7, 1),
        datetime(2008, 3, 15): datetime(2008, 7, 1),
        datetime(2008, 3, 31): datetime(2008, 7, 1),
        datetime(2008, 4, 15): datetime(2008, 10, 1),
        datetime(2008, 4, 1): datetime(2008, 10, 1)}))

    @pytest.mark.parametrize('case', offset_cases)
    def test_offset(self, case):
        offset, cases = case
        # ``compat.iteritems`` was a Python-2 shim; plain ``dict.items``
        # is the Python-3 equivalent.
        for base, expected in cases.items():
            assert_offset_equal(offset, base, expected)
Beispiel #12
0
    ser = pd.Series(rng)

    res = rng + offset
    assert res.freq is None  # not retained
    res_v2 = offset.apply_index(rng)
    assert (res == res_v2).all()
    assert res[0] == rng[0] + offset
    assert res[-1] == rng[-1] + offset
    res2 = ser + offset
    # apply_index is only for indexes, not series, so no res2_v2
    assert res2.iloc[0] == ser.iloc[0] + offset
    assert res2.iloc[-1] == ser.iloc[-1] + offset


@pytest.mark.parametrize(
    "offset", [QuarterBegin(), QuarterEnd(), BQuarterBegin(), BQuarterEnd()]
)
def test_on_offset(offset):
    """Cross-check is_on_offset against a round-trip through the offset."""
    candidates = [
        datetime(2016, month, day)
        for month in (10, 11, 12)
        for day in (1, 2, 3, 28, 29, 30, 31)
        if not (month == 11 and day == 31)
    ]
    for candidate in candidates:
        fast = offset.is_on_offset(candidate)
        # A date is on-offset iff adding then subtracting returns it.
        slow = candidate == (candidate + offset) - offset
        assert fast == slow


# --------------------------------------------------------------------
Beispiel #13
0
        f = grid_formatter(grid_columns(data))
        data = f.format_dicts(data.itertuples())
        return jsonify(data=data, x=cols[0], y=cols[1], stats=stats)
    except BaseException as e:
        return jsonify(
            dict(error=str(e), traceback=str(traceback.format_exc())))


# Map a period code to a function returning the start of the period that
# contains ``today`` ('W' always steps back to Monday; the others return
# ``today`` unchanged when it already is the period start).
DATE_RANGES = {
    'W': lambda today: today - Day(today.dayofweek),
    'M': lambda today: today if today.is_month_start else today - MonthBegin(),
    'Q': lambda today: (today if today.is_quarter_start
                        else today - QuarterBegin(startingMonth=1)),
    'Y': lambda today: today if today.is_year_start else today - YearBegin(),
}


@dtale.route('/coverage')
@swag_from('swagger/dtale/views/coverage.yml')
def find_coverage():
    """
    Flask route which returns coverage information(counts) for a column grouped by other column(s)

    :param query: string from flask.request.args['query'] which is applied to DATA using the query() function
    :param col: string from flask.request.args['col'] containing name of a column in your dataframe
    :param filters(deprecated): JSON string from flaks.request.args['filters'] with filtering information from group
           drilldown [
Beispiel #14
0
def create_data():
    """Create the pickle/msgpack fixture data (Python-2-era variant).

    Uses ``u''`` literals and includes Panel fixtures, so it targets old
    pandas/Python versions.  Relies on module-level names defined
    elsewhere in this file: ``_loose_version``, ``_create_sp_series``,
    ``_create_sp_tsseries`` and ``_create_sp_frame``.
    """

    # Base column values reused by the series/frame fixtures below.
    data = {
        u'A': [0., 1., 2., 3., np.nan],
        u'B': [0, 1, 0, 1, 0],
        u'C': [u'foo1', u'foo2', u'foo3', u'foo4', u'foo5'],
        u'D': date_range('1/1/2009', periods=5),
        u'E': [0., 1, Timestamp('20100101'), u'foo', 2.]
    }

    scalars = dict(timestamp=Timestamp('20130101'), period=Period('2012', 'M'))

    index = dict(int=Index(np.arange(10)),
                 date=date_range('20130101', periods=10),
                 period=period_range('2013-01-01', freq='M', periods=10))

    mi = dict(reg2=MultiIndex.from_tuples(tuple(
        zip(*[[u'bar', u'bar', u'baz', u'baz', u'foo', u'foo', u'qux', u'qux'],
              [u'one', u'two', u'one', u'two', u'one', u'two', u'one', u'two']
              ])),
                                          names=[u'first', u'second']))

    # Series fixtures: plain dtypes, MultiIndex, duplicate labels,
    # categorical, tz-aware datetimes and periods.
    series = dict(
        float=Series(data[u'A']),
        int=Series(data[u'B']),
        mixed=Series(data[u'E']),
        ts=Series(np.arange(10).astype(np.int64),
                  index=date_range('20130101', periods=10)),
        mi=Series(np.arange(5).astype(np.float64),
                  index=MultiIndex.from_tuples(tuple(
                      zip(*[[1, 1, 2, 2, 2], [3, 4, 3, 4, 5]])),
                                               names=[u'one', u'two'])),
        dup=Series(np.arange(5).astype(np.float64),
                   index=[u'A', u'B', u'C', u'D', u'A']),
        cat=Series(Categorical([u'foo', u'bar', u'baz'])),
        dt=Series(date_range('20130101', periods=5)),
        dt_tz=Series(date_range('20130101', periods=5, tz='US/Eastern')),
        period=Series([Period('2000Q1')] * 5))

    # Frame with duplicate column labels ("A" appears twice).
    mixed_dup_df = DataFrame(data)
    mixed_dup_df.columns = list(u"ABCDA")
    frame = dict(
        float=DataFrame({
            u'A': series[u'float'],
            u'B': series[u'float'] + 1
        }),
        int=DataFrame({
            u'A': series[u'int'],
            u'B': series[u'int'] + 1
        }),
        mixed=DataFrame({k: data[k]
                         for k in [u'A', u'B', u'C', u'D']}),
        mi=DataFrame(
            {
                u'A': np.arange(5).astype(np.float64),
                u'B': np.arange(5).astype(np.int64)
            },
            index=MultiIndex.from_tuples(tuple(
                zip(*[[u'bar', u'bar', u'baz', u'baz', u'baz'],
                      [u'one', u'two', u'one', u'two', u'three']])),
                                         names=[u'first', u'second'])),
        dup=DataFrame(np.arange(15).reshape(5, 3).astype(np.float64),
                      columns=[u'A', u'B', u'A']),
        cat_onecol=DataFrame({u'A': Categorical([u'foo', u'bar'])}),
        cat_and_float=DataFrame({
            u'A': Categorical([u'foo', u'bar', u'baz']),
            u'B': np.arange(3).astype(np.int64)
        }),
        mixed_dup=mixed_dup_df,
        dt_mixed_tzs=DataFrame(
            {
                u'A': Timestamp('20130102', tz='US/Eastern'),
                u'B': Timestamp('20130603', tz='CET')
            },
            index=range(5)),
        dt_mixed2_tzs=DataFrame(
            {
                u'A': Timestamp('20130102', tz='US/Eastern'),
                u'B': Timestamp('20130603', tz='CET'),
                u'C': Timestamp('20130603', tz='UTC')
            },
            index=range(5)))

    # Panel construction may warn; the warnings are captured/suppressed.
    # NOTE(review): Panel was removed in pandas 1.0, so this block only
    # runs on older pandas — confirm the supported version range.
    with catch_warnings(record=True):
        mixed_dup_panel = Panel({
            u'ItemA': frame[u'float'],
            u'ItemB': frame[u'int']
        })
        mixed_dup_panel.items = [u'ItemA', u'ItemA']
        panel = dict(float=Panel({
            u'ItemA': frame[u'float'],
            u'ItemB': frame[u'float'] + 1
        }),
                     dup=Panel(np.arange(30).reshape(3, 5,
                                                     2).astype(np.float64),
                               items=[u'A', u'B', u'A']),
                     mixed_dup=mixed_dup_panel)

    # Categoricals sized so their codes need int8/int16/int32 storage.
    cat = dict(int8=Categorical(list('abcdefg')),
               int16=Categorical(np.arange(1000)),
               int32=Categorical(np.arange(10000)))

    timestamp = dict(normal=Timestamp('2011-01-01'),
                     nat=NaT,
                     tz=Timestamp('2011-01-01', tz='US/Eastern'))

    # Pandas before 0.19.2 spelled the Timestamp frequency keyword
    # ``offset``; later versions use ``freq`` (per the branch below).
    if _loose_version < '0.19.2':
        timestamp['freq'] = Timestamp('2011-01-01', offset='D')
        timestamp['both'] = Timestamp('2011-01-01',
                                      tz='Asia/Tokyo',
                                      offset='M')
    else:
        timestamp['freq'] = Timestamp('2011-01-01', freq='D')
        timestamp['both'] = Timestamp('2011-01-01', tz='Asia/Tokyo', freq='M')

    # DateOffset fixtures covering the common offset classes.
    off = {
        'DateOffset': DateOffset(years=1),
        'MonthBegin': MonthBegin(1),
        'MonthEnd': MonthEnd(1),
        'QuarterBegin': QuarterBegin(1),
        'QuarterEnd': QuarterEnd(1),
        'Day': Day(1),
        'YearBegin': YearBegin(1),
        'YearEnd': YearEnd(1),
        'Week': Week(1),
        'Hour': Hour(1),
        'Minute': Minute(1)
    }

    # The _create_sp_* helpers are defined elsewhere in this module.
    return dict(series=series,
                frame=frame,
                panel=panel,
                index=index,
                scalars=scalars,
                mi=mi,
                sp_series=dict(float=_create_sp_series(),
                               ts=_create_sp_tsseries()),
                sp_frame=dict(float=_create_sp_frame()),
                cat=cat,
                timestamp=timestamp,
                offsets=off)
Beispiel #15
0
def time_for_next_update(last_time, freq='D', num=9, is_end=False):
    """Return the next update time following *last_time*.

    Arguments:
        last_time {Timestamp} -- time of the previous update

    Keyword Arguments:
        freq {str} -- update frequency (default: {'D'})
        num {int} -- period count for sub-daily freqs; hour of day for
            daily freqs (default: {9})
        is_end {bool} -- anchor to the end of the period instead of the
            start (default: {False})

    Raises:
        ValueError: unrecognised frequency

    Returns:
        Timestamp -- time of the next update

    Notes:
        1. freq < D ('MIN', 'H'): *num* is the period count; the floored
           last time is simply shifted forward *num* periods.
        2. freq in ('D', 'B'): *num* is the hour of day. If today's anchor
           (normalized date at hour *num*) is still ahead of *last_time*,
           it is returned; otherwise the anchor of the next (business) day.
        3. freq > D ('W', 'M', 'Q', 'Y'): *num* is ignored; *is_end*
           selects period end vs period start.
    """
    # BUG FIX: 'Y' is handled below, so it must be advertised too.
    valid_freq = ('B', 'D', 'W', 'M', 'Q', 'Y', 'H', 'MIN')
    if pd.isnull(last_time):
        return pd.Timestamp(MARKET_START)
    assert isinstance(
        last_time, pd.Timestamp), f'类型错误,希望Timestamp,实际为{type(last_time)}'
    now = pd.Timestamp.now(tz=last_time.tz)
    assert last_time <= now, '过去时间必须小于当前时间'
    freq = freq.upper()
    # NOTE: ``DateOffset.apply`` was removed in pandas 2.0;
    # ``timestamp + offset`` is the supported equivalent.
    if freq == 'MIN':
        return last_time.floor(freq) + Minute(n=num)
    if freq == 'H':
        return last_time.floor(freq) + Hour(n=num)
    if freq == 'D':
        # Today's anchor (midnight + num hours) may still lie ahead.
        limit = last_time.floor(freq).replace(hour=num)
        if last_time < limit:
            return limit
        else:
            return (last_time.floor(freq) + Day()).replace(hour=num)
    if freq == 'B':
        offset = BDay()
        # On a weekday, today's anchor may still lie ahead.
        if last_time.weekday() in range(0, 5):
            limit = last_time.normalize().replace(hour=num)
            if last_time < limit:
                return limit
            else:
                return (last_time.normalize() + offset).replace(hour=num)
        else:
            return (last_time.normalize() + offset).replace(hour=num)
    if freq == 'W':
        nw = last_time.normalize() + pd.Timedelta(weeks=1)
        if is_end:
            # Last representable instant of next week (Sunday 23:59:59.999999999).
            return nw + pd.Timedelta(days=7 - nw.weekday()) - pd.Timedelta(nanoseconds=1)
        else:
            # Monday of next week.
            return nw - pd.Timedelta(days=nw.weekday())
    if freq == 'M':
        if is_end:
            offset = MonthEnd(n=2)
            res = last_time.normalize() + offset
            if last_time.is_month_end:
                # Already on a month end: step back one anchor.
                res = offset.rollback(res)
            return res
        else:
            return last_time.normalize() + MonthBegin()
    if freq == 'Q':
        if is_end:
            res = last_time + QuarterEnd(n=2, startingMonth=3, normalize=True)
            if last_time.is_quarter_end:
                # Already on a quarter end: step back one anchor.
                res = res + QuarterEnd(n=-1, startingMonth=3, normalize=True)
            return res
        else:
            return last_time + QuarterBegin(n=1, normalize=True, startingMonth=1)
    if freq == 'Y':
        if last_time.year == now.year:
            if is_end:
                return last_time.normalize().replace(year=now.year, month=12, day=31)
            else:
                return last_time.normalize().replace(year=now.year, month=1, day=1)
        if is_end:
            res = last_time + YearEnd(normalize=True, month=12, n=2)
            if last_time.is_year_end:
                # Already on a year end: step back one anchor.
                res = res + YearEnd(n=-1, month=12, normalize=True)
            return res
        else:
            return last_time + YearBegin(normalize=True, month=1, n=1)
    raise ValueError('不能识别的周期类型,仅接受{}。实际输入为{}'.format(
        valid_freq, freq))
Beispiel #16
0
    rng = pd.date_range(start='1/1/2000', periods=100000, freq='T')
    ser = pd.Series(rng)

    res = rng + offset
    res_v2 = offset.apply_index(rng)
    assert (res == res_v2).all()
    assert res[0] == rng[0] + offset
    assert res[-1] == rng[-1] + offset
    res2 = ser + offset
    # apply_index is only for indexes, not series, so no res2_v2
    assert res2.iloc[0] == ser.iloc[0] + offset
    assert res2.iloc[-1] == ser.iloc[-1] + offset


@pytest.mark.parametrize(
    'offset', [QuarterBegin(),
               QuarterEnd(),
               BQuarterBegin(),
               BQuarterEnd()])
def test_on_offset(offset):
    """Fast on-offset check must agree with the slow round-trip definition.

    A date is on-offset if and only if adding and then subtracting the
    offset returns the same date.
    """
    # Dates straddling the Q4-2016 quarter boundaries (Nov 31 is invalid).
    dates = [
        datetime(2016, m, d) for m in [10, 11, 12]
        for d in [1, 2, 3, 28, 29, 30, 31] if not (m == 11 and d == 31)
    ]
    for date in dates:
        # FIX: ``onOffset`` was deprecated in pandas 1.0 and removed in 2.0;
        # ``is_on_offset`` is the supported spelling (already used elsewhere
        # in this file).
        res = offset.is_on_offset(date)
        slow_version = date == (date + offset) - offset
        assert res == slow_version


# --------------------------------------------------------------------
Beispiel #17
0
def create_data():
    """Create the pickle fixture data.

    Returns a dict mapping a category name ("series", "frame", "index",
    "scalars", "mi", "sp_series", "sp_frame", "cat", "timestamp",
    "offsets") to a dict of representative pandas objects.

    NOTE(review): presumably consumed by a legacy-pickle generation script
    (the objects look like pickle round-trip fixtures) -- confirm against
    the caller.  Treat the concrete values as frozen.
    """
    # Column data reused by several frames below; "E" is deliberately
    # mixed-dtype (floats, an int, a Timestamp and a string).
    data = {
        "A": [0.0, 1.0, 2.0, 3.0, np.nan],
        "B": [0, 1, 0, 1, 0],
        "C": ["foo1", "foo2", "foo3", "foo4", "foo5"],
        "D": date_range("1/1/2009", periods=5),
        "E": [0.0, 1, Timestamp("20100101"), "foo", 2.0],
    }

    # Scalar pandas objects.
    scalars = {
        "timestamp": Timestamp("20130101"),
        "period": Period("2012", "M")
    }

    # One index of each major flavour.
    index = {
        "int": Index(np.arange(10)),
        "date": date_range("20130101", periods=10),
        "period": period_range("2013-01-01", freq="M", periods=10),
        "float": Index(np.arange(10, dtype=np.float64)),
        "uint": Index(np.arange(10, dtype=np.uint64)),
        "timedelta": timedelta_range("00:00:00", freq="30T", periods=10),
    }

    index["range"] = RangeIndex(10)

    index["interval"] = interval_range(0, periods=10)

    # MultiIndex fixture (two string levels, named).
    mi = {
        "reg2":
        MultiIndex.from_tuples(
            tuple(
                zip(*[
                    ["bar", "bar", "baz", "baz", "foo", "foo", "qux", "qux"],
                    ["one", "two", "one", "two", "one", "two", "one", "two"],
                ])),
            names=["first", "second"],
        )
    }

    # Series fixtures: one per interesting dtype / index shape
    # ("dup" exercises duplicate index labels).
    series = {
        "float":
        Series(data["A"]),
        "int":
        Series(data["B"]),
        "mixed":
        Series(data["E"]),
        "ts":
        Series(np.arange(10).astype(np.int64),
               index=date_range("20130101", periods=10)),
        "mi":
        Series(
            np.arange(5).astype(np.float64),
            index=MultiIndex.from_tuples(tuple(
                zip(*[[1, 1, 2, 2, 2], [3, 4, 3, 4, 5]])),
                                         names=["one", "two"]),
        ),
        "dup":
        Series(np.arange(5).astype(np.float64),
               index=["A", "B", "C", "D", "A"]),
        "cat":
        Series(Categorical(["foo", "bar", "baz"])),
        "dt":
        Series(date_range("20130101", periods=5)),
        "dt_tz":
        Series(date_range("20130101", periods=5, tz="US/Eastern")),
        "period":
        Series([Period("2000Q1")] * 5),
    }

    # Frame with duplicate column labels ("ABCDA").
    mixed_dup_df = DataFrame(data)
    mixed_dup_df.columns = list("ABCDA")
    # DataFrame fixtures, including tz-aware and duplicate-label cases.
    frame = {
        "float":
        DataFrame({
            "A": series["float"],
            "B": series["float"] + 1
        }),
        "int":
        DataFrame({
            "A": series["int"],
            "B": series["int"] + 1
        }),
        "mixed":
        DataFrame({k: data[k]
                   for k in ["A", "B", "C", "D"]}),
        "mi":
        DataFrame(
            {
                "A": np.arange(5).astype(np.float64),
                "B": np.arange(5).astype(np.int64)
            },
            index=MultiIndex.from_tuples(
                tuple(
                    zip(*[
                        ["bar", "bar", "baz", "baz", "baz"],
                        ["one", "two", "one", "two", "three"],
                    ])),
                names=["first", "second"],
            ),
        ),
        "dup":
        DataFrame(np.arange(15).reshape(5, 3).astype(np.float64),
                  columns=["A", "B", "A"]),
        "cat_onecol":
        DataFrame({"A": Categorical(["foo", "bar"])}),
        "cat_and_float":
        DataFrame({
            "A": Categorical(["foo", "bar", "baz"]),
            "B": np.arange(3).astype(np.int64),
        }),
        "mixed_dup":
        mixed_dup_df,
        "dt_mixed_tzs":
        DataFrame(
            {
                "A": Timestamp("20130102", tz="US/Eastern"),
                "B": Timestamp("20130603", tz="CET"),
            },
            index=range(5),
        ),
        "dt_mixed2_tzs":
        DataFrame(
            {
                "A": Timestamp("20130102", tz="US/Eastern"),
                "B": Timestamp("20130603", tz="CET"),
                "C": Timestamp("20130603", tz="UTC"),
            },
            index=range(5),
        ),
    }

    # Categoricals sized so the category codes need int8/int16/int32 storage.
    cat = {
        "int8": Categorical(list("abcdefg")),
        "int16": Categorical(np.arange(1000)),
        "int32": Categorical(np.arange(10000)),
    }

    # Timestamp variants: plain, NaT, tz-aware.
    timestamp = {
        "normal": Timestamp("2011-01-01"),
        "nat": NaT,
        "tz": Timestamp("2011-01-01", tz="US/Eastern"),
    }

    # NOTE(review): the Timestamp ``freq`` argument was deprecated in
    # pandas 1.1 and removed in 2.0 -- these two entries require an
    # older pandas.
    timestamp["freq"] = Timestamp("2011-01-01", freq="D")
    timestamp["both"] = Timestamp("2011-01-01", tz="Asia/Tokyo", freq="M")

    # One instance of (nearly) every DateOffset subclass.
    # NOTE(review): singular DateOffset kwargs (``hour=6``) *replace* the
    # field rather than add to it, per the DateOffset docs.
    off = {
        "DateOffset": DateOffset(years=1),
        "DateOffset_h_ns": DateOffset(hour=6, nanoseconds=5824),
        "BusinessDay": BusinessDay(offset=timedelta(seconds=9)),
        "BusinessHour": BusinessHour(normalize=True, n=6, end="15:14"),
        "CustomBusinessDay": CustomBusinessDay(weekmask="Mon Fri"),
        "SemiMonthBegin": SemiMonthBegin(day_of_month=9),
        "SemiMonthEnd": SemiMonthEnd(day_of_month=24),
        "MonthBegin": MonthBegin(1),
        "MonthEnd": MonthEnd(1),
        "QuarterBegin": QuarterBegin(1),
        "QuarterEnd": QuarterEnd(1),
        "Day": Day(1),
        "YearBegin": YearBegin(1),
        "YearEnd": YearEnd(1),
        "Week": Week(1),
        "Week_Tues": Week(2, normalize=False, weekday=1),
        "WeekOfMonth": WeekOfMonth(week=3, weekday=4),
        "LastWeekOfMonth": LastWeekOfMonth(n=1, weekday=3),
        "FY5253": FY5253(n=2, weekday=6, startingMonth=7, variation="last"),
        "Easter": Easter(),
        "Hour": Hour(1),
        "Minute": Minute(1),
    }

    # Sparse fixtures come from module-level helpers defined elsewhere
    # in this file.
    return {
        "series": series,
        "frame": frame,
        "index": index,
        "scalars": scalars,
        "mi": mi,
        "sp_series": {
            "float": _create_sp_series(),
            "ts": _create_sp_tsseries()
        },
        "sp_frame": {
            "float": _create_sp_frame()
        },
        "cat": cat,
        "timestamp": timestamp,
        "offsets": off,
    }
Beispiel #18
0
    offset = cls(n=n)
    rng = pd.date_range(start='1/1/2000', periods=100000, freq='T')
    ser = pd.Series(rng)

    res = rng + offset
    res_v2 = offset.apply_index(rng)
    assert (res == res_v2).all()
    assert res[0] == rng[0] + offset
    assert res[-1] == rng[-1] + offset
    res2 = ser + offset
    # apply_index is only for indexes, not series, so no res2_v2
    assert res2.iloc[0] == ser.iloc[0] + offset
    assert res2.iloc[-1] == ser.iloc[-1] + offset


@pytest.mark.parametrize('offset', [QuarterBegin(), QuarterEnd(),
                                    BQuarterBegin(), BQuarterEnd()])
def test_on_offset(offset):
    """Fast on-offset check must agree with the slow round-trip definition
    (d is on-offset iff (d + offset) - offset == d)."""
    # Dates straddling the Q4-2016 quarter boundaries (Nov 31 is invalid).
    dates = [datetime(2016, m, d)
             for m in [10, 11, 12]
             for d in [1, 2, 3, 28, 29, 30, 31] if not (m == 11 and d == 31)]
    for date in dates:
        # FIX: ``onOffset`` was deprecated in pandas 1.0 and removed in 2.0;
        # ``is_on_offset`` is the supported spelling.
        res = offset.is_on_offset(date)
        slow_version = date == (date + offset) - offset
        assert res == slow_version


# --------------------------------------------------------------------
# Months

class TestMonthBegin(Base):
from django.conf import settings
from pandas.tseries.offsets import YearBegin, QuarterBegin, MonthBegin, Day

# Transformation identifiers (series value transformations).
VALUE = 'value'
CHANGE = 'change'
PCT_CHANGE = 'percent_change'
CHANGE_YEAR_AGO = 'change_a_year_ago'
PCT_CHANGE_YEAR_AGO = 'percent_change_a_year_ago'
CHANGE_BEG_YEAR = 'change_since_beginning_of_year'
PCT_CHANGE_BEG_YEAR = 'percent_change_since_beginning_of_year'

# Pandas frequency offsets.
PANDAS_YEAR = YearBegin()
PANDAS_SEMESTER = MonthBegin(6)  # no native semester offset; 6 month-begins
PANDAS_QUARTER = QuarterBegin(startingMonth=1)  # calendar quarters (Jan/Apr/Jul/Oct)
PANDAS_MONTH = MonthBegin()
PANDAS_WEEK = Day(7)  # NOTE(review): 7 plain days, not a weekday-anchored Week -- confirm intended
PANDAS_DAY = Day()

# Frequencies *in order* from largest to smallest.
PANDAS_FREQS = [
    PANDAS_YEAR, PANDAS_SEMESTER, PANDAS_QUARTER, PANDAS_MONTH, PANDAS_WEEK,
    PANDAS_DAY
]

# Catalog / dataset metadata field names.
IDENTIFIER = "identifier"
DATASET_IDENTIFIER = "dataset_identifier"
DOWNLOAD_URL = "downloadURL"

DATASET = 'dataset'
Beispiel #20
0
    rng = pd.date_range(start="1/1/2000", periods=100000, freq="T")
    ser = pd.Series(rng)

    res = rng + offset
    res_v2 = offset.apply_index(rng)
    assert (res == res_v2).all()
    assert res[0] == rng[0] + offset
    assert res[-1] == rng[-1] + offset
    res2 = ser + offset
    # apply_index is only for indexes, not series, so no res2_v2
    assert res2.iloc[0] == ser.iloc[0] + offset
    assert res2.iloc[-1] == ser.iloc[-1] + offset


@pytest.mark.parametrize(
    "offset", [QuarterBegin(),
               QuarterEnd(),
               BQuarterBegin(),
               BQuarterEnd()])
def test_on_offset(offset):
    """Fast on-offset check must agree with the slow round-trip definition
    (d is on-offset iff (d + offset) - offset == d)."""
    # Dates straddling the Q4-2016 quarter boundaries (Nov 31 is invalid).
    dates = [
        datetime(2016, m, d) for m in [10, 11, 12]
        for d in [1, 2, 3, 28, 29, 30, 31] if not (m == 11 and d == 31)
    ]
    for date in dates:
        # FIX: ``onOffset`` was deprecated in pandas 1.0 and removed in 2.0;
        # ``is_on_offset`` is the supported spelling.
        res = offset.is_on_offset(date)
        slow_version = date == (date + offset) - offset
        assert res == slow_version


# --------------------------------------------------------------------
Beispiel #21
0
def Selecting_reports():
    """Filter PAP report announcements down to one report per stock-quarter.

    Reads ``data\\pap\\Reports2.csv`` (relative to the current working
    directory), then:

    * drops annual/semi-annual reports for 2004-2008 (late duplicates of
      previously announced quarterly data),
    * prefers consolidated over unit reports when a year has both,
    * keeps only the earliest announcement when several cover one quarter,
    * drops stocks left with 5 or fewer reports,
    * un-rolls cumulative (year-to-date) profit into quarterly profit,
    * derives each report's quarter start date,
    * writes three wide CSV frames (profit, publication date, share count)
      indexed by quarter start date with one column per stock.

    Returns
    -------
    int
        Always 0; the useful output is the three CSV files under
        ``data\\pap``.
    """
    current_dir = os.getcwd()
    # NOTE(review): Windows-style path separators -- this breaks on POSIX.
    file2 = current_dir + '\\data\\pap\\Reports2.csv'
    dtypes = {
        'report_period1': np.float64,
        'report_period2': np.float64,
        'Profit': np.float64,
        'NoShares': np.float64,
        # FIX: np.bool was deprecated in NumPy 1.20 and removed in 1.24;
        # the builtin ``bool`` is exactly what it aliased.
        'consolidated': bool
    }
    pap_data = pd.read_csv(file2,
                           parse_dates=['Datetime', 'start_date', 'end_date'],
                           sep=',',
                           index_col=0,
                           header=0,
                           dtype=dtypes)
    # Report-type code definitions (PAP report symbols).
    annual_reports = [
        'SA-R', 'SA-RS', 'SAB-RS', 'SAB-R', 'SAF-R', 'SAU-R', 'RS', 'SAU-RS',
        'R', 'SAF-RS'
    ]
    annualS_reports = ['SA-RS', 'SAB-RS', 'RS', 'SAU-RS', 'SAF-RS']
    semi_reports = [
        'SA-P', 'SA-PS', 'PSr', 'P', 'PS', 'SA-PSr', 'SAU-P', 'SAB-P',
        'SAU-PSr', 'SAF-P', 'SAF-PS', 'SAF-PSr'
    ]
    semiS_reports = [
        'SA-PS', 'PSr', 'PS', 'SA-PSr', 'SAU-PSr', 'SAF-PS', 'SAF-PSr'
    ]
    q_reports = [
        'SAB-QSr', 'SA-QSr', 'SA-Q', 'SA-QS', 'SAF-Q', 'SAB-Q', 'SAB-QS',
        'SAU-Q', 'SAU-QSr', 'SAU-QS', 'SAF-QSr', 'SAF-QS', 'QSr', 'Q', 'QS'
    ]
    qS_reports = [
        'SAB-QSr', 'SA-QSr', 'SA-QS', 'SAB-QS', 'SAU-QSr', 'SAU-QS', 'SAF-QSr',
        'SAF-QS', 'QSr', 'QS'
    ]
    # The consolidated codes are subsets of the full lists, so after the
    # loop ``unit`` holds unit-only report codes.
    # NOTE(review): neither list is referenced again below.
    consolidated = annualS_reports + semiS_reports + qS_reports
    unit = annual_reports + semi_reports + q_reports
    for x in consolidated:
        unit.remove(x)
    # Remove R and P reports for years 2004-2008: they were late and only
    # copied info from previously announced Q reports.
    annual_reports_index_list = list(pap_data[
        (pap_data['report_period2'].isin([2004, 2005, 2006, 2007, 2008]))
        & (pap_data['report_type'].isin(annual_reports))].index)
    semi_reports_index_list = list(pap_data[
        (pap_data['report_period2'].isin([2004, 2005, 2006, 2007, 2008]))
        & (pap_data['report_type'].isin(semi_reports))].index)
    pap_data.drop(annual_reports_index_list, axis=0, inplace=True)
    pap_data.drop(semi_reports_index_list, axis=0, inplace=True)
    # Drop unit reports for stock-years where consolidated reports exist.
    for stock in list(pap_data['Name'].unique()):
        stocks_reports = pap_data[pap_data['Name'] == stock]
        for year in list(stocks_reports['report_period2'].unique()):
            years_reports = pap_data.loc[(pap_data['Name'] == stock) &
                                         (pap_data['report_period2'] == year)]
            if len(years_reports) > 4:
                if len(years_reports.consolidated.unique()) > 1:
                    list_of_unit_reports_index = list(years_reports[
                        years_reports['consolidated'] == False].index)
                    pap_data.drop(list_of_unit_reports_index,
                                  axis=0,
                                  inplace=True)
                    years_reports = pap_data.loc[
                        (pap_data['Name'] == stock)
                        & (pap_data['report_period2'] == year)]
            # Drop duplicate reports within a quarter; keep the one with the
            # earliest announcement Datetime.
            for quarter in pap_data.loc[(pap_data['Name'] == stock) &
                                        (pap_data['report_period2'] == year),
                                        'report_period1'].unique():
                quarter_reports = pap_data.loc[
                    (pap_data['Name'] == stock)
                    & (pap_data['report_period2'] == year) &
                    (pap_data['report_period1'] == quarter)]
                if len(quarter_reports) > 1:
                    list_of_report_index = list(quarter_reports.index)
                    first_announced_index = int(quarter_reports[
                        quarter_reports.Datetime ==
                        quarter_reports.Datetime.min()].index[0])
                    list_of_report_index.remove(first_announced_index)
                    pap_data.drop(list_of_report_index, axis=0, inplace=True)
        # Drop stocks left with 5 or fewer reports in the data set.
        stocks_reports = pap_data[pap_data['Name'] == stock]
        if len(stocks_reports) < 6:
            stocks_reports_index = list(stocks_reports.index)
            pap_data.drop(stocks_reports_index, axis=0, inplace=True)
    # Un-roll profits: announced earnings are cumulative within the year, so
    # quarterly profit = current cumulative - previous cumulative, except in
    # the first quarter, where the cumulative value is the quarterly value.
    # NOTE(review): reads 'Profit_t', which is not among the dtypes declared
    # above -- confirm the CSV really carries a 'Profit_t' column.
    pap_data.loc[pap_data['report_period1'] == 1,
                 'Profit_Q'] = pap_data['Profit_t']
    for stock in list(pap_data.Name.unique()):
        stocks_reports = pap_data[pap_data['Name'] == stock]
        pap_data.loc[pap_data['Name'] == stock,
                     'Profit_helper'] = stocks_reports[
                         'Profit_t'] - stocks_reports['Profit_t'].shift(1)
        pap_data.loc[(pap_data['Name'] == stock) &
                     (pap_data['report_period1'].isin([2, 3, 4, 5, 6])),
                     'Profit_Q'] = pap_data['Profit_helper']
    pap_data.drop('Profit_helper', axis=1, inplace=True)
    # Quarter start date of each reporting period; out-of-range values are
    # replaced with an approximation derived from the announcement date.
    pap_data['Q_start_date'] = [
        date - QuarterBegin(startingMonth=1) for date in pap_data['end_date']
    ]
    pap_data.loc[pap_data['Q_start_date'] < '20000101', 'Q_start_date'] = None
    pap_data.loc[pap_data['Q_start_date'] > '20190401', 'Q_start_date'] = None
    pap_data['aprox_date'] = pap_data['Datetime'].dt.date - QuarterBegin(
        n=2, startingMonth=1)
    pap_data['Q_start_date'].fillna(pap_data['aprox_date'], inplace=True)
    # Build one wide DataFrame per value (profit, publication date, number
    # of shares), with quarter start dates as rows and stock names as
    # columns, and persist each to CSV.
    RowIndex = pd.date_range(start='19971231', end='20190401',
                             freq='Q') + pd.Timedelta(1, unit='d')
    ColumnIndex = list(pap_data.Name.sort_values().unique())
    Profit_df = pd.DataFrame(columns=ColumnIndex, index=RowIndex)
    Dates_df = pd.DataFrame(columns=ColumnIndex, index=RowIndex)
    Noshares_df = pd.DataFrame(columns=ColumnIndex, index=RowIndex)
    for stock in list(pap_data.Name.unique()):
        stocks_reports_index = pap_data.loc[pap_data['Name'] == stock].index
        for index in stocks_reports_index:
            Name = pap_data.loc[index, 'Name']
            Date_pub = pap_data.loc[index, 'Datetime']
            Profit = pap_data.loc[index, 'Profit_Q']
            NoShares = pap_data.loc[index, 'NoShares']
            QDate = pap_data.loc[index, 'Q_start_date']
            Profit_df.loc[QDate, Name] = Profit
            Dates_df.loc[QDate, Name] = Date_pub
            Noshares_df.loc[QDate, Name] = NoShares
    filep = current_dir + '\\data\\pap\\PAPProfit.csv'
    Profit_df.to_csv(filep, sep=',', encoding='UTF-8')
    filed = current_dir + '\\data\\pap\\PAPDates.csv'
    Dates_df.to_csv(filed, sep=',', encoding='UTF-8')
    files = current_dir + '\\data\\pap\\PAPNoshares.csv'
    Noshares_df.to_csv(files, sep=',', encoding='UTF-8')
    return 0
Beispiel #22
0
 def test_offset_corner_case(self):
     """Stepping back one quarter from mid-quarter lands on that same
     quarter's first day, not the previous quarter's."""
     step_back = QuarterBegin(n=-1, startingMonth=1)
     assert datetime(2010, 2, 1) + step_back == datetime(2010, 1, 1)
Beispiel #23
0
    QuarterBegin,
    QuarterEnd,
)


def test_quarterly_dont_normalize():
    """Quarterly offsets default to normalize=False, so the time-of-day
    component must survive the shift."""
    ts = datetime(2012, 3, 31, 5, 30)
    for offset_cls in (QuarterBegin, QuarterEnd):
        shifted = ts + offset_cls()
        assert shifted.time() == ts.time()


@pytest.mark.parametrize("offset", [QuarterBegin(), QuarterEnd()])
def test_on_offset(offset):
    """is_on_offset must match the slow round-trip definition
    (d is on-offset iff (d + offset) - offset == d)."""
    # Dates straddling the Q4-2016 quarter boundaries (Nov 31 is invalid).
    candidates = (
        datetime(2016, month, day)
        for month in (10, 11, 12)
        for day in (1, 2, 3, 28, 29, 30, 31)
        if not (month == 11 and day == 31)
    )
    for candidate in candidates:
        round_trip = (candidate + offset) - offset
        assert offset.is_on_offset(candidate) == (candidate == round_trip)


class TestQuarterBegin(Base):
    def test_repr(self):
        """The repr exposes the startingMonth anchor (March by default)."""
        assert repr(QuarterBegin()) == "<QuarterBegin: startingMonth=3>"
Beispiel #24
0
    'Q-JAN' : QuarterEnd(startingMonth=1),
    'Q-FEB' : QuarterEnd(startingMonth=2),
    'Q-MAR' : QuarterEnd(startingMonth=3),
    'Q-APR' : QuarterEnd(startingMonth=4),
    'Q-MAY' : QuarterEnd(startingMonth=5),
    'Q-JUN' : QuarterEnd(startingMonth=6),
    'Q-JUL' : QuarterEnd(startingMonth=7),
    'Q-AUG' : QuarterEnd(startingMonth=8),
    'Q-SEP' : QuarterEnd(startingMonth=9),
    'Q-OCT' : QuarterEnd(startingMonth=10),
    'Q-NOV' : QuarterEnd(startingMonth=11),
    'Q-DEC' : QuarterEnd(startingMonth=12),

    # Quarterly - Calendar (Start)
    # 'QS'     : QuarterBegin(startingMonth=1),
    'QS-JAN' : QuarterBegin(startingMonth=1),
    'QS-FEB' : QuarterBegin(startingMonth=2),
    'QS-MAR' : QuarterBegin(startingMonth=3),
    'QS-APR' : QuarterBegin(startingMonth=4),
    'QS-MAY' : QuarterBegin(startingMonth=5),
    'QS-JUN' : QuarterBegin(startingMonth=6),
    'QS-JUL' : QuarterBegin(startingMonth=7),
    'QS-AUG' : QuarterBegin(startingMonth=8),
    'QS-SEP' : QuarterBegin(startingMonth=9),
    'QS-OCT' : QuarterBegin(startingMonth=10),
    'QS-NOV' : QuarterBegin(startingMonth=11),
    'QS-DEC' : QuarterBegin(startingMonth=12),

    # Quarterly - Business
    'BQ-JAN' : BQuarterEnd(startingMonth=1),
    'BQ-FEB' : BQuarterEnd(startingMonth=2),
Beispiel #25
0
def bin_df(df):
    """Bucket the rows of ``df`` by the calendar-quarter start of their
    '$date_to' column and return per-quarter, per-column counts.

    Mutates ``df`` in place: its index is replaced by the (day-floored)
    quarter start dates.
    """
    q_begin = QuarterBegin(startingMonth=1)
    quarter_starts = df['$date_to'] - q_begin
    df.index = pd.DatetimeIndex(quarter_starts, ambiguous='infer').floor('D')
    return df.groupby(level=0).count()