Example #1
0
def GetEndDateList(data, freq, trim_end=False):
    '''
    Return the sorted, de-duplicated period-end dates covering ``data.index``.

    Every timestamp in ``data.index`` is mapped to the end of the period it
    falls in; a timestamp that already is a period end is kept unchanged.

    :param data: object whose ``index`` is a pandas DatetimeIndex
    :param freq: 'M' (calendar month end), 'W' (Saturday) or
                 'Y' (calendar year end)
    :param trim_end: when True the last period end is dropped; mainly used
                     when daily data is resampled to a lower frequency
    :return: ascending list of the unique period-end timestamps
    :raises ValueError: if ``freq`` is not one of 'M', 'W', 'Y'
    '''
    if freq == 'M':
        forward, back = MonthEnd(), MonthEnd()
    elif freq == 'W':
        week_day = 5  # 0-6 map to Monday-Sunday, so 5 is Saturday
        forward, back = Week(weekday=week_day), Week()
    elif freq == 'Y':
        forward, back = YearEnd(), YearEnd()
    else:
        # Previously an unknown frequency surfaced as an UnboundLocalError.
        raise ValueError(
            "freq must be one of 'M', 'W' or 'Y', got %r" % (freq,))

    rolled = data.index + forward
    # A date already sits on a period end exactly when rolling forward one
    # period and stepping back one period lands on the date itself.
    date_list = data.index.where(data.index == rolled - back, rolled)
    end_dates = sorted(set(date_list))
    if trim_end:
        return end_dates[:-1]
    else:
        return end_dates
Example #2
0
def resample_index(dat, to_freq):
    '''
    Return a copy of ``dat`` whose index is moved to period-end dates.

    Example (to_freq='M'): 20180808 -> 20180831
                           20180809 -> 20180831
    Notes:
        1. The index of the returned object is rewritten; the work is done
           on ``dat.copy()``, so the input itself is not modified.
        2. The real trading dates are hidden: every date becomes the
           calendar month end / Saturday / calendar year end of its period.
    :param dat: object with a DatetimeIndex and a ``copy()`` method
    :param to_freq: 'M', 'W' or 'Y'; any other value leaves the index as is
    :return: the re-indexed copy
    '''
    data = dat.copy()

    if to_freq == 'M':
        forward, back = MonthEnd(), MonthEnd()
    elif to_freq == 'W':
        # Grouping by year*100+week gives wrong week-end dates, hence offsets.
        week_day = 5  # 0-6 map to Monday-Sunday, so 5 is Saturday
        forward, back = Week(weekday=week_day), Week()
    elif to_freq == 'Y':
        forward, back = YearEnd(), YearEnd()
    else:
        return data

    stamps = data.index
    rolled = stamps + forward
    # Dates already on a period end survive the forward/back round trip.
    data.index = stamps.where(stamps == rolled - back, rolled)
    return data
Example #3
0
def GetEndDateList(data, freq, trim_end=False):
    '''
    Return the sorted, de-duplicated period-end dates covering ``data.index``.

    Every timestamp in ``data.index`` is mapped to the end of the period it
    falls in; a timestamp that already is a period end is kept unchanged.

    :param data: object whose ``index`` is a pandas DatetimeIndex
    :param freq: 'M' (calendar month end), 'W' (Saturday) or
                 'Y' (calendar year end)
    :param trim_end: when True the last period end is dropped; mainly used
                     for daily data
    :return: ascending list of the unique period-end timestamps
    :raises ValueError: if ``freq`` is not one of 'M', 'W', 'Y'
    '''
    if freq == 'M':
        forward, back = MonthEnd(), MonthEnd()
    elif freq == 'W':
        week_day = 5  # 0-6 map to Monday-Sunday, so 5 is Saturday
        forward, back = Week(weekday=week_day), Week()
    elif freq == 'Y':
        forward, back = YearEnd(), YearEnd()
    else:
        # Previously an unknown frequency surfaced as an UnboundLocalError.
        raise ValueError(
            "freq must be one of 'M', 'W' or 'Y', got %r" % (freq,))

    rolled = data.index + forward
    # A date already sits on a period end exactly when rolling forward one
    # period and stepping back one period lands on the date itself.
    date_list = data.index.where(data.index == rolled - back, rolled)
    end_dates = sorted(set(date_list))
    if trim_end:
        return end_dates[:-1]
    else:
        return end_dates
        def get_data(self):
            """
            Compute cash-based operating profitability scaled by market cap.

            Implemented formula:
                operating profit
                - change in accounts receivable
                - change in inventories
                + change in accounts payable
            where each "change" is the latest annual report minus the one a
            year earlier. The result is divided element-wise by the "ME"
            deflator from ``FactorsZoo.deflate_factor``.

            NOTE(review): the original notes also mention an "increase in
            accrued liabilities" term; it is not part of this computation.

            :return: list with one value per entry of ``self.stockcodes``
            :raises Exception: if any Wind request reports a non-zero ErrorCode
            """
            date = self.date
            as_of = datetime.strptime(date, "%Y-%m-%d")

            # From May onwards use the most recent year end, before May the
            # one before it (presumably because annual reports are only
            # complete by the end of April - confirm).
            lag = 1 if as_of.month >= 5 else 2
            pre_year_date = (as_of - YearEnd(lag)).strftime("%Y-%m-%d")
            pre_year_date1 = (as_of - YearEnd(lag + 1)).strftime("%Y-%m-%d")

            def balance_item(field, rpt_date):
                # One balance-sheet field for all stocks at one report date.
                return w.wss(self.stockcodes, field,
                             "unit=1;rptDate=" + rpt_date + ";rptType=1")

            # Operating profit of the latest report year.
            ebit_oper = w.wss(
                self.stockcodes, "wgsd_ebit_oper",
                "unit=1;rptDate=" + pre_year_date +
                ";rptType=1;currencyType=")
            # Receivables, inventories and payables for the last two years.
            acct_rcv = balance_item("acct_rcv", pre_year_date)
            acct_rcv1 = balance_item("acct_rcv", pre_year_date1)
            inventory = balance_item("inventories", pre_year_date)
            inventory1 = balance_item("inventories", pre_year_date1)
            acc_payable = balance_item("acct_payable", pre_year_date)
            acc_payable1 = balance_item("acct_payable", pre_year_date1)

            responses = [ebit_oper, acct_rcv, acct_rcv1, inventory,
                         inventory1, acc_payable, acc_payable1]
            if any(resp.ErrorCode for resp in responses):
                raise Exception("数据提取异常")

            def values(resp):
                # The numbers live in ``Data[0]`` (as in the other factor
                # getters); the response object itself is not array-like.
                # dtype=float turns missing (None) entries into NaN.
                return np.array(resp.Data[0], dtype=float)

            cashbasedoperprofit = (
                values(ebit_oper)
                - (values(acct_rcv) - values(acct_rcv1))
                - (values(inventory) - values(inventory1))
                + (values(acc_payable) - values(acc_payable1)))
            deflate_f = FactorsZoo.deflate_factor(date, self.stockcodes, "ME")
            cashprofit = cashbasedoperprofit / np.array(deflate_f, dtype=float)
            return cashprofit.tolist()
Example #5
0
def calculateTTMValue(in_df, code):
    """
    Fill in missing trailing-twelve-month (TTM) per-share values for a stock.

    Stored rows for ``code`` are read from ``hexun_finance_basic``, the rows
    of ``in_df`` are appended, and for every row whose ``mgjyxjl_ttm`` is
    None or '' the columns ``mgsy_ttm`` and ``mgjyxjl_ttm`` are computed as

        previous year-end value - same quarter last year + current value

    for Q1-Q3 reports, or simply the current value for Q4 reports. If the
    needed rows are missing or not numeric, the current value is used as a
    fallback. Results are rounded to 2 decimals and an exact 0 is replaced
    by 0.01.

    NOTE(review): the filled frame is neither returned nor persisted in the
    code visible here - confirm how callers obtain the result.

    :param in_df: DataFrame of new report rows (needs a ``report_type``
                  column parseable by ``pd.to_datetime``)
    :param code: stock code used in the SQL filter
    :return: None
    """
    df = pd.read_sql_query(
        "select code,report_type,yysr,jlr,lrze,kjlr,zzc,gdqy,jyjxjl,mgsy,roe,mgjyxjl,mgjzc,mgsy_ttm,mgjyxjl_ttm \
        from hexun_finance_basic \
        where code=%(name)s",
        db.engine,
        params={'name': code})

    # DataFrame.append was removed in pandas 2.0; concat is the equivalent.
    df = pd.concat([df, in_df])
    report_dates = df['report_type'].map(lambda x: pd.to_datetime(x))
    df3 = df.set_index(report_dates)

    for index, row in df3.iterrows():
        if not (row.mgjyxjl_ttm is None or row.mgjyxjl_ttm == ''):
            continue

        # Most recent year end on or before this report date.
        lastYearEnd = YearEnd().rollback(index)
        # Same report date one year earlier.
        lastYearQuart = index - pd.DateOffset(months=12)
        app.logger.debug(
            index.strftime('%Y-%m-%d') + ':' +
            lastYearEnd.strftime('%Y-%m-%d') + ':' +
            lastYearQuart.strftime('%Y-%m-%d'))
        try:
            if index.quarter != 4:
                n_mgsy = float(df3.loc[lastYearEnd].mgsy) - float(
                    df3.loc[lastYearQuart].mgsy) + float(row.mgsy)
                n_mgjyxjl = float(df3.loc[lastYearEnd].mgjyxjl) - float(
                    df3.loc[lastYearQuart].mgjyxjl) + float(row.mgjyxjl)
            else:
                n_mgsy = float(row.mgsy)
                n_mgjyxjl = float(row.mgjyxjl)
        except Exception:
            # Best effort: missing history or non-numeric cells fall back to
            # the current report's own values.
            app.logger.warning(traceback.format_exc())
            n_mgsy = float(row.mgsy)
            n_mgjyxjl = float(row.mgjyxjl)

        # Truncate to two decimals.
        v_mgsy_ttm = round(n_mgsy, 2)
        v_mgjyxjl_ttm = round(n_mgjyxjl, 2)
        # Zero handling: an exact 0 is stored as 0.01.
        v_mgsy_ttm = 0.01 if v_mgsy_ttm == 0 else v_mgsy_ttm
        v_mgjyxjl_ttm = 0.01 if v_mgjyxjl_ttm == 0 else v_mgjyxjl_ttm

        # Single .loc write instead of chained assignment, which does not
        # reliably update the frame.
        df3.loc[index, 'mgsy_ttm'] = v_mgsy_ttm
        df3.loc[index, 'mgjyxjl_ttm'] = v_mgjyxjl_ttm
def generate_calendar(year):
    """
    Build a day-by-day calendar DataFrame for one calendar year.

    Columns, in order: Date, WeekNumber ('%U', Sunday-based, zero padded),
    Month, Year, Weekday, DOTM (day of the month), IsWeekday (Mon-Fri),
    IsProductionDay (Tue-Fri), LastSellingDayOfMonth (last business day of
    the month), Season (derived from WeekNumber), Holiday (US federal
    holiday), HolidayWeek (number of holidays in the centred 7-day window)
    and ShipWeek ('A' for even week numbers, 'B' for odd).

    :param year: calendar year (anything ``str()`` renders as the year)
    :return: DataFrame with one row per day and a default integer index
    """
    from pandas.tseries.offsets import BMonthEnd, YearEnd
    from pandas.tseries.holiday import USFederalHolidayCalendar

    start_date = pd.to_datetime('1/1/' + str(year))
    end_date = start_date + YearEnd()
    days = pd.date_range(start_date, end_date, freq='D')
    week_numbers = [d.strftime('%U') for d in days]
    month_names = [d.strftime('%B') for d in days]
    holidays = USFederalHolidayCalendar().holidays(start=start_date, end=end_date)

    DAYZ = pd.DataFrame({'Date': days, 'WeekNumber': week_numbers, 'Month': month_names})

    DAYZ['Year'] = [format(d, '%Y') for d in days]
    DAYZ['Weekday'] = [format(d, '%A') for d in days]
    DAYZ['DOTM'] = [format(d, '%d') for d in days]
    DAYZ['IsWeekday'] = DAYZ.Weekday.isin(['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday'])
    DAYZ['IsProductionDay'] = DAYZ.Weekday.isin(['Tuesday', 'Wednesday', 'Thursday', 'Friday'])
    # An offset object replaces the deprecated 'BM' alias, and a vectorised
    # isin replaces the per-row string comparison.
    last_biz_days = pd.date_range(start_date, end_date, freq=BMonthEnd())
    DAYZ['LastSellingDayOfMonth'] = DAYZ['Date'].isin(last_biz_days)

    # Seasons are defined by week number, not by calendar dates.
    DAYZ.loc[DAYZ.WeekNumber.isin(['00','01','02','03','04','05','06','07','08','09','50','51','52','53']), 'Season'] = 'Winter'
    DAYZ.loc[DAYZ.WeekNumber.isin(['10','11','12','13','14','15','16','17','18','19','20','21','22']), 'Season'] = 'Spring'
    DAYZ.loc[DAYZ.WeekNumber.isin(['23','24','25','26','27','28','29','30','31','32','33','34','35']), 'Season'] = 'Summer'
    DAYZ.loc[DAYZ.WeekNumber.isin(['36','37','38','39','40','41','42','43','44','45','46','47','48','49']), 'Season'] = 'Autumn'
    DAYZ['Holiday'] = DAYZ.Date.isin(holidays)
    DAYZ['HolidayWeek'] = DAYZ['Holiday'].rolling(window=7, center=True, min_periods=1).sum()
    DAYZ['ShipWeek'] = ['A' if int(wk) % 2 == 0 else 'B' for wk in week_numbers]

    DAYZ.reset_index(drop=True, inplace=True)

    return DAYZ
Example #7
0
def mktimerange(
        time_resolution: TimeResolution,
        date_from: Union[datetime, str],
        date_to: Union[datetime, str] = None) -> Tuple[Timestamp, Timestamp]:
    """
    Compute appropriate time ranges for monthly and annual time resolutions.
    ``date_from`` is floored to the begin of its month/year and ``date_to``
    is ceiled to the end of its month/year. Only the date is moved; a
    time-of-day component, if any, is kept.

    Values that already sit on the respective boundary are returned as they
    are (subtracting/adding one offset unconditionally would shift them by a
    whole extra period).

    Args:
        time_resolution: time resolution as enumeration
        date_from: datetime string or object
        date_to: datetime string or object; defaults to ``date_from``

    Returns:
        Tuple of two Timestamps: "date_from" and "date_to"

    Raises:
        NotImplementedError: for resolutions other than annual and monthly
    """

    if date_to is None:
        date_to = date_from

    if time_resolution == TimeResolution.ANNUAL:
        begin_offset, end_offset = YearBegin(), YearEnd()

    elif time_resolution == TimeResolution.MONTHLY:
        begin_offset, end_offset = MonthBegin(), MonthEnd()

    else:
        raise NotImplementedError(
            "mktimerange only implemented for annual and monthly time ranges")

    # rollback/rollforward leave a date that is already on the boundary
    # untouched, unlike "- Begin(1)" / "+ End(1)".
    date_from = begin_offset.rollback(pd.to_datetime(date_from))
    date_to = end_offset.rollforward(pd.to_datetime(date_to))

    return date_from, date_to
Example #8
0
def generate_calendar(year, drop_index=False):
    '''
    Build a daily calendar for ``year`` with weekday and US federal holiday
    flags.

    Columns: month, year, weekday (names/strings), is_weekday and is_holiday
    (0/1), is_holiday_week (count of holidays in the centred 7-day window).
    Unless ``drop_index`` is true, the ``date`` column becomes the index;
    otherwise it stays a regular column.
    '''
    from pandas.tseries.offsets import YearEnd
    from pandas.tseries.holiday import USFederalHolidayCalendar

    first_day = pd.to_datetime('1/1/' + str(year))
    last_day = first_day + YearEnd()
    days = pd.date_range(str(first_day), str(last_day), freq='D')
    federal_holidays = USFederalHolidayCalendar().holidays(start=first_day, end=last_day)

    working_days = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday']

    cal_df = pd.DataFrame({
        'date': days,
        'month': [day.strftime('%B') for day in days],
    })
    cal_df['year'] = [day.strftime('%Y') for day in days]
    cal_df['weekday'] = [day.strftime('%A') for day in days]
    cal_df['is_weekday'] = cal_df['weekday'].isin(working_days).astype(int)
    cal_df['is_holiday'] = cal_df['date'].isin(federal_holidays).astype(int)
    # Holidays within three days either side of each date.
    holiday_window = cal_df['is_holiday'].rolling(window=7, center=True, min_periods=1)
    cal_df['is_holiday_week'] = holiday_window.sum().astype(int)

    if drop_index:
        return cal_df
    return cal_df.set_index('date')
Example #9
0
 def __init__(self,
              n=1,
              normalize=False,
              weekmask='Mon Tue Wed Thu Fri',
              holidays=None,
              calendar=None,
              **kwds):
     """Set up the offset's attributes and its helper offsets.

     Attributes are written through ``object.__setattr__`` (presumably
     because the base class blocks normal attribute assignment - confirm).
     ``month`` is always taken from ``_default_month``; a ``month`` entry in
     ``kwds`` is recorded in ``self.kwds`` but removed before the remaining
     keywords are forwarded to ``CustomBusinessDay``.
     """
     assign = object.__setattr__

     assign(self, "n", n)
     assign(self, "normalized", normalize)
     self.kwds.update(kwds)
     assign(self, "offset", timedelta(0))
     assign(self, "month", self._default_month)

     # CustomBusinessDay must not receive a 'month' keyword.
     kwds.pop('month', None)

     business_day = CustomBusinessDay(n=1,
                                      normalize=normalize,
                                      weekmask=weekmask,
                                      holidays=holidays,
                                      calendar=calendar,
                                      **kwds)
     assign(self, "cbday", business_day)
     self.kwds['calendar'] = self.cbday.calendar
     assign(self, "y_offset", YearEnd(1))
def merge_ccm_comp(ccm, comp):
    """Attach permno links to ``comp`` rows and keep one row per permno/jdate.

    ``comp`` (gvkey, datadate, be, count) is left-joined to ``ccm`` on
    gvkey. ``yearend`` is ``datadate`` rolled forward to a calendar year end
    and ``jdate`` is six month-ends after that. Rows are kept only when
    ``jdate`` lies within [linkdt, linkenddt]; then, per
    (gvkey, permno, jdate) the row(s) with the latest ``datadate`` survive,
    and per (permno, jdate) the row(s) with the largest ``be``.

    Returns a DataFrame with columns gvkey, permno, datadate, yearend,
    jdate, be, count.
    """
    linked = pd.merge(comp[['gvkey', 'datadate', 'be', 'count']],
                      ccm,
                      how='left',
                      on=['gvkey'])
    linked['yearend'] = linked['datadate'] + YearEnd(0)
    linked['jdate'] = linked['yearend'] + MonthEnd(6)

    # Keep rows whose jdate falls inside the link validity window.
    after_link_start = linked['jdate'] >= linked['linkdt']
    before_link_end = linked['jdate'] <= linked['linkenddt']
    output_columns = [
        'gvkey', 'permno', 'datadate', 'yearend', 'jdate', 'be', 'count'
    ]
    valid = linked[after_link_start & before_link_end][output_columns]

    # Latest datadate per gvkey, permno, jdate.
    latest_datadate = valid.groupby(
        ['gvkey', 'permno', 'jdate'])['datadate'].max().reset_index()
    valid = pd.merge(valid,
                     latest_datadate,
                     on=['gvkey', 'permno', 'jdate', 'datadate'],
                     how='inner')

    # Several gvkeys can map to one permno: keep the one with the highest be.
    highest_be = valid.groupby(['permno', 'jdate'])['be'].max().reset_index()
    return pd.merge(valid,
                    highest_be,
                    on=['permno', 'jdate', 'be'],
                    how='inner')
Example #11
0
def calculateTTMValue(in_df,code):
    """
    Fill in missing trailing-twelve-month (TTM) per-share values for a stock.

    Stored rows for ``code`` are read from ``hexun_finance_basic``, the rows
    of ``in_df`` are appended, and for every row whose ``mgjyxjl_ttm`` is
    None or '' the columns ``mgsy_ttm`` and ``mgjyxjl_ttm`` are computed as

        previous year-end value - same quarter last year + current value

    for Q1-Q3 reports, or simply the current value for Q4 reports. If the
    needed rows are missing or not numeric, the current value is used as a
    fallback. Results are rounded to 2 decimals and an exact 0 is replaced
    by 0.01.

    NOTE(review): the filled frame is neither returned nor persisted in the
    code visible here - confirm how callers obtain the result.

    :param in_df: DataFrame of new report rows (needs a ``report_type``
                  column parseable by ``pd.to_datetime``)
    :param code: stock code used in the SQL filter
    :return: None
    """
    df = pd.read_sql_query(
        "select code,report_type,yysr,jlr,lrze,kjlr,zzc,gdqy,jyjxjl,mgsy,roe,mgjyxjl,mgjzc,mgsy_ttm,mgjyxjl_ttm \
        from hexun_finance_basic \
        where code=%(name)s",
        db.engine, params={'name': code})

    # DataFrame.append was removed in pandas 2.0; concat is the equivalent.
    df = pd.concat([df, in_df])
    report_dates = df['report_type'].map(lambda x: pd.to_datetime(x))
    df3 = df.set_index(report_dates)

    for index, row in df3.iterrows():
        if not (row.mgjyxjl_ttm is None or row.mgjyxjl_ttm == ''):
            continue

        # Most recent year end on or before this report date.
        lastYearEnd = YearEnd().rollback(index)
        # Same report date one year earlier.
        lastYearQuart = index - pd.DateOffset(months=12)
        app.logger.debug(index.strftime('%Y-%m-%d') + ':' + lastYearEnd.strftime('%Y-%m-%d') + ':' + lastYearQuart.strftime(
            '%Y-%m-%d'))
        try:
            if index.quarter != 4:
                n_mgsy = float(df3.loc[lastYearEnd].mgsy) - float(df3.loc[lastYearQuart].mgsy) + float(row.mgsy)
                n_mgjyxjl = float(df3.loc[lastYearEnd].mgjyxjl) - float(df3.loc[lastYearQuart].mgjyxjl) + float(
                    row.mgjyxjl)
            else:
                n_mgsy = float(row.mgsy)
                n_mgjyxjl = float(row.mgjyxjl)
        except Exception:
            # Best effort: missing history or non-numeric cells fall back to
            # the current report's own values.
            app.logger.warning(traceback.format_exc())
            n_mgsy = float(row.mgsy)
            n_mgjyxjl = float(row.mgjyxjl)

        # Truncate to two decimals.
        v_mgsy_ttm = round(n_mgsy, 2)
        v_mgjyxjl_ttm = round(n_mgjyxjl, 2)
        # Zero handling: an exact 0 is stored as 0.01.
        v_mgsy_ttm = 0.01 if v_mgsy_ttm == 0 else v_mgsy_ttm
        v_mgjyxjl_ttm = 0.01 if v_mgjyxjl_ttm == 0 else v_mgjyxjl_ttm

        # Single .loc write instead of chained assignment, which does not
        # reliably update the frame.
        df3.loc[index, 'mgsy_ttm'] = v_mgsy_ttm
        df3.loc[index, 'mgjyxjl_ttm'] = v_mgjyxjl_ttm
Example #12
0
class TestYearEnd(Base):
    """Tests for ``YearEnd`` with its default month."""
    _offset = YearEnd

    def test_misspecified(self):
        # A month outside 1..12 must be rejected at construction time.
        with pytest.raises(ValueError, match="Month must go from 1 to 12"):
            YearEnd(month=13)

    # Each entry pairs an offset with a {base date: expected result} mapping.
    # The YearEnd(-1) mapping used to list datetime(2007, 1, 1) twice; the
    # redundant duplicate key has been dropped.
    offset_cases = [
        (YearEnd(), {
            datetime(2008, 1, 1): datetime(2008, 12, 31),
            datetime(2008, 6, 30): datetime(2008, 12, 31),
            datetime(2008, 12, 31): datetime(2009, 12, 31),
            datetime(2005, 12, 30): datetime(2005, 12, 31),
            datetime(2005, 12, 31): datetime(2006, 12, 31)
        }),
        (YearEnd(0), {
            datetime(2008, 1, 1): datetime(2008, 12, 31),
            datetime(2008, 6, 30): datetime(2008, 12, 31),
            datetime(2008, 12, 31): datetime(2008, 12, 31),
            datetime(2005, 12, 30): datetime(2005, 12, 31)
        }),
        (YearEnd(-1), {
            datetime(2007, 1, 1): datetime(2006, 12, 31),
            datetime(2008, 6, 30): datetime(2007, 12, 31),
            datetime(2008, 12, 31): datetime(2007, 12, 31),
            datetime(2006, 12, 29): datetime(2005, 12, 31),
            datetime(2006, 12, 30): datetime(2005, 12, 31)
        }),
        (YearEnd(-2), {
            datetime(2007, 1, 1): datetime(2005, 12, 31),
            datetime(2008, 6, 30): datetime(2006, 12, 31),
            datetime(2008, 12, 31): datetime(2006, 12, 31)
        }),
    ]

    @pytest.mark.parametrize('case', offset_cases)
    def test_offset(self, case):
        offset, cases = case
        for base, expected in cases.items():
            assert_offset_equal(offset, base, expected)

    # (offset, date, whether the date is expected to be on the offset)
    on_offset_cases = [(YearEnd(), datetime(2007, 12, 31), True),
                       (YearEnd(), datetime(2008, 1, 1), False),
                       (YearEnd(), datetime(2006, 12, 31), True),
                       (YearEnd(), datetime(2006, 12, 29), False)]

    @pytest.mark.parametrize('case', on_offset_cases)
    def test_onOffset(self, case):
        offset, dt, expected = case
        assert_onOffset(offset, dt, expected)
Example #13
0
def resample_index(dat, to_freq):
    '''
    Return a copy of ``dat`` whose index is moved to period-end dates.

    The index of the returned object is rewritten; the work is done on
    ``dat.copy()``, so the input itself is not modified.
    :param dat: object with a DatetimeIndex and a ``copy()`` method
    :param to_freq: 'M' (month end), 'W' (Saturday) or 'Y' (year end);
                    any other value leaves the index as is
    :return: the re-indexed copy
    '''
    data = dat.copy()

    if to_freq == 'M':
        forward, back = MonthEnd(), MonthEnd()
    elif to_freq == 'W':
        # Grouping by year*100+week gives wrong week-end dates, hence offsets.
        week_day = 5  # 0-6 map to Monday-Sunday, so 5 is Saturday
        forward, back = Week(weekday=week_day), Week()
    elif to_freq == 'Y':
        forward, back = YearEnd(), YearEnd()
    else:
        return data

    stamps = data.index
    rolled = stamps + forward
    # Dates already on a period end survive the forward/back round trip.
    data.index = stamps.where(stamps == rolled - back, rolled)
    return data
Example #14
0
    def _time_format(self, start: str, end: str, freq='d'):
        '''轉換日期格式'''
        if freq == 'd':
            pass

        elif freq == 'm':
            start = pd.to_datetime(start, format='%Y%m') + MonthEnd(1)
            end = pd.to_datetime(end, format='%Y%m') + MonthEnd(1)

        elif freq == 'q':
            start = start[0:4] + start[4:6].replace('0', 'Q')
            start = pd.to_datetime(start) + QuarterEnd(1)

            end = end[0:4] + end[4:6].replace('0', 'Q')
            end = pd.to_datetime(end) + QuarterEnd(1)

        elif freq == 'y':
            start = pd.to_datetime(start) + YearEnd(1)
            end = pd.to_datetime(end) + YearEnd(1)
        return start, end
Example #15
0
def _split_by_year(tile, time_dim='time'):
    """Yield ``(year_str, Tile)`` pairs, one per calendar year of the tile.

    The years run from the year containing the first ``time_dim`` value of
    ``tile.sources`` up to the last value. Each yielded tile holds the
    sources selected by a label slice from 1 January to 31 December
    (both normalised to midnight) and shares ``tile.geobox``.

    NOTE(review): because the slice ends at midnight on 31 December, sources
    stamped later on that day presumably fall outside every slice - confirm
    this is intended.

    :param tile: object with ``sources`` (indexable by ``time_dim``) and ``geobox``
    :param time_dim: name of the time dimension in ``tile.sources``
    """
    start_range = tile.sources[time_dim][0].data
    end_range = tile.sources[time_dim][-1].data

    year_begin = YearBegin(normalize=True)
    year_end = YearEnd(normalize=True)

    # An offset object is used for the frequency because the 'AS' alias is
    # deprecated in recent pandas; YearBegin() is the same January year start.
    for date in pd.date_range(start=year_begin.rollback(start_range),
                              end=end_range,
                              freq=YearBegin(),
                              normalize=True):
        sources_slice = tile.sources.loc[{time_dim: slice(date, year_end.rollforward(date))}]
        year_str = '{0:%Y}'.format(date)
        yield year_str, Tile(sources=sources_slice, geobox=tile.geobox)
Example #16
0
class CustomBusinessYearEnd(YearOffset):
    """Year-end offset that lands on the last custom business day of a year.

    Business days are defined by ``weekmask``, ``holidays`` and ``calendar``,
    which are forwarded to ``CustomBusinessDay``.
    """
    _cacheable = False
    _prefix = 'CBYE'
    _default_month = 12

    def __init__(self,
                 n=1,
                 normalize=False,
                 weekmask='Mon Tue Wed Thu Fri',
                 holidays=None,
                 calendar=None,
                 **kwds):
        """Store the parameters and build the helper offsets.

        ``offset`` and ``month`` are read from ``kwds`` (defaulting to a zero
        timedelta and ``_default_month``); ``month`` is removed before the
        remaining keywords are passed on to ``CustomBusinessDay``.
        """
        self.n = n
        self.normalize = normalize
        self.kwds.update(kwds)
        self.offset = kwds.get('offset', timedelta(0))
        self.month = kwds.get('month', self._default_month)
        # CustomBusinessDay must not receive a 'month' keyword.
        kwds.pop('month', None)
        self.cbday = CustomBusinessDay(n=1,
                                       normalize=normalize,
                                       weekmask=weekmask,
                                       holidays=holidays,
                                       calendar=calendar,
                                       **kwds)
        self.kwds['calendar'] = self.cbday.calendar
        self.y_offset = YearEnd(1)

    @apply_wraps
    def apply(self, other):
        """Return ``other`` moved by ``n`` custom business year ends."""
        steps = self.n
        calendar_year_end = self.y_offset.rollforward(other)
        business_year_end = self.cbday.rollback(calendar_year_end)

        # The three adjustments are applied in sequence on purpose: a step
        # added by the first check may be taken back by the second.
        if steps == 0 and other != business_year_end:
            steps += 1
        if other < business_year_end and steps >= 1:
            steps -= 1
        if other > business_year_end and steps <= -1:
            steps += 1

        target_year_end = calendar_year_end + steps * self.y_offset
        return self.cbday.rollback(target_year_end)

    def onOffset(self, dt):
        """Return True if ``dt`` is the last custom business day of its year."""
        if self.normalize and not _is_normalized(dt):
            return False
        if not self.cbday.onOffset(dt):
            return False
        # On the offset when the next business day falls in another year.
        next_business_day = dt + self.cbday
        return next_business_day.year != dt.year
Example #17
0
    def eoy(d, offset=0):
        """Unmodified end-of-year: the last calendar date of a year.

        With the default ``offset`` the year is the one ``d`` falls in. A
        non-zero ``offset`` first shifts ``d`` by that many years (positive
        adds, negative subtracts), so ``offset=-1`` yields the end of the
        previous year.
        """
        base_date = to_datetime(d)
        # Shift by whole years first, then roll forward to the year end; a
        # date already on 31 December is left where it is by YearEnd(0).
        shifted = base_date + DateOffset(years=offset)
        return shifted + YearEnd(0)
        def get_data(self):
            """
            Fetch the Wind field ``deductedprofit`` for ``self.stockcodes``.

            The report date is the most recent year end before ``self.date``
            when its month is May or later, otherwise the year end before
            that one.

            :return: ``FactorsZoo.check_data`` applied to the first data row
            :raises Exception: if the Wind request reports a non-zero ErrorCode
            """
            as_of = datetime.strptime(self.date, "%Y-%m-%d")
            years_back = 1 if as_of.month >= 5 else 2
            report_date = (as_of - YearEnd(years_back)).strftime("%Y-%m-%d")

            response = w.wss(self.stockcodes, "deductedprofit",
                             "unit=1;rptDate=" + report_date)
            if response.ErrorCode != 0:
                print("数据提取异常")
                raise Exception("数据提取异常")

            return FactorsZoo.check_data(response.Data[0])
        def get_data(self):
            """
            Fetch the Wind field ``div_divpct_3yearaccu`` for ``self.stockcodes``.

            The request year is that of the most recent year end before
            ``self.date`` when its month is May or later, otherwise that of
            the year end before that one.

            :return: ``FactorsZoo.check_data`` applied to the first data row
            :raises Exception: if the Wind request reports a non-zero ErrorCode
            """
            as_of = datetime.strptime(self.date, "%Y-%m-%d")
            years_back = 1 if as_of.month >= 5 else 2
            report_year = (as_of - YearEnd(years_back)).strftime("%Y")

            response = w.wss(self.stockcodes, "div_divpct_3yearaccu",
                             "year=" + report_year)
            if response.ErrorCode != 0:
                print("数据提取异常")
                raise Exception("数据提取异常")

            return FactorsZoo.check_data(response.Data[0])
    def select_stock(self):
        """
        Build the stock pool for ``self.date`` and return the surviving codes.

        Filters applied, in order:
        1. start from the constituents of Wind sector ``a001010f00000000``
           (presumably the universe without *ST/ST names - confirm);
        2. keep stocks whose ``trade_status`` equals u'交易';
        3. keep stocks whose ``maxupordown`` value is below 1 (intended to
           drop stocks at the upper price limit);
        4. keep stocks whose ``ipo_listdays`` exceeds 4 * 365;
        5. keep stocks with roe > 5, pe > 0 and net income > 0, all read at
           the reference year end (the latest year end before ``self.date``
           from May onwards, otherwise the one before it).

        NOTE(review): the original notes also listed three-year operating
        cash flow, FCF/sales, profit-growth and share-count criteria; none
        of them is implemented here.

        :return: list of stock codes that pass every filter
        """
        stockdata = {}
        # Starting universe.
        stock = w.wset("sectorconstituent", "date=" + self.date + ";sectorid=a001010f00000000")
        stockdata['Codes'] = stock.Data[1]

        # Drop stocks that are not trading on this date.
        status = w.wss(stockdata['Codes'], "trade_status", "tradeDate=" + self.date)
        stockdata['status'] = status.Data[0]

        df = DataFrame(stockdata)
        # .copy() after each filter so the column assignments below write to
        # an independent frame rather than a slice of the previous one.
        df = df[df['status'] == u'交易'].copy()

        # Drop stocks at the upper price limit.
        maxud = w.wss(df['Codes'].values.tolist(), "maxupordown", "tradeDate=" + self.date)
        df['maxud'] = maxud.Data[0]
        df = df[df['maxud'] < 1].copy()

        # Drop stocks listed for four years or less.
        ipo_days = w.wss(df['Codes'].values.tolist(), "ipo_listdays", "tradeDate=" + self.date)
        df['ipo_days'] = ipo_days.Data[0]
        df = df[df['ipo_days'] > 4 * 365].copy()

        # Reference year end for the fundamental screens.
        as_of = datetime.strptime(self.date, "%Y-%m-%d")
        years_back = 1 if as_of.month >= 5 else 2
        pre_year_date = as_of - YearEnd(years_back)
        rpt_date = pre_year_date.strftime("%Y-%m-%d")

        # roe > 5, pe > 0, net income > 0
        roe = w.wss(df['Codes'].values.tolist(), "roe", "rptDate=" + rpt_date)
        df['roe'] = roe.Data[0]
        pe = w.wss(df['Codes'].values.tolist(), "pe", "tradeDate=" + rpt_date + ";ruleType=10")
        df['pe'] = pe.Data[0]
        inc = w.wss(df['Codes'].values.tolist(), "wgsd_net_inc", "unit=1;rptDate=" + rpt_date + ";rptType=1;currencyType=")
        df['net_inc'] = inc.Data[0]
        df = df[df['roe'] > 5]
        df = df[df['pe'] > 0]
        df = df[df['net_inc'] > 0]

        stockcodes = df['Codes'].values.tolist()

        return stockcodes
Example #21
0
class TestYearEndDiffMonth(Base):
    """Tests for ``YearEnd`` anchored on a month other than December (March)."""

    # Each entry pairs an offset with a {base date: expected result} mapping.
    offset_cases = [
        (YearEnd(month=3), {
            datetime(2008, 1, 1): datetime(2008, 3, 31),
            datetime(2008, 2, 15): datetime(2008, 3, 31),
            datetime(2008, 3, 31): datetime(2009, 3, 31),
            datetime(2008, 3, 30): datetime(2008, 3, 31),
            datetime(2005, 3, 31): datetime(2006, 3, 31),
            datetime(2006, 7, 30): datetime(2007, 3, 31)
        }),
        (YearEnd(0, month=3), {
            datetime(2008, 1, 1): datetime(2008, 3, 31),
            datetime(2008, 2, 28): datetime(2008, 3, 31),
            datetime(2008, 3, 31): datetime(2008, 3, 31),
            datetime(2005, 3, 30): datetime(2005, 3, 31)
        }),
        (YearEnd(-1, month=3), {
            datetime(2007, 1, 1): datetime(2006, 3, 31),
            datetime(2008, 2, 28): datetime(2007, 3, 31),
            datetime(2008, 3, 31): datetime(2007, 3, 31),
            datetime(2006, 3, 29): datetime(2005, 3, 31),
            datetime(2006, 3, 30): datetime(2005, 3, 31),
            datetime(2007, 3, 1): datetime(2006, 3, 31)
        }),
        (YearEnd(-2, month=3), {
            datetime(2007, 1, 1): datetime(2005, 3, 31),
            datetime(2008, 6, 30): datetime(2007, 3, 31),
            datetime(2008, 3, 31): datetime(2006, 3, 31)
        }),
    ]

    @pytest.mark.parametrize('case', offset_cases)
    def test_offset(self, case):
        offset, cases = case
        # Plain dict.items() works on Python 2 and 3 alike and avoids the
        # dependency on the ``compat`` helper module.
        for base, expected in cases.items():
            assert_offset_equal(offset, base, expected)

    # (offset, date, whether the date is expected to be on the offset)
    on_offset_cases = [(YearEnd(month=3), datetime(2007, 3, 31), True),
                       (YearEnd(month=3), datetime(2008, 1, 1), False),
                       (YearEnd(month=3), datetime(2006, 3, 31), True),
                       (YearEnd(month=3), datetime(2006, 3, 29), False)]

    @pytest.mark.parametrize('case', on_offset_cases)
    def test_onOffset(self, case):
        offset, dt, expected = case
        assert_onOffset(offset, dt, expected)
        def get_data(self):
            """
            Fetch the Wind field ``wgsd_net_inc`` for ``self.stockcodes``.

            The report date is the most recent year end before ``self.date``
            when its month is May or later, otherwise the year end before
            that one.

            :return: ``FactorsZoo.check_data`` applied to the first data row
            :raises Exception: if the Wind request reports a non-zero ErrorCode
            """
            as_of = datetime.strptime(self.date, "%Y-%m-%d")
            years_back = 1 if as_of.month >= 5 else 2
            report_date = (as_of - YearEnd(years_back)).strftime("%Y-%m-%d")

            options = "unit=1;rptDate=" + report_date + ";rptType=1;currencyType="
            response = w.wss(self.stockcodes, "wgsd_net_inc", options)
            if response.ErrorCode != 0:
                print("数据提取异常")
                raise Exception("数据提取异常")

            return FactorsZoo.check_data(response.Data[0])
        def get_data(self):
            """
            Fetch the Wind field ``net_cash_flows_oper_act`` for ``self.stockcodes``.

            The report date is the most recent year end before ``self.date``
            when its month is May or later, otherwise the year end before
            that one.

            :return: ``FactorsZoo.check_data`` applied to the first data row
            :raises Exception: if the Wind request reports a non-zero ErrorCode
            """
            as_of = datetime.strptime(self.date, "%Y-%m-%d")
            years_back = 1 if as_of.month >= 5 else 2
            report_date = (as_of - YearEnd(years_back)).strftime("%Y-%m-%d")

            options = "unit=1;rptDate=" + report_date + ";rptType=1"
            response = w.wss(self.stockcodes, "net_cash_flows_oper_act", options)
            if response.ErrorCode != 0:
                print("数据提取异常")
                raise Exception("数据提取异常")

            return FactorsZoo.check_data(response.Data[0])
    def deflate_factor(date, stockcodes, label=None):
        """
        Fetch the values used to deflate (scale) a factor.

        BE: book value of equity (Wind field ``tot_equity``)
        ME: total market value (Wind field ``mkt_cap_ard``, read at ``date``)
        AT: total assets (Wind field ``wgsd_assets``)

        BE and AT are read at the most recent year end before ``date`` when
        its month is May or later, otherwise at the year end before that one.

        :param date: reference date as a 'YYYY-MM-DD' string
        :param stockcodes: stock codes passed on to Wind
        :param label: which deflator to fetch: "BE", "ME" or "AT"
        :return: first data row of the Wind response, or None when
                 ``label`` is None
        :raises ValueError: if ``label`` is not None, "BE", "ME" or "AT"
        :raises Exception: if the Wind request reports a non-zero ErrorCode
        """
        as_of = datetime.strptime(date, "%Y-%m-%d")
        years_back = 1 if as_of.month >= 5 else 2
        pre_year_date = (as_of - YearEnd(years_back)).strftime("%Y-%m-%d")

        if label is None:
            return None

        if label == "BE":  # book value of equity
            response = w.wss(stockcodes, "tot_equity",
                             "unit=1;rptDate=" + pre_year_date + ";rptType=1")
        elif label == "ME":  # total market value
            response = w.wss(stockcodes, "mkt_cap_ard",
                             "unit=1;tradeDate=" + date)
        elif label == "AT":  # total assets
            response = w.wss(
                stockcodes, "wgsd_assets",
                "unit=1;rptDate=" + pre_year_date + ";rptType=1;currencyType=")
        else:
            # Previously an unknown label surfaced as an UnboundLocalError.
            raise ValueError(
                "label must be None, 'BE', 'ME' or 'AT', got %r" % (label,))

        if response.ErrorCode != 0:
            raise Exception("提取数据异常")
        return response.Data[0]
Example #25
0
 def set_current(self):
     """Refresh ``self.newest_date`` with the latest date per frequency.

     Relative to today: 'M', 'Q' and 'Y' are today rolled back to the
     month/quarter/year end (today itself if it is one), 'D' is yesterday,
     and 'H' is the latest half-year end (30 June or 31 December) that is
     not after today.
     """
     today = datetime.date.today()

     self.newest_date['M'] = MonthEnd().rollback(today)
     self.newest_date['Q'] = QuarterEnd().rollback(today)
     self.newest_date['D'] = today - timedelta(days=1)
     self.newest_date['Y'] = YearEnd().rollback(today)

     mid_year = datetime.date(today.year, 6, 30)
     year_close = datetime.date(today.year, 12, 31)
     if today >= year_close:
         latest_half = year_close
     elif today >= mid_year:
         latest_half = mid_year
     else:
         latest_half = datetime.date(today.year - 1, 12, 31)
     self.newest_date['H'] = latest_half
Example #26
0
 def __init__(self,
              n=1,
              normalize=False,
              weekmask='Mon Tue Wed Thu Fri',
              holidays=None,
              calendar=None,
              **kwds):
     """Store the parameters and build the helper offsets.

     ``offset`` and ``month`` are read from ``kwds`` (defaulting to a zero
     timedelta and ``_default_month``); ``month`` is removed before the
     remaining keywords are passed on to ``CustomBusinessDay``.
     """
     self.n = n
     self.normalize = normalize
     self.kwds.update(kwds)
     self.offset = kwds.get('offset', timedelta(0))
     self.month = kwds.get('month', self._default_month)

     # CustomBusinessDay must not receive a 'month' keyword.
     kwds.pop('month', None)

     business_day = CustomBusinessDay(n=1,
                                      normalize=normalize,
                                      weekmask=weekmask,
                                      holidays=holidays,
                                      calendar=calendar,
                                      **kwds)
     self.cbday = business_day
     self.kwds['calendar'] = self.cbday.calendar
     self.y_offset = YearEnd(1)
Example #27
0
def ecos(code='021Y125',
         item1='?',
         item2='?',
         item3='?',
         freq='Q',
         first='1900',
         last='2100',
         N='10000',
         detail=True,
         col=None):
    '''Retrieve a daily, monthly, quarterly or annual time series from ECOS.

    Run open_ecosapi() to explore ECOS API codes.

    :param code: statistic code inserted into the request URL.
    :param item1: first item segment of the request URL ('?' by default).
    :param item2: second item segment of the request URL.
    :param item3: third item segment of the request URL.
    :param freq: one of 'Y', 'Q', 'M', 'D'; the letter is doubled
        ('YY', 'QQ', 'MM', 'DD') before it is put into the URL.
    :param first: URL segment after the frequency; presumably the first
        period of the requested range -- confirm against the ECOS API docs.
    :param last: URL segment after ``first``; presumably the last period.
    :param N: URL segment following the fixed '1'; presumably the last row
        number to fetch -- confirm against the ECOS API docs.
    :param detail: when true, print the statistic name, unit, covered
        period and first item name taken from the response.
    :param col: column name of the returned frame; 'DATA_VALUE' when None.
    :return: single-column DataFrame of float values indexed by 'DATE', or
        None (after printing a message) when ``freq`` is not supported.
        For 'M', 'Q' and 'Y' the parsed index is shifted forward with
        MonthEnd / QuarterEnd / YearEnd respectively.
    '''
    ecos_key = "http://ecos.bok.or.kr/api/StatisticSearch/390S6FIOF95M7MHASMEA"
    freq_str = {'QQ': 'Q', 'MM': '-'}
    freq += freq  # Y, Q, M, D -> YY, QQ, MM, DD
    url = f"{ecos_key}/json/kr/1/{N}/{code}/{freq}/{first}/{last}/{item1}/{item2}/{item3}/"
    # Use the response as a context manager so the connection is closed as
    # soon as the payload has been read, even if decoding fails.
    with urlopen(url) as result:
        data = json.loads(result.read())
    data = data["StatisticSearch"]["row"]
    df = pd.DataFrame(data)
    if detail:
        print(
            f"통계: {df.loc[0, 'STAT_NAME']}",
            f"단위: {df.loc[0, 'UNIT_NAME']}",
            f"기간: {df.loc[0, 'TIME']} - {df.loc[df.index[-1], 'TIME']}",
            f"항목: {df.loc[0, 'ITEM_NAME1']}",
        )
    df = df.set_index("TIME")
    df.index.names = ['DATE']
    if freq == 'MM':
        # A separator is inserted after the 4-character year before parsing
        # ('-' for months), then the date is moved to the month end.
        df.index = pd.DatetimeIndex(df.index.str[:4] + freq_str[freq] +
                                    df.index.str[4:])
        df.index = df.index + MonthEnd()
    elif freq == 'QQ':
        # Same idea with a 'Q' separator, then moved to the quarter end.
        df.index = pd.DatetimeIndex(df.index.str[:4] + freq_str[freq] +
                                    df.index.str[4:])
        df.index = df.index + QuarterEnd()
    elif freq == 'YY':
        df.index = pd.DatetimeIndex(df.index)
        df.index = df.index + YearEnd()
    elif freq == 'DD':
        df.index = pd.DatetimeIndex(df.index)
    else:
        # The accepted annual code is 'Y' (it becomes 'YY' above), not 'A'.
        print('frequency is not one of D, M, Q, Y.')
        return None
    df["DATA_VALUE"] = df["DATA_VALUE"].astype("float")
    values = df['DATA_VALUE']
    # An explicit name=None is not handled the same way by every pandas
    # release, so only pass a name when the caller supplied one; otherwise
    # the column keeps the series name 'DATA_VALUE'.
    if col is None:
        return values.to_frame()
    return values.to_frame(col)
def generate_weeks(year):
    """Build a per-day calendar for ``year`` with week, month and season.

    :param year: calendar year (anything ``str()`` turns into a year).
    :return: DataFrame with one row per day from 1 January up to the date
        obtained by adding ``YearEnd()`` to it, and the columns
        'Date', 'WeekNumber' (``%U`` two-digit string), 'Month' (``%B``
        name) and 'Season' (derived from the week number).
    """
    from pandas.tseries.offsets import YearEnd

    first_day = pd.to_datetime('1/1/' + str(year))
    last_day = first_day + YearEnd()
    days = pd.date_range(str(first_day), str(last_day), freq='D')

    calendar = pd.DataFrame({
        'Date': days,
        'WeekNumber': [day.strftime('%U') for day in days],
        'Month': [day.strftime('%B') for day in days],
    })

    # Season -> week numbers belonging to it. Winter wraps around the year
    # boundary (weeks 00-09 and 50-53).
    season_weeks = (
        ('Winter', list(range(0, 10)) + list(range(50, 54))),
        ('Spring', list(range(10, 23))),
        ('Summer', list(range(23, 36))),
        ('Autumn', list(range(36, 50))),
    )
    for season, weeks in season_weeks:
        week_labels = ['%02d' % week for week in weeks]
        in_season = calendar['WeekNumber'].isin(week_labels)
        calendar.loc[in_season, 'Season'] = season

    calendar.reset_index(drop=True, inplace=True)

    return calendar
Example #29
0
def get_dividends(start, end, **kwargs):
    """Fetch cash-dividend records via the 'EquDivGet' API and store them.

    The requested window is widened before the call: ``start`` is moved
    back by one ``YearBegin`` offset and ``end`` forward by one ``YearEnd``
    offset, both formatted as ``YYYYMMDD`` strings.

    The returned table is cleaned in place (rows with missing values
    dropped, dates and tickers converted to integers, rows sorted, columns
    renamed to IDs / date / ann_dt / dividend) and then handed to
    ``h5db.save_h5file``. Nothing is returned.

    :param start: start date, anything ``pd.to_datetime`` accepts.
    :param end: end date, anything ``pd.to_datetime`` accepts.
    :param kwargs: accepted but not used by this function.
    """
    begin_str = (pd.to_datetime(start) - YearBegin(1)).strftime("%Y%m%d")
    end_str = (pd.to_datetime(end) + YearEnd(1)).strftime("%Y%m%d")

    requested_fields = ["endDate", "ticker", "publishDate", "perCashDiv"]
    raw_dividends = uqer_db.run_api("EquDivGet",
                                    beginDate=begin_str,
                                    endDate=end_str,
                                    field=requested_fields)
    raw_dividends.dropna(inplace=True)

    # Dates arrive as dash-separated strings; store them as plain integers.
    for date_column in ('endDate', 'publishDate'):
        without_dashes = raw_dividends[date_column].str.replace('-', '')
        raw_dividends[date_column] = without_dashes.astype('int')
    raw_dividends['ticker'] = raw_dividends['ticker'].astype('int')

    sort_keys = ['ticker', 'endDate', 'publishDate']
    raw_dividends.sort_values(sort_keys, inplace=True)

    new_names = {
        'ticker': 'IDs',
        'endDate': 'date',
        'publishDate': 'ann_dt',
        'perCashDiv': 'dividend'
    }
    raw_dividends.rename(columns=new_names, inplace=True)

    h5db.save_h5file(raw_dividends, 'cash_div', '/dividends/')
Example #30
0
    'T'     : Minute(),
    'S'     : Second(),
    'L'     : Milli(),
    'U'     : Micro(),
    None    : None,

    # Monthly - Calendar
    'M'      : MonthEnd(),
    'MS'     : MonthBegin(),

    # Monthly - Business
    'BM'     : BMonthEnd(),
    'BMS'    : BMonthBegin(),

    # Annual - Calendar
    'A-JAN' : YearEnd(month=1),
    'A-FEB' : YearEnd(month=2),
    'A-MAR' : YearEnd(month=3),
    'A-APR' : YearEnd(month=4),
    'A-MAY' : YearEnd(month=5),
    'A-JUN' : YearEnd(month=6),
    'A-JUL' : YearEnd(month=7),
    'A-AUG' : YearEnd(month=8),
    'A-SEP' : YearEnd(month=9),
    'A-OCT' : YearEnd(month=10),
    'A-NOV' : YearEnd(month=11),
    'A-DEC' : YearEnd(month=12),
    'A'     : YearEnd(month=12),

    # Annual - Calendar (start)
    'AS-JAN' : YearBegin(month=1),
Example #31
0
def create_data():
    """Create the pandas objects used as pickle/msgpack data.

    Returns a dict whose keys are ``series``, ``frame``, ``index``,
    ``scalars``, ``mi``, ``sp_series``, ``sp_frame``, ``cat``,
    ``timestamp`` and ``offsets``; each value is itself a dict mapping a
    short label to one constructed object.

    Some entries depend on ``_loose_version`` (defined outside this
    function; presumably the installed pandas version -- confirm).
    """

    # Raw columns reused by the Series/DataFrame entries below. 'E' mixes
    # floats, an int, a Timestamp and a string in one column.
    data = {
        'A': [0., 1., 2., 3., np.nan],
        'B': [0, 1, 0, 1, 0],
        'C': ['foo1', 'foo2', 'foo3', 'foo4', 'foo5'],
        'D': date_range('1/1/2009', periods=5),
        'E': [0., 1, Timestamp('20100101'), 'foo', 2.]
    }

    scalars = dict(timestamp=Timestamp('20130101'), period=Period('2012', 'M'))

    # One Index object per index flavour.
    index = dict(int=Index(np.arange(10)),
                 date=date_range('20130101', periods=10),
                 period=period_range('2013-01-01', freq='M', periods=10),
                 float=Index(np.arange(10, dtype=np.float64)),
                 uint=Index(np.arange(10, dtype=np.uint64)),
                 timedelta=timedelta_range('00:00:00', freq='30T', periods=10))

    # RangeIndex is imported and added only from version 0.18 onwards.
    if _loose_version >= LooseVersion('0.18'):
        from pandas import RangeIndex
        index['range'] = RangeIndex(10)

    # interval_range is imported and added only from version 0.21 onwards.
    if _loose_version >= LooseVersion('0.21'):
        from pandas import interval_range
        index['interval'] = interval_range(0, periods=10)

    # Two-level MultiIndex built from parallel label lists.
    mi = dict(reg2=MultiIndex.from_tuples(tuple(
        zip(*[['bar', 'bar', 'baz', 'baz', 'foo', 'foo', 'qux', 'qux'],
              ['one', 'two', 'one', 'two', 'one', 'two', 'one', 'two']])),
                                          names=['first', 'second']))

    # Series covering plain, mixed, datetime-indexed, MultiIndex-indexed,
    # duplicate-label, categorical, datetime (naive and tz-aware) and
    # period values.
    series = dict(float=Series(data['A']),
                  int=Series(data['B']),
                  mixed=Series(data['E']),
                  ts=Series(np.arange(10).astype(np.int64),
                            index=date_range('20130101', periods=10)),
                  mi=Series(np.arange(5).astype(np.float64),
                            index=MultiIndex.from_tuples(tuple(
                                zip(*[[1, 1, 2, 2, 2], [3, 4, 3, 4, 5]])),
                                                         names=['one',
                                                                'two'])),
                  dup=Series(np.arange(5).astype(np.float64),
                             index=['A', 'B', 'C', 'D', 'A']),
                  cat=Series(Categorical(['foo', 'bar', 'baz'])),
                  dt=Series(date_range('20130101', periods=5)),
                  dt_tz=Series(
                      date_range('20130101', periods=5, tz='US/Eastern')),
                  period=Series([Period('2000Q1')] * 5))

    # Same columns as `data`, relabelled so that the label 'A' occurs twice.
    mixed_dup_df = DataFrame(data)
    mixed_dup_df.columns = list("ABCDA")
    # DataFrames mirroring the Series cases above, plus frames whose
    # columns hold Timestamps in different time zones.
    frame = dict(float=DataFrame({
        'A': series['float'],
        'B': series['float'] + 1
    }),
                 int=DataFrame({
                     'A': series['int'],
                     'B': series['int'] + 1
                 }),
                 mixed=DataFrame({k: data[k]
                                  for k in ['A', 'B', 'C', 'D']}),
                 mi=DataFrame(
                     {
                         'A': np.arange(5).astype(np.float64),
                         'B': np.arange(5).astype(np.int64)
                     },
                     index=MultiIndex.from_tuples(tuple(
                         zip(*[['bar', 'bar', 'baz', 'baz', 'baz'],
                               ['one', 'two', 'one', 'two', 'three']])),
                                                  names=['first', 'second'])),
                 dup=DataFrame(np.arange(15).reshape(5, 3).astype(np.float64),
                               columns=['A', 'B', 'A']),
                 cat_onecol=DataFrame({'A': Categorical(['foo', 'bar'])}),
                 cat_and_float=DataFrame({
                     'A':
                     Categorical(['foo', 'bar', 'baz']),
                     'B':
                     np.arange(3).astype(np.int64)
                 }),
                 mixed_dup=mixed_dup_df,
                 dt_mixed_tzs=DataFrame(
                     {
                         'A': Timestamp('20130102', tz='US/Eastern'),
                         'B': Timestamp('20130603', tz='CET')
                     },
                     index=range(5)),
                 dt_mixed2_tzs=DataFrame(
                     {
                         'A': Timestamp('20130102', tz='US/Eastern'),
                         'B': Timestamp('20130603', tz='CET'),
                         'C': Timestamp('20130603', tz='UTC')
                     },
                     index=range(5)))

    # Categoricals with 7, 1000 and 10000 distinct values.
    # NOTE(review): the keys look like intended code dtypes; whether each
    # size actually yields that dtype is not checked here.
    cat = dict(int8=Categorical(list('abcdefg')),
               int16=Categorical(np.arange(1000)),
               int32=Categorical(np.arange(10000)))

    timestamp = dict(normal=Timestamp('2011-01-01'),
                     nat=NaT,
                     tz=Timestamp('2011-01-01', tz='US/Eastern'))

    # The frequency is passed as `offset=` below version 0.19.2 and as
    # `freq=` from that version on.
    if _loose_version < LooseVersion('0.19.2'):
        timestamp['freq'] = Timestamp('2011-01-01', offset='D')
        timestamp['both'] = Timestamp('2011-01-01',
                                      tz='Asia/Tokyo',
                                      offset='M')
    else:
        timestamp['freq'] = Timestamp('2011-01-01', freq='D')
        timestamp['both'] = Timestamp('2011-01-01', tz='Asia/Tokyo', freq='M')

    # One instance per date-offset class, several with non-default
    # arguments.
    off = {
        'DateOffset': DateOffset(years=1),
        'DateOffset_h_ns': DateOffset(hour=6, nanoseconds=5824),
        'BusinessDay': BusinessDay(offset=timedelta(seconds=9)),
        'BusinessHour': BusinessHour(normalize=True, n=6, end='15:14'),
        'CustomBusinessDay': CustomBusinessDay(weekmask='Mon Fri'),
        'SemiMonthBegin': SemiMonthBegin(day_of_month=9),
        'SemiMonthEnd': SemiMonthEnd(day_of_month=24),
        'MonthBegin': MonthBegin(1),
        'MonthEnd': MonthEnd(1),
        'QuarterBegin': QuarterBegin(1),
        'QuarterEnd': QuarterEnd(1),
        'Day': Day(1),
        'YearBegin': YearBegin(1),
        'YearEnd': YearEnd(1),
        'Week': Week(1),
        'Week_Tues': Week(2, normalize=False, weekday=1),
        'WeekOfMonth': WeekOfMonth(week=3, weekday=4),
        'LastWeekOfMonth': LastWeekOfMonth(n=1, weekday=3),
        'FY5253': FY5253(n=2, weekday=6, startingMonth=7, variation="last"),
        'Easter': Easter(),
        'Hour': Hour(1),
        'Minute': Minute(1)
    }

    # The sparse entries come from the _create_sp_* helpers, which are
    # defined outside this function.
    return dict(series=series,
                frame=frame,
                index=index,
                scalars=scalars,
                mi=mi,
                sp_series=dict(float=_create_sp_series(),
                               ts=_create_sp_tsseries()),
                sp_frame=dict(float=_create_sp_frame()),
                cat=cat,
                timestamp=timestamp,
                offsets=off)