예제 #1
0
    def prepare_data(self, begin_date, end_date):
        """Prepare the invested-capital and TTM-return frames.

        Results are stored on the instance:

        * ``self.bs``   -- balance-sheet rows with a computed ``IC`` column;
          the raw item columns are removed and rows containing NaN dropped.
        * ``self.inst`` -- concatenated per-ticker ``ttmContinues`` output for
          the ``return`` column, indexed by ``datetime``.

        Args:
            begin_date: Start of the requested window. It is moved back with
                ``shift_date(begin_date, 500)`` before slicing (the original
                note says this reaches data from about two years earlier).
            end_date: Upper bound of the ``.loc`` slice.

        Raises:
            ValueError: From ``pd.concat`` when no ticker produced a TTM frame.
        """
        shifted_begin_date = shift_date(begin_date, 500)

        # Invested Capital = total assets (121) - current liabilities (101)
        #   + notes payable (68) + short-term borrowings (109)
        #   + long-term liabilities due within one year (0)
        ic_items = [121, 101, 68, 109, 0]
        bs = cp.concat_fund(self.data_source, self.tickers, 'BS').loc[
            shifted_begin_date:end_date, ['ticker'] + ic_items]
        bs['IC'] = bs[121] - bs[101] + bs[68] + bs[109] + bs[0]
        self.bs = bs.drop(ic_items, axis=1).dropna()

        # EBT = net profit attributable to parent (40) + financial expenses (56)
        inst = cp.concat_fund(self.data_source, self.tickers, 'IS').loc[
            shifted_begin_date:end_date, ['ticker', 40, 56]]
        # Keep only rows whose financial-expense value lies outside [-1, 1].
        inst = inst[(inst[56] > 1) | (inst[56] < -1)].copy()
        inst['return'] = inst[40] + inst[56]
        inst = inst.drop([40, 56], axis=1).dropna()

        # Both date columns are taken from the frame's index.
        inst['release_date'] = inst.index
        inst['report_date'] = inst.index

        return_ttm_frames = []
        for ticker in inst['ticker'].unique():
            try:
                # Per the original note, fewer than 4 financial records for a
                # ticker makes the TTM computation raise.
                return_df = ttmContinues(inst[inst['ticker'] == ticker], 'return')
                return_df['ticker'] = ticker
            except Exception:
                # Best effort: skip this ticker. ``Exception`` (not a bare
                # ``except``) keeps Ctrl-C / SystemExit working in the loop.
                continue
            return_ttm_frames.append(return_df)

        self.inst = pd.concat(return_ttm_frames).set_index('datetime')
예제 #2
0
파일: 4.py 프로젝트: zhouqiw/gupiao
    def prepare_data(self, begin_date, end_date):
        """Pre-process data: net-profit TTM and market value.

        Results are stored on the instance:

        * ``self.profitTTM`` -- concatenated per-ticker ``ttmContinues``
          output for the ``motherNetProfit`` column.
        * ``self.mkt_value`` -- the ``market_value`` result with the
          ``price`` and ``totals`` columns removed.

        Args:
            begin_date: Start of the requested window; moved back with
                ``shift_date(begin_date, 500)`` (the original note describes
                this as 500 trading days).
            end_date: Upper bound of the ``.loc`` slice.

        Raises:
            ValueError: From ``pd.concat`` when no ticker produced a TTM frame.
        """
        shifted_begin_date = shift_date(begin_date, 500)

        # Income-statement item 40: net profit attributable to shareholders of
        # the parent company. Item names and numbers are listed in FundDict.
        inst = cp.concat_fund(self.data_source, self.tickers, 'IS').loc[
            shifted_begin_date:end_date, ['ticker', 40]]
        inst['motherNetProfit'] = inst[40]
        inst.drop(40, axis=1, inplace=True)

        # The TTM routine needs both the report release date and the report
        # date; here both are taken from the frame's index.
        inst['release_date'] = inst.index
        inst['report_date'] = inst.index

        # Net-profit TTM, one frame per ticker.
        profit_ttm_frames = []
        for ticker in inst['ticker'].unique():
            try:
                # Per the original note, fewer than 4 financial records for a
                # ticker makes the TTM computation raise.
                profit_df = ttmContinues(inst[inst['ticker'] == ticker],
                                         'motherNetProfit')
                profit_df['ticker'] = ticker
            except Exception:
                # Best effort: skip this ticker. ``Exception`` (not a bare
                # ``except``) keeps Ctrl-C / SystemExit working in the loop.
                continue
            profit_ttm_frames.append(profit_df)
        self.profitTTM = pd.concat(profit_ttm_frames)

        # Total market value from the "OtherData" file.
        # Original note: Tushare market-value data only covers 2017-06 -> now.
        df = market_value(self.data_source + '\\other\\otherdata.csv', self.tickers)
        self.mkt_value = df.drop(['price', 'totals'], axis=1)
예제 #3
0
    def prepare_data(self, begin_date, end_date):
        """Pre-process data: net-profit TTM and market value.

        Results are stored on the instance:

        * ``self.profitTTM`` -- concatenated per-ticker ``ttmContinues``
          output for the ``motherNetProfit`` column. Its index is left as
          produced by ``pd.concat`` (no ``set_index('datetime')`` is applied).
        * ``self.mkt_value`` -- the ``market_value`` result with the
          ``price`` and ``totals`` columns removed.

        Args:
            begin_date: Start of the requested window; moved back with
                ``shift_date(begin_date, 500)`` before slicing.
            end_date: Upper bound of the ``.loc`` slice.

        Raises:
            ValueError: From ``pd.concat`` when no ticker produced a TTM frame.
        """
        shifted_begin_date = shift_date(begin_date, 500)

        # Income-statement item 40 is exposed as 'motherNetProfit'.
        inst = cp.concat_fund(self.data_source, self.tickers, 'IS').loc[
            shifted_begin_date:end_date, ['ticker', 40]]
        inst['motherNetProfit'] = inst[40]
        inst.drop(40, axis=1, inplace=True)

        # Both date columns are taken from the frame's index.
        inst['release_date'] = inst.index
        inst['report_date'] = inst.index

        profit_ttm_frames = []
        for ticker in inst['ticker'].unique():
            try:
                # Per the original note, fewer than 4 financial records for a
                # ticker makes the TTM computation raise.
                profit_df = ttmContinues(inst[inst['ticker'] == ticker],
                                         'motherNetProfit')
                profit_df['ticker'] = ticker
            except Exception:
                # Best effort: skip this ticker. ``Exception`` (not a bare
                # ``except``) keeps Ctrl-C / SystemExit working in the loop.
                continue
            profit_ttm_frames.append(profit_df)

        # Net-profit TTM.
        self.profitTTM = pd.concat(profit_ttm_frames)

        # Total market value.
        # Original note: Tushare market-value data only covers 2017 -> now.
        df = market_value(self.data_source + '\\other\\otherdata.csv', self.tickers)
        self.mkt_value = df.drop(['price', 'totals'], axis=1)
예제 #4
0
    def prepare_data(self, begin_date, end_date):
        """Prepare parent-equity and parent-net-profit TTM frames.

        Results are stored on the instance, both sorted by ``report_date``
        then ``datetime`` in descending order:

        * ``self.equity_mean`` -- concatenated per-ticker ``ttmDiscrete``
          output for ``motherEquity`` (called with ``5`` as third argument).
        * ``self.net_profit``  -- concatenated per-ticker ``ttmContinues``
          output for ``motherNetProfit``.

        In both frames ``report_date`` is converted to ``"%Y-%m-%d"`` strings
        before sorting.

        Args:
            begin_date: Start of the requested window; moved back with
                ``shift_date(begin_date, 800)`` before slicing.
            end_date: Upper bound of the ``.loc`` slice.

        Raises:
            ValueError: From ``pd.concat`` when no ticker produced a frame.
        """
        shifted_begin_date = shift_date(begin_date, 800)

        bs = cp.concat_fund(self.data_source, self.tickers,
                            'BS').loc[shifted_begin_date:end_date,
                                      ['ticker', 86]]
        bs['release_date'] = bs.index
        bs['report_date'] = bs.index
        bs['motherEquity'] = bs[86]

        # Equity attributable to the parent company.
        equity_frames = []
        for ticker in bs['ticker'].unique():
            try:
                tmp_equity = ttmDiscrete(bs[bs['ticker'] == ticker],
                                         'motherEquity', 5)
                tmp_equity['ticker'] = ticker
            except Exception:
                # Best effort: skip this ticker. ``Exception`` (not a bare
                # ``except``) keeps Ctrl-C / SystemExit working in the loop.
                continue
            equity_frames.append(tmp_equity)

        equity_mean = pd.concat(equity_frames)

        inst = cp.concat_fund(self.data_source, self.tickers,
                              'IS').loc[shifted_begin_date:end_date,
                                        ['ticker', 40]]
        inst['release_date'] = inst.index
        inst['report_date'] = inst.index
        inst['motherNetProfit'] = inst[40]

        # Net profit attributable to the parent company.
        profit_frames = []
        for ticker in inst['ticker'].unique():
            try:
                tmp_profit = ttmContinues(inst[inst['ticker'] == ticker],
                                          'motherNetProfit')
                tmp_profit['ticker'] = ticker
            except Exception:
                # Same best-effort skip as for the equity loop.
                continue
            profit_frames.append(tmp_profit)

        net_profit = pd.concat(profit_frames)

        # Normalise report_date to date strings, then sort newest first.
        equity_mean['report_date'] = equity_mean['report_date'].apply(
            lambda x: x.strftime("%Y-%m-%d"))
        net_profit['report_date'] = net_profit['report_date'].apply(
            lambda x: x.strftime("%Y-%m-%d"))

        sort_keys = ['report_date', 'datetime']
        self.equity_mean = equity_mean.sort_values(
            by=sort_keys, ascending=[False, False])
        self.net_profit = net_profit.sort_values(
            by=sort_keys, ascending=[False, False])
예제 #5
0
    def prepare_data(self, begin_date, end_date):
        """Pre-process data for the net-asset turnover factor.

        Per the original notes:

        * net asset turnover = revenue_TTM / net_assets_TTM
        * net assets = total assets (121) - total liabilities (117)
        * revenue_TTM is the sum of revenue over the latest 4 quarterly
          reporting periods
        * net_assets_TTM is an average over the latest 5 quarterly reporting
          periods (the code feeds the ``netAssets`` column to ``ttmDiscrete``)

        Results are stored on the instance:

        * ``self.revenueTTM``   -- concatenated per-ticker ``ttmContinues``
          output for ``revenue``.
        * ``self.netAssetsTTM`` -- concatenated per-ticker ``ttmDiscrete``
          output for ``netAssets``.

        Args:
            begin_date: Start of the requested window; moved back with
                ``shift_date(begin_date, 500)`` before slicing.
            end_date: Upper bound of the ``.loc`` slice.

        Raises:
            ValueError: From ``pd.concat`` when no ticker produced a frame.
        """
        shifted_begin_date = shift_date(begin_date, 500)

        # Balance sheet: 117 = liabilities, 121 = assets.
        netAssets = cp.concat_fund(self.data_source, self.tickers, 'BS').loc[
            shifted_begin_date:end_date, ['ticker', 117, 121]]
        netAssets['netAssets'] = netAssets[121] - netAssets[117]
        netAssets.drop([117, 121], axis=1, inplace=True)
        # NOTE(review): the original read `netAssets[netAssets['netAssets'] :0]`,
        # which slices with a Series as the start bound and cannot work.
        # A filter keeping positive net assets is presumably what was meant
        # -- confirm against the upstream project.
        netAssets = netAssets[netAssets['netAssets'] > 0].copy()
        netAssets['report_date'] = netAssets.index
        netAssets['release_date'] = netAssets.index

        netAssetsTTM_ls = []
        for ticker in netAssets['ticker'].unique():
            try:
                netAssets_df = ttmDiscrete(netAssets[netAssets['ticker'] == ticker],
                                           'netAssets')
                netAssets_df['ticker'] = ticker
            except Exception:
                # Best effort: skip this ticker. ``Exception`` (not a bare
                # ``except``) keeps Ctrl-C / SystemExit working in the loop.
                continue
            netAssetsTTM_ls.append(netAssets_df)

        # Income statement: 0 = operating revenue.
        revenue = cp.concat_fund(self.data_source, self.tickers, 'IS').loc[
            shifted_begin_date:end_date, ['ticker', 0]]
        revenue['revenue'] = revenue[0]
        revenue.drop([0], axis=1, inplace=True)
        revenue['report_date'] = revenue.index
        revenue['release_date'] = revenue.index

        revenueTTM_ls = []
        for ticker in revenue['ticker'].unique():
            try:
                # Per the original note, fewer than 4 financial records for a
                # ticker makes the TTM computation raise.
                reven_df = ttmContinues(revenue[revenue['ticker'] == ticker],
                                        'revenue')
                reven_df['ticker'] = ticker
            except Exception:
                # Same best-effort skip as for the net-assets loop.
                continue
            revenueTTM_ls.append(reven_df)

        self.revenueTTM = pd.concat(revenueTTM_ls)
        self.netAssetsTTM = pd.concat(netAssetsTTM_ls)
예제 #6
0
    def prepare_data(self, begin_date, end_date):
        """Pre-process data for the current-asset turnover factor.

        Original note: CATurnover = currentAssets (103) / revenue (0).

        Results are stored on the instance:

        * ``self.revenueTTM``       -- concatenated per-ticker
          ``ttmContinues`` output for ``revenue``.
        * ``self.currentAssetsTTM`` -- concatenated per-ticker
          ``ttmDiscrete`` output for ``currentAssets``.

        A ticker whose TTM computation raises is reported with ``print`` and
        skipped.

        Args:
            begin_date: Start of the requested window; moved back with
                ``shift_date(begin_date, 500)`` before slicing.
            end_date: Upper bound of the ``.loc`` slice.

        Raises:
            ValueError: From ``pd.concat`` when no ticker produced a frame.
        """
        # Fetch the financial data.
        shifted_begin_date = shift_date(begin_date, 500)
        bs = cp.concat_fund(self.data_source, self.tickers,
                            'BS').loc[shifted_begin_date:end_date,
                                      ['ticker', 103]]
        bs['release_date'] = bs.index
        bs['report_date'] = bs.index
        bs['currentAssets'] = bs[103]
        bs.drop(103, axis=1, inplace=True)

        inst = cp.concat_fund(self.data_source, self.tickers,
                              'IS').loc[shifted_begin_date:end_date,
                                        ['ticker', 0]]
        inst['release_date'] = inst.index
        inst['report_date'] = inst.index
        inst['revenue'] = inst[0]
        inst.drop([0], axis=1, inplace=True)

        # "TTM Continues" processing for revenue.
        revenueTTM_ls = []
        for ticker in inst['ticker'].unique():
            try:
                # Per the original note, fewer than 4 financial records for a
                # ticker makes the TTM computation raise.
                reven_df = ttmContinues(inst[inst['ticker'] == ticker],
                                        'revenue')
                reven_df['ticker'] = ticker
            except Exception:
                # ``Exception`` (not a bare ``except``) keeps Ctrl-C /
                # SystemExit working while still skipping bad tickers.
                print(ticker + ': revenue error')
                continue
            revenueTTM_ls.append(reven_df)

        # "TTM Discrete": recent average (per the original note).
        currentAssetsTTM_ls = []
        for ticker in bs['ticker'].unique():
            try:
                currentAssets_df = ttmDiscrete(bs[bs['ticker'] == ticker],
                                               'currentAssets')
                currentAssets_df['ticker'] = ticker
            except Exception:
                print(ticker + ': current asset error')
                continue
            currentAssetsTTM_ls.append(currentAssets_df)

        self.revenueTTM = pd.concat(revenueTTM_ls)
        self.currentAssetsTTM = pd.concat(currentAssetsTTM_ls)
예제 #7
0
    def prepare_data(self, begin_date, end_date):
        """Prepare revenue TTM and total-assets TTM frames.

        Results are stored on the instance:

        * ``self.revenueTTM``     -- concatenated per-ticker ``ttmContinues``
          output for ``revenue``.
        * ``self.totalAssetsTTM`` -- concatenated per-ticker ``ttmDiscrete``
          output for ``totalAssets``.

        A ticker whose TTM computation raises is reported with ``print`` and
        skipped.

        Args:
            begin_date: Start of the requested window; moved back with
                ``shift_date(begin_date, 700)`` before slicing.
            end_date: Upper bound of the ``.loc`` slice.

        Raises:
            ValueError: From ``pd.concat`` when no ticker produced a frame.
        """
        shifted_begin_date = shift_date(begin_date, 700)

        # Balance sheet: 121 = total assets.
        bs = cp.concat_fund(self.data_source, self.tickers,
                            'BS').loc[shifted_begin_date:end_date,
                                      ['ticker', 121]]
        bs['release_date'] = bs.index
        bs['report_date'] = bs.index
        bs['totalAssets'] = bs[121]
        bs.drop(121, axis=1, inplace=True)

        # Income statement: 0 = revenue, 4 = cost.
        # NOTE(review): column 4 is selected but never renamed, dropped or
        # used in this method -- confirm whether it is needed.
        inst = cp.concat_fund(self.data_source, self.tickers,
                              'IS').loc[shifted_begin_date:end_date,
                                        ['ticker', 0, 4]]
        inst['release_date'] = inst.index
        inst['report_date'] = inst.index
        inst['revenue'] = inst[0]
        inst.drop(0, axis=1, inplace=True)

        revenueTTM_ls = []
        for ticker in inst['ticker'].unique():
            try:
                # Per the original note, fewer than 4 financial records for a
                # ticker makes the TTM computation raise.
                reven_df = ttmContinues(inst[inst['ticker'] == ticker],
                                        'revenue')
                reven_df['ticker'] = ticker
            except Exception:
                # ``Exception`` (not a bare ``except``) keeps Ctrl-C /
                # SystemExit working while still skipping bad tickers.
                print(ticker + ': revenue error')
                continue
            revenueTTM_ls.append(reven_df)

        totalAssetsTTM_ls = []
        for ticker in bs['ticker'].unique():
            try:
                total_asset_df = ttmDiscrete(bs[bs['ticker'] == ticker],
                                             'totalAssets')
                total_asset_df['ticker'] = ticker
            except Exception:
                print(ticker + ': total asset error')
                continue
            totalAssetsTTM_ls.append(total_asset_df)

        self.revenueTTM = pd.concat(revenueTTM_ls)
        self.totalAssetsTTM = pd.concat(totalAssetsTTM_ls)
예제 #8
0
    def prepare_data(self, begin_date, end_date):
        """Prepare the combined revenue/cost TTM frame.

        Stores ``self.revenue_cost_TTM``: the concatenated per-ticker
        ``ttmContinues`` output, requested with the field string
        ``'revenue,cost'``.

        A ticker whose TTM computation raises is reported with ``print`` and
        skipped.

        Args:
            begin_date: Start of the requested window; moved back with
                ``shift_date(begin_date, 700)`` before slicing.
            end_date: Upper bound of the ``.loc`` slice.

        Raises:
            ValueError: From ``pd.concat`` when no ticker produced a frame.
        """
        shifted_begin_date = shift_date(begin_date, 700)

        # Income-statement items 0 and 4 are exposed as 'revenue' and 'cost'.
        inst = cp.concat_fund(self.data_source, self.tickers,
                              'IS').loc[shifted_begin_date:end_date,
                                        ['ticker', 0, 4]]
        inst['release_date'] = inst.index
        inst['report_date'] = inst.index
        inst['revenue'] = inst[0]
        inst['cost'] = inst[4]
        inst.drop([0, 4], axis=1, inplace=True)

        revenue_cost_frames = []
        for ticker in inst['ticker'].unique():
            try:
                # Per the original note, fewer than 4 financial records for a
                # ticker makes the TTM computation raise.
                reven_df = ttmContinues(inst[inst['ticker'] == ticker],
                                        'revenue,cost')
                reven_df['ticker'] = ticker
            except Exception:
                # ``Exception`` (not a bare ``except``) keeps Ctrl-C /
                # SystemExit working while still skipping bad tickers.
                print(ticker + ': revenue and cost error')
                continue
            revenue_cost_frames.append(reven_df)

        self.revenue_cost_TTM = pd.concat(revenue_cost_frames)
예제 #9
0
    def prepare_data(self, begin_date, end_date):
        """Prepare accrual data, its TTM frame, and market value.

        Results are stored on the instance:

        * ``self.accrual_df``  -- cash-flow and income-statement rows merged
          on ``ticker``/``release_date``/``report_date``, with
          ``accr = column 40 - column 133``.
        * ``self.accrual_ttm`` -- concatenated per-ticker ``ttmContinues``
          output for ``accr``.
        * ``self.mkt_value``   -- the ``market_value`` result with the
          ``price`` and ``totals`` columns removed.

        Args:
            begin_date: Start of the requested window; moved back with
                ``shift_date(begin_date, 500)`` before slicing.
            end_date: Upper bound of the ``.loc`` slice.

        Raises:
            ValueError: From ``pd.concat`` when no ticker produced a TTM frame.
        """
        shifted_begin_date = shift_date(begin_date, 500)

        # Income statement item 40 (labelled motherNetProfit in the original).
        inst = cp.concat_fund(self.data_source, self.tickers,
                              'IS').loc[shifted_begin_date:end_date,
                                        ['ticker', 40]]
        inst['release_date'] = inst.index
        inst['report_date'] = inst.index

        # Cash-flow item 133 (labelled cash_flows_yield in the original).
        cf = cp.concat_fund(self.data_source, self.tickers,
                            'CF').loc[shifted_begin_date:end_date,
                                      ['ticker', 133]]
        cf['release_date'] = cf.index
        cf['report_date'] = cf.index

        self.accrual_df = cf.merge(
            inst, on=['ticker', 'release_date', 'report_date'])
        self.accrual_df['accr'] = self.accrual_df[40] - self.accrual_df[133]

        accrual_frames = []
        for ticker in self.accrual_df['ticker'].unique():
            try:
                # Per the original note, fewer than 4 financial records for a
                # ticker makes the TTM computation raise.
                accr_df = ttmContinues(
                    self.accrual_df[self.accrual_df['ticker'] == ticker],
                    'accr')
                accr_df['ticker'] = ticker
            except Exception:
                # Best effort: skip this ticker. ``Exception`` (not a bare
                # ``except``) keeps Ctrl-C / SystemExit working in the loop.
                continue
            accrual_frames.append(accr_df)

        self.accrual_ttm = pd.concat(accrual_frames)

        # Total market value.
        # Original note: Tushare market-value data only covers 2017 -> now.
        df = market_value(self.data_source + '\\other\\otherdata.csv',
                          self.tickers)
        self.mkt_value = df.drop(['price', 'totals'], axis=1)