コード例 #1
0
    def record(self, entity, start, end, size, timestamps):
        """Fetch and persist the published stock holdings of one portfolio entity.

        Queries ``finance.FUND_PORTFOLIO_STOCK`` for the rows of ``entity.code``
        whose ``pub_date`` is on or after ``start``, reshapes the result and
        hands it to ``df_to_db``.

        :param entity: portfolio entity; ``entity.code`` selects the rows and the
            object itself is forwarded to ``portfolio_relate_stock``
        :param start: inclusive lower bound applied to ``pub_date``
        :param end: not used by this implementation
        :param size: not used by this implementation
        :param timestamps: not used by this implementation
        :return: always ``None``
        """
        portfolio_table = finance.FUND_PORTFOLIO_STOCK
        q = (
            query(portfolio_table)
            .filter(portfolio_table.pub_date >= start)
            .filter(portfolio_table.code == entity.code)
        )
        df = finance.run_query(q)

        # Nothing came back, so there is nothing to persist.
        if not pd_is_not_null(df):
            return None

        # Sample of the raw query result:
        #          id    code period_start  period_end    pub_date  report_type_id report_type  rank  symbol  name      shares    market_cap  proportion
        # 0   8640569  159919   2018-07-01  2018-09-30  2018-10-26          403003        第三季度     1  601318  中国平安  19869239.0  1.361043e+09        7.09
        # 1   8640570  159919   2018-07-01  2018-09-30  2018-10-26          403003        第三季度     2  600519  贵州茅台    921670.0  6.728191e+08        3.50
        # 2   8640571  159919   2018-07-01  2018-09-30  2018-10-26          403003        第三季度     3  600036  招商银行  18918815.0  5.806184e+08        3.02
        # 3   8640572  159919   2018-07-01  2018-09-30  2018-10-26          403003        第三季度     4  601166  兴业银行  22862332.0  3.646542e+08        1.90
        df['timestamp'] = pd.to_datetime(df['pub_date'])

        # Free up the 'name' column: the raw 'symbol'/'name' describe the held stock.
        df.rename(columns={'symbol': 'stock_code', 'name': 'stock_name'}, inplace=True)
        # The sample values (e.g. 7.09) look like percentages; scale to a fraction.
        df['proportion'] = df['proportion'] * 0.01

        # Presumably attaches the entity columns (at least 'entity_id', which is
        # read below) -- confirm against portfolio_relate_stock.
        df = portfolio_relate_stock(df, entity)

        df['stock_id'] = df['stock_code'].apply(china_stock_code_to_id)

        # Row id = entity_id, stock_id, pub_date and the raw row id joined by '_'.
        id_parts = df[['entity_id', 'stock_id', 'pub_date', 'id']]
        df['id'] = id_parts.apply(lambda row: '_'.join(row.astype(str)), axis=1)

        df['report_date'] = pd.to_datetime(df['period_end'])
        df['report_period'] = df['report_type'].apply(jq_to_report_period)

        df_to_db(
            df=df,
            data_schema=self.data_schema,
            provider=self.provider,
            force_update=self.force_update,
        )

        self.logger.info(f"persist etf {entity.code} portfolio success")
        return None
コード例 #2
0
    def format(self, entity, df):
        """Normalize a raw fund-portfolio frame into the stored record layout.

        The frame is modified in place and also returned.

        Steps performed:

        * ensure a datetime ``timestamp`` column (built from the column named by
          ``self.get_original_time_field()`` when missing, converted when present
          but not yet datetime-typed);
        * rename the raw ``symbol``/``name`` columns to ``stock_code``/``stock_name``;
        * scale ``proportion`` by 0.01;
        * derive ``stock_id``, ``report_date`` and ``report_period``;
        * stamp the entity/provider columns and the final ``id``.

        :param entity: portfolio entity supplying ``id``, ``entity_type``,
            ``exchange``, ``code`` and ``name``
        :param df: raw query result, see the sample below
        :return: the same ``df`` object after normalization
        """
        # Sample of the raw query result:
        #          id    code period_start  period_end    pub_date  report_type_id report_type  rank  symbol  name      shares    market_cap  proportion
        # 0   8640569  159919   2018-07-01  2018-09-30  2018-10-26          403003        第三季度     1  601318  中国平安  19869239.0  1.361043e+09        7.09
        # 1   8640570  159919   2018-07-01  2018-09-30  2018-10-26          403003        第三季度     2  600519  贵州茅台    921670.0  6.728191e+08        3.50
        # 2   8640571  159919   2018-07-01  2018-09-30  2018-10-26          403003        第三季度     3  600036  招商银行  18918815.0  5.806184e+08        3.02
        # 3   8640572  159919   2018-07-01  2018-09-30  2018-10-26          403003        第三季度     4  601166  兴业银行  22862332.0  3.646542e+08        1.90

        if 'timestamp' not in df.columns:
            df['timestamp'] = pd.to_datetime(df[self.get_original_time_field()])
        elif not pd.api.types.is_datetime64_any_dtype(df['timestamp']):
            # A column dtype is never an instance of `datetime`, so an isinstance
            # check on `.dtypes` cannot detect an already-converted column; ask
            # pandas about the dtype instead and convert only when needed.
            df['timestamp'] = pd.to_datetime(df['timestamp'])

        # Free up 'name' for the entity name set below: the raw 'symbol'/'name'
        # describe the held stock.
        df.rename(columns={'symbol': 'stock_code', 'name': 'stock_name'}, inplace=True)
        # The sample values (e.g. 7.09) look like percentages; scale to a fraction.
        df['proportion'] *= 0.01
        df['stock_id'] = df['stock_code'].apply(china_stock_code_to_id)
        df['report_date'] = pd.to_datetime(df['period_end'])
        df['report_period'] = df['report_type'].apply(jq_to_report_period)

        # Entity/provider columns; 'code' overwrites the raw fund code column.
        df['entity_id'] = entity.id
        df['provider'] = self.provider.value
        df['entity_type'] = entity.entity_type
        df['exchange'] = entity.exchange
        df['code'] = entity.code
        df['name'] = entity.name

        # Computed last so generate_domain_id sees the fully populated frame.
        df['id'] = self.generate_domain_id(entity, df)
        return df
コード例 #3
0
ファイル: jq_stock_meta_recorder.py プロジェクト: zvtvz/zvt
    def record(self, entity, start, end, size, timestamps):
        """Fetch and persist the published stock holdings of one portfolio entity.

        Requests ``finance.FUND_PORTFOLIO_STOCK`` rows for ``entity.code`` with
        ``pub_date`` on or after ``start``, reshapes the result and hands it to
        ``df_to_db``.

        :param entity: portfolio entity; ``entity.code`` selects the rows and the
            object itself is forwarded to ``portfolio_relate_stock``
        :param start: inclusive lower bound for ``pub_date``; rendered with
            ``to_time_str`` into the query conditions
        :param end: not used by this implementation
        :param size: not used by this implementation
        :param timestamps: not used by this implementation
        :return: always ``None``
        """
        conditions = f"pub_date#>=#{to_time_str(start)}&code#=#{entity.code}"
        df = run_query(
            table="finance.FUND_PORTFOLIO_STOCK",
            conditions=conditions,
            parse_dates=None,
        )

        # Nothing came back, so there is nothing to persist.
        if not pd_is_not_null(df):
            return None

        # Sample of the raw query result:
        #          id    code period_start  period_end    pub_date  report_type_id report_type  rank  symbol  name      shares    market_cap  proportion
        # 0   8640569  159919   2018-07-01  2018-09-30  2018-10-26          403003        第三季度     1  601318  中国平安  19869239.0  1.361043e+09        7.09
        # 1   8640570  159919   2018-07-01  2018-09-30  2018-10-26          403003        第三季度     2  600519  贵州茅台    921670.0  6.728191e+08        3.50
        # 2   8640571  159919   2018-07-01  2018-09-30  2018-10-26          403003        第三季度     3  600036  招商银行  18918815.0  5.806184e+08        3.02
        # 3   8640572  159919   2018-07-01  2018-09-30  2018-10-26          403003        第三季度     4  601166  兴业银行  22862332.0  3.646542e+08        1.90
        df["timestamp"] = pd.to_datetime(df["pub_date"])

        # Free up the 'name' column: the raw 'symbol'/'name' describe the held stock.
        df.rename(columns={"symbol": "stock_code", "name": "stock_name"}, inplace=True)
        # The sample values (e.g. 7.09) look like percentages; scale to a fraction.
        df["proportion"] = df["proportion"] * 0.01

        # Presumably attaches the entity columns (at least 'entity_id', which is
        # read below) -- confirm against portfolio_relate_stock.
        df = portfolio_relate_stock(df, entity)

        df["stock_id"] = df["stock_code"].apply(china_stock_code_to_id)

        # Row id = entity_id, stock_id, pub_date and the raw row id joined by '_'.
        id_parts = df[["entity_id", "stock_id", "pub_date", "id"]]
        df["id"] = id_parts.apply(lambda row: "_".join(row.astype(str)), axis=1)

        df["report_date"] = pd.to_datetime(df["period_end"])
        df["report_period"] = df["report_type"].apply(jq_to_report_period)

        df_to_db(
            df=df,
            data_schema=self.data_schema,
            provider=self.provider,
            force_update=self.force_update,
        )

        # The log line reports the pub_date of the last row of the frame.
        self.logger.info(
            f"persist etf {entity.code} portfolio success {df.iloc[-1]['pub_date']}"
        )
        return None
コード例 #4
0
    def record(self, entity, start, end, size, timestamps):
        """Fetch and persist a fund's published stock holdings, looping until caught up.

        Each pass requests ``finance.FUND_PORTFOLIO_STOCK`` rows for
        ``entity.code`` with ``pub_date`` on or after ``start``, drops rows
        containing missing values, reshapes the rest and saves them through
        ``df_to_db``. The next pass starts from the newest ``timestamp`` just
        saved. The loop stops when a pass returns no usable rows, when
        ``df_to_db`` reports ``0``, or once the newest timestamp falls in the
        current or previous calendar year.

        :param entity: fund entity; ``end_date``, ``code`` and ``name`` are read
            and the object is forwarded to ``portfolio_relate_stock``
        :param start: inclusive lower bound for ``pub_date`` of the first pass
        :param end: not used by this implementation
        :param size: not used by this implementation
        :param timestamps: not used by this implementation
        :return: always ``None``
        """
        # Skip delisted funds.
        if entity.end_date:
            return None

        passes_left = 1
        while passes_left > 0:
            conditions = f'pub_date#>=#{to_time_str(start)}&code#=#{entity.code}'
            df = run_query(
                table='finance.FUND_PORTFOLIO_STOCK',
                conditions=conditions,
                parse_dates=None,
            )
            df = df.dropna()

            # No usable rows in this pass: stop.
            if not pd_is_not_null(df):
                return None

            # Sample of the raw query result:
            #          id    code period_start  period_end    pub_date  report_type_id report_type  rank  symbol  name      shares    market_cap  proportion
            # 0   8640569  159919   2018-07-01  2018-09-30  2018-10-26          403003        第三季度     1  601318  中国平安  19869239.0  1.361043e+09        7.09
            # 1   8640570  159919   2018-07-01  2018-09-30  2018-10-26          403003        第三季度     2  600519  贵州茅台    921670.0  6.728191e+08        3.50
            # 2   8640571  159919   2018-07-01  2018-09-30  2018-10-26          403003        第三季度     3  600036  招商银行  18918815.0  5.806184e+08        3.02
            # 3   8640572  159919   2018-07-01  2018-09-30  2018-10-26          403003        第三季度     4  601166  兴业银行  22862332.0  3.646542e+08        1.90
            df['timestamp'] = pd.to_datetime(df['pub_date'])

            # Free up the 'name' column: the raw 'symbol'/'name' describe the held stock.
            df.rename(columns={'symbol': 'stock_code', 'name': 'stock_name'}, inplace=True)
            # The sample values (e.g. 7.09) look like percentages; scale to a fraction.
            df['proportion'] = df['proportion'] * 0.01

            # Presumably attaches the entity columns (at least 'entity_id', which
            # is read below) -- confirm against portfolio_relate_stock.
            df = portfolio_relate_stock(df, entity)

            df['stock_id'] = df['stock_code'].apply(china_stock_code_to_id)

            # Row id = entity_id, stock_id, pub_date and the raw row id joined by '_'.
            id_parts = df[['entity_id', 'stock_id', 'pub_date', 'id']]
            df['id'] = id_parts.apply(lambda row: '_'.join(row.astype(str)), axis=1)

            df['report_date'] = pd.to_datetime(df['period_end'])
            df['report_period'] = df['report_type'].apply(jq_to_report_period)

            saved = df_to_db(
                df=df,
                data_schema=self.data_schema,
                provider=self.provider,
                force_update=self.force_update,
            )

            # No non-duplicate data could be fetched (df_to_db's result is
            # treated as the number of rows saved), so stop.
            if saved == 0:
                return None

            self.logger.info(
                f"persist fund {entity.code}({entity.name}) portfolio success {df.iloc[-1]['pub_date']}"
            )
            latest = df['timestamp'].max()

            # Data from the current or previous calendar year has been reached:
            # use up one of the remaining passes.
            if latest.year >= now_pd_timestamp().year - 1:
                passes_left -= 1
            # A further pass, if any, resumes from the newest timestamp seen.
            start = latest

        return None