Example #1
0
    def record(self, entity, start, end, size, timestamps):
        jq_code = code_map_jq.get(entity.code)

        df = run_query(
            table='finance.STK_MT_TOTAL',
            conditions=
            f'exchange_code#=#{jq_code}&date#>=#{to_time_str(start)}',
            parse_dates=['date'])
        print(df)

        json_results = []

        for item in df.to_dict(orient='records'):
            result = {
                'provider': self.provider,
                'timestamp': item['date'],
                'name': entity.name,
                'margin_value': item['fin_value'],
                'margin_buy': item['fin_buy_value'],
                'short_value': item['sec_value'],
                'short_volume': item['sec_sell_volume'],
                'total_value': item['fin_sec_value']
            }

            json_results.append(result)

        if len(json_results) < 100:
            self.one_shot = True

        return json_results
Example #2
0
    def record(self, entity, start, end, size, timestamps):
        for timestamp in timestamps:
            df = run_query(
                table="finance.STK_HK_HOLD_INFO",
                conditions=
                f"link_id#=#{entity.code}&day#=#{to_time_str(timestamp)}")
            print(df)

            if pd_is_not_null(df):
                df.rename(columns={
                    "day": "timestamp",
                    "link_id": "holder_code",
                    "link_name": "holder_name"
                },
                          inplace=True)
                df["timestamp"] = pd.to_datetime(df["timestamp"])

                df["entity_id"] = df["code"].apply(
                    lambda x: to_entity_id(entity_type="stock", jq_code=x))
                df["code"] = df["code"].apply(lambda x: x.split(".")[0])

                # id格式为:{holder_name}_{entity_id}_{timestamp}
                df["id"] = df[["holder_name", "entity_id", "timestamp"]].apply(
                    lambda se: "{}_{}_{}".format(
                        se["holder_name"], se["entity_id"],
                        to_time_str(se["timestamp"], fmt=TIME_FORMAT_DAY)),
                    axis=1,
                )

                df_to_db(df=df,
                         data_schema=self.data_schema,
                         provider=self.provider,
                         force_update=self.force_update)
Example #3
0
    def record(self, entity, start, end, size, timestamps):
        df = run_query(table='finance.FUND_PORTFOLIO_STOCK',
                       conditions=f'pub_date#>=#{to_time_str(start)}&code#=#{entity.code}',
                       parse_dates=None)
        if pd_is_not_null(df):
            #          id    code period_start  period_end    pub_date  report_type_id report_type  rank  symbol  name      shares    market_cap  proportion
            # 0   8640569  159919   2018-07-01  2018-09-30  2018-10-26          403003        第三季度     1  601318  中国平安  19869239.0  1.361043e+09        7.09
            # 1   8640570  159919   2018-07-01  2018-09-30  2018-10-26          403003        第三季度     2  600519  贵州茅台    921670.0  6.728191e+08        3.50
            # 2   8640571  159919   2018-07-01  2018-09-30  2018-10-26          403003        第三季度     3  600036  招商银行  18918815.0  5.806184e+08        3.02
            # 3   8640572  159919   2018-07-01  2018-09-30  2018-10-26          403003        第三季度     4  601166  兴业银行  22862332.0  3.646542e+08        1.90
            df['timestamp'] = pd.to_datetime(df['pub_date'])

            df.rename(columns={'symbol': 'stock_code', 'name': 'stock_name'}, inplace=True)
            df['proportion'] = df['proportion'] * 0.01

            df = portfolio_relate_stock(df, entity)

            df['stock_id'] = df['stock_code'].apply(lambda x: china_stock_code_to_id(x))
            df['id'] = df[['entity_id', 'stock_id', 'pub_date', 'id']].apply(lambda x: '_'.join(x.astype(str)), axis=1)
            df['report_date'] = pd.to_datetime(df['period_end'])
            df['report_period'] = df['report_type'].apply(lambda x: jq_to_report_period(x))

            df_to_db(df=df, data_schema=self.data_schema, provider=self.provider, force_update=self.force_update)

            # self.logger.info(df.tail())
            self.logger.info(f"persist etf {entity.code} portfolio success {df.iloc[-1]['pub_date']}")

        return None
Example #4
0
    def record(self, entity, start, end, size, timestamps):
        for timestamp in timestamps:
            df = run_query(
                table='finance.STK_HK_HOLD_INFO',
                conditions=
                f'link_id#=#{entity.code}&day#=#{to_time_str(timestamp)}')
            print(df)

            if pd_is_not_null(df):
                df.rename(columns={
                    'day': 'timestamp',
                    'link_id': 'holder_code',
                    'link_name': 'holder_name'
                },
                          inplace=True)
                df['timestamp'] = pd.to_datetime(df['timestamp'])

                df['entity_id'] = df['code'].apply(
                    lambda x: to_entity_id(entity_type='stock', jq_code=x))
                df['code'] = df['code'].apply(lambda x: x.split('.')[0])

                # id格式为:{holder_name}_{entity_id}_{timestamp}
                df['id'] = df[['holder_name', 'entity_id', 'timestamp']].apply(
                    lambda se: "{}_{}_{}".format(
                        se['holder_name'], se['entity_id'],
                        to_time_str(se['timestamp'], fmt=TIME_FORMAT_DAY)),
                    axis=1)

                df_to_db(df=df,
                         data_schema=self.data_schema,
                         provider=self.provider,
                         force_update=self.force_update)
Example #5
0
    def record(self, entity, start, end, size, timestamps):
        df = run_query(table='finance.STK_ML_QUOTA', conditions=f'link_id#=#{entity.code}&day#>=#{to_time_str(start)}')
        print(df)

        json_results = []

        for item in df.to_dict(orient='records'):
            result = {
                'provider': self.provider,
                'timestamp': item['day'],
                'name': entity.name,
                'buy_amount': multiple_number(item['buy_amount'], 100000000),
                'buy_volume': item['buy_volume'],
                'sell_amount': multiple_number(item['sell_amount'], 100000000),
                'sell_volume': item['sell_volume'],
                'quota_daily': multiple_number(item['quota_daily'], 100000000),
                'quota_daily_balance': multiple_number(item['quota_daily_balance'], 100000000)
            }

            json_results.append(result)

        if len(json_results) < 100:
            self.one_shot = True

        return json_results
Example #6
0
    def record(self, entity, start, end, size, timestamps):
        jq_code = code_map_jq.get(entity.code)

        df = run_query(
            table="finance.STK_MT_TOTAL",
            conditions=
            f"exchange_code#=#{jq_code}&date#>=#{to_time_str(start)}",
            parse_dates=["date"],
        )
        print(df)

        json_results = []

        for item in df.to_dict(orient="records"):
            result = {
                "provider": self.provider,
                "timestamp": item["date"],
                "name": entity.name,
                "margin_value": item["fin_value"],
                "margin_buy": item["fin_buy_value"],
                "short_value": item["sec_value"],
                "short_volume": item["sec_sell_volume"],
                "total_value": item["fin_sec_value"],
            }

            json_results.append(result)

        if len(json_results) < 100:
            self.one_shot = True

        return json_results
Example #7
0
    def record(self, entity, start, end, size, timestamps):
        df = run_query(table="finance.STK_ML_QUOTA", conditions=f"link_id#=#{entity.code}&day#>=#{to_time_str(start)}")
        print(df)

        json_results = []

        for item in df.to_dict(orient="records"):
            result = {
                "provider": self.provider,
                "timestamp": item["day"],
                "name": entity.name,
                "buy_amount": multiple_number(item["buy_amount"], 100000000),
                "buy_volume": item["buy_volume"],
                "sell_amount": multiple_number(item["sell_amount"], 100000000),
                "sell_volume": item["sell_volume"],
                "quota_daily": multiple_number(item["quota_daily"], 100000000),
                "quota_daily_balance": multiple_number(item["quota_daily_balance"], 100000000),
            }

            json_results.append(result)

        if len(json_results) < 100:
            self.one_shot = True

        return json_results
Example #8
0
    def run(self):
        # 按不同类别抓取
        # 编码	基金运作方式
        # 401001	开放式基金
        # 401002	封闭式基金
        # 401003	QDII
        # 401004	FOF
        # 401005	ETF
        # 401006	LOF
        for operate_mode_id in (401001, 401002, 401005):
            year_count = 2
            while True:
                latest = Fund.query_data(
                    filters=[Fund.operate_mode_id == operate_mode_id],
                    order=Fund.timestamp.desc(),
                    limit=1,
                    return_type="domain",
                )
                start_timestamp = "2000-01-01"
                if latest:
                    start_timestamp = latest[0].timestamp

                end_timestamp = min(
                    next_date(start_timestamp, 365 * year_count),
                    now_pd_timestamp())

                df = run_query(
                    table="finance.FUND_MAIN_INFO",
                    conditions=
                    f"operate_mode_id#=#{operate_mode_id}&start_date#>=#{to_time_str(start_timestamp)}&start_date#<=#{to_time_str(end_timestamp)}",
                    parse_dates=["start_date", "end_date"],
                    dtype={"main_code": str},
                )
                if not pd_is_not_null(df) or (df["start_date"].max().year <
                                              end_timestamp.year):
                    year_count = year_count + 1

                if pd_is_not_null(df):
                    df.rename(columns={"start_date": "timestamp"},
                              inplace=True)
                    df["timestamp"] = pd.to_datetime(df["timestamp"])
                    df["list_date"] = df["timestamp"]
                    df["end_date"] = pd.to_datetime(df["end_date"])

                    df["code"] = df["main_code"]
                    df["entity_id"] = df["code"].apply(
                        lambda x: to_entity_id(entity_type="fund", jq_code=x))
                    df["id"] = df["entity_id"]
                    df["entity_type"] = "fund"
                    df["exchange"] = "sz"
                    df_to_db(df,
                             data_schema=Fund,
                             provider=self.provider,
                             force_update=self.force_update)
                    self.logger.info(
                        f"persist fund {operate_mode_id} list success {start_timestamp} to {end_timestamp}"
                    )

                if is_same_date(end_timestamp, now_pd_timestamp()):
                    break
Example #9
0
    def run(self):
        # 按不同类别抓取
        # 编码	基金运作方式
        # 401001	开放式基金
        # 401002	封闭式基金
        # 401003	QDII
        # 401004	FOF
        # 401005	ETF
        # 401006	LOF
        for operate_mode_id in (401001, 401002, 401005):
            year_count = 2
            while True:
                latest = Fund.query_data(
                    filters=[Fund.operate_mode_id == operate_mode_id],
                    order=Fund.timestamp.desc(),
                    limit=1,
                    return_type='domain')
                start_timestamp = '2000-01-01'
                if latest:
                    start_timestamp = latest[0].timestamp

                end_timestamp = min(
                    next_date(start_timestamp, 365 * year_count),
                    now_pd_timestamp())

                df = run_query(
                    table='finance.FUND_MAIN_INFO',
                    conditions=
                    f'operate_mode_id#=#{operate_mode_id}&start_date#>=#{to_time_str(start_timestamp)}&start_date#<=#{to_time_str(end_timestamp)}',
                    parse_dates=['start_date', 'end_date'],
                    dtype={'main_code': str})
                if not pd_is_not_null(df) or (df['start_date'].max().year <
                                              end_timestamp.year):
                    year_count = year_count + 1

                if pd_is_not_null(df):
                    df.rename(columns={'start_date': 'timestamp'},
                              inplace=True)
                    df['timestamp'] = pd.to_datetime(df['timestamp'])
                    df['list_date'] = df['timestamp']
                    df['end_date'] = pd.to_datetime(df['end_date'])

                    df['code'] = df['main_code']
                    df['entity_id'] = df['code'].apply(
                        lambda x: to_entity_id(entity_type='fund', jq_code=x))
                    df['id'] = df['entity_id']
                    df['entity_type'] = 'fund'
                    df['exchange'] = 'sz'
                    df_to_db(df,
                             data_schema=Fund,
                             provider=self.provider,
                             force_update=self.force_update)
                    self.logger.info(
                        f'persist fund {operate_mode_id} list success {start_timestamp} to {end_timestamp}'
                    )

                if is_same_date(end_timestamp, now_pd_timestamp()):
                    break
Example #10
0
def jq_run_query(table='finance.STK_EXCHANGE_TRADE_INFO',
                 columns=None,
                 conditions=None,
                 count=1000,
                 dtype={
                     'code': str,
                     'symbol': str
                 },
                 parse_dates=['day', 'pub_date']):
    try:
        return jq.run_query(table=table,
                            columns=columns,
                            conditions=conditions,
                            count=count,
                            dtype=dtype,
                            parse_dates=parse_dates)
    except Exception as e:
        logger.error(f'jq_run_query, code: {dtype["code"]}, error: {e}')
    return None
Example #11
0
    def record(self, entity, start, end, size, timestamps):
        jq_code = code_map_jq.get(entity.code)

        df = run_query(
            table="finance.STK_EXCHANGE_TRADE_INFO",
            conditions=
            f"exchange_code#=#{jq_code}&date#>=#{to_time_str(start)}",
            parse_dates=["date"],
        )
        print(df)

        json_results = []

        for item in df.to_dict(orient="records"):
            result = {
                "provider":
                self.provider,
                "timestamp":
                item["date"],
                "name":
                entity.name,
                "pe":
                item["pe_average"],
                "total_value":
                multiple_number(item["total_market_cap"], 100000000),
                "total_tradable_vaule":
                multiple_number(item["circulating_market_cap"], 100000000),
                "volume":
                multiple_number(item["volume"], 10000),
                "turnover":
                multiple_number(item["money"], 100000000),
                "turnover_rate":
                item["turnover_ratio"],
            }

            json_results.append(result)

        if len(json_results) < 100:
            self.one_shot = True

        return json_results
Example #12
0
    def record(self, entity, start, end, size, timestamps):
        jq_code = code_map_jq.get(entity.code)

        df = run_query(
            table='finance.STK_EXCHANGE_TRADE_INFO',
            conditions=
            f'exchange_code#=#{jq_code}&date#>=#{to_time_str(start)}',
            parse_dates=['date'])
        print(df)

        json_results = []

        for item in df.to_dict(orient='records'):
            result = {
                'provider':
                self.provider,
                'timestamp':
                item['date'],
                'name':
                entity.name,
                'pe':
                item['pe_average'],
                'total_value':
                multiple_number(item['total_market_cap'], 100000000),
                'total_tradable_vaule':
                multiple_number(item['circulating_market_cap'], 100000000),
                'volume':
                multiple_number(item['volume'], 10000),
                'turnover':
                multiple_number(item['money'], 100000000),
                'turnover_rate':
                item['turnover_ratio']
            }

            json_results.append(result)

        if len(json_results) < 100:
            self.one_shot = True

        return json_results
Example #13
0
    def record(self, entity, start, end, size, timestamps):
        # 忽略退市的
        if entity.end_date:
            return None
        redundant_times = 1
        while redundant_times > 0:
            df = run_query(
                table="finance.FUND_PORTFOLIO_STOCK",
                conditions=
                f"pub_date#>=#{to_time_str(start)}&code#=#{entity.code}",
                parse_dates=None,
            )
            df = df.dropna()
            if pd_is_not_null(df):
                # data format
                #          id    code period_start  period_end    pub_date  report_type_id report_type  rank  symbol  name      shares    market_cap  proportion
                # 0   8640569  159919   2018-07-01  2018-09-30  2018-10-26          403003        第三季度     1  601318  中国平安  19869239.0  1.361043e+09        7.09
                # 1   8640570  159919   2018-07-01  2018-09-30  2018-10-26          403003        第三季度     2  600519  贵州茅台    921670.0  6.728191e+08        3.50
                # 2   8640571  159919   2018-07-01  2018-09-30  2018-10-26          403003        第三季度     3  600036  招商银行  18918815.0  5.806184e+08        3.02
                # 3   8640572  159919   2018-07-01  2018-09-30  2018-10-26          403003        第三季度     4  601166  兴业银行  22862332.0  3.646542e+08        1.90
                df["timestamp"] = pd.to_datetime(df["pub_date"])

                df.rename(columns={
                    "symbol": "stock_code",
                    "name": "stock_name"
                },
                          inplace=True)
                df["proportion"] = df["proportion"] * 0.01

                df = portfolio_relate_stock(df, entity)

                df["stock_id"] = df["stock_code"].apply(
                    lambda x: china_stock_code_to_id(x))
                df["id"] = df[["entity_id", "stock_id", "pub_date",
                               "id"]].apply(lambda x: "_".join(x.astype(str)),
                                            axis=1)
                df["report_date"] = pd.to_datetime(df["period_end"])
                df["report_period"] = df["report_type"].apply(
                    lambda x: jq_to_report_period(x))

                saved = df_to_db(df=df,
                                 data_schema=self.data_schema,
                                 provider=self.provider,
                                 force_update=self.force_update)

                # 取不到非重复的数据
                if saved == 0:
                    return None

                # self.logger.info(df.tail())
                self.logger.info(
                    f"persist fund {entity.code}({entity.name}) portfolio success {df.iloc[-1]['pub_date']}"
                )
                latest = df["timestamp"].max()

                # 取到了最近两年的数据,再请求一次,确保取完最新的数据
                if latest.year >= now_pd_timestamp().year - 1:
                    redundant_times = redundant_times - 1
                start = latest
            else:
                return None

        return None