Exemple #1
0
    def record(self, entity, start, end, size, timestamps):
        """Fetch the member stocks of a block and persist them.

        Requests pages 1 through 4 of ``self.category_stocks_url`` for
        ``entity.code``, decodes each response body with ``demjson3`` and
        stores one row per listed stock through ``df_to_db`` with
        ``force_update=True``. Paging stops early when a response body is the
        literal string ``"null"``.

        :param entity: block entity; ``id``, ``code``, ``name``, ``exchange``
            and ``category`` are read from it.
        :param start: not used by this method.
        :param end: not used by this method.
        :param size: not used by this method.
        :param timestamps: not used by this method.
        :return: None
        """
        block_id = entity.id
        for page in range(1, 5):
            # The request is intentionally outside the ``try`` so the handler
            # below can always reference ``resp``.
            resp = requests.get(self.category_stocks_url.format(page, entity.code))
            try:
                if resp.text is None or resp.text == "null":
                    break
                category_jsons = demjson3.decode(resp.text)
                the_list = []
                for category in category_jsons:
                    stock_code = category["code"]
                    stock_id = china_stock_code_to_id(stock_code)
                    the_list.append(
                        {
                            "id": "{}_{}".format(block_id, stock_id),
                            "entity_id": block_id,
                            "entity_type": "block",
                            "exchange": entity.exchange,
                            "code": entity.code,
                            "name": entity.name,
                            "timestamp": now_pd_timestamp(),
                            "stock_id": stock_id,
                            "stock_code": stock_code,
                            "stock_name": category["name"],
                        }
                    )
                if the_list:
                    df = pd.DataFrame.from_records(the_list)
                    df_to_db(data_schema=self.data_schema, df=df, provider=self.provider, force_update=True)

                self.logger.info("finish recording BlockStock:{},{}".format(entity.category, entity.name))

            except Exception as e:
                # The message needs one %s per argument; without placeholders
                # the logging module fails to format the record and the error
                # details are lost.
                self.logger.error("error:%s,resp.text:%s", e, resp.text)
            self.sleep()
Exemple #2
0
    def record(self, entity, start, end, size, timestamps):
        """Fetch the member stocks of a block and persist them.

        Issues one request to ``self.category_stocks_url`` for ``entity.code``,
        parses the body with ``json_callback_param`` and treats every result as
        a comma-separated string whose second field is the stock code and
        whose third field is the stock name. The rows are stored through
        ``df_to_db`` with ``force_update=True``.

        :param entity: block entity; ``id``, ``code``, ``name``, ``exchange``
            and ``category`` are read from it.
        :param start: not used by this method.
        :param end: not used by this method.
        :param size: not used by this method.
        :param timestamps: not used by this method.
        :return: None
        """
        # The request is intentionally outside the ``try`` so the handler below
        # can always reference ``resp``.
        resp = requests.get(self.category_stocks_url.format(entity.code, "1"), headers=DEFAULT_HEADER)
        try:
            results = json_callback_param(resp.text)
            block_id = entity.id
            the_list = []
            for result in results:
                items = result.split(",")
                stock_code = items[1]
                stock_id = china_stock_code_to_id(stock_code)

                the_list.append(
                    {
                        "id": "{}_{}".format(block_id, stock_id),
                        "entity_id": block_id,
                        "entity_type": "block",
                        "exchange": entity.exchange,
                        "code": entity.code,
                        "name": entity.name,
                        "timestamp": now_pd_timestamp(),
                        "stock_id": stock_id,
                        "stock_code": stock_code,
                        "stock_name": items[2],
                    }
                )
            if the_list:
                df = pd.DataFrame.from_records(the_list)
                df_to_db(data_schema=self.data_schema, df=df, provider=self.provider, force_update=True)

            self.logger.info("finish recording block:{},{}".format(entity.category, entity.name))

        except Exception as e:
            # The message needs one %s per argument; without placeholders the
            # logging module fails to format the record and the error details
            # are lost.
            self.logger.error("error:%s,resp.text:%s", e, resp.text)
        self.sleep()
    def download_sz_etf_component(self, df: pd.DataFrame):
        """Download and store the constituent stocks of every ETF row in ``df``.

        ``self.parse_sz_etf_underlying_index`` is applied to ``df`` first. For
        each row, the constituent page of its underlying index is fetched,
        the fourth HTML table on the page is taken as the constituent list
        and the result is written through ``df_to_db``. Rows with an empty
        underlying index, pages that fail to parse and pages with fewer than
        four tables are skipped.

        :param df: ETF list; the columns "拟合指数", "证券代码" and "证券简称"
            are read from each row.
        :return: None
        """
        url_template = "http://vip.stock.finance.sina.com.cn/corp/go.php/vII_NewestComponent/indexid/{}.phtml"

        self.parse_sz_etf_underlying_index(df)
        for _, row in df.iterrows():
            index_code = row["拟合指数"]
            etf_code = row["证券代码"]

            # No underlying index to look up: nothing to download for this ETF.
            if len(index_code) == 0:
                self.logger.info(f'{row["证券简称"]} - {etf_code} 非 A 股市场指数,跳过...')
                continue

            page = requests.get(url_template.format(index_code))
            page.encoding = "gbk"

            try:
                tables = pd.read_html(page.text, header=1)
            except ValueError as error:
                self.logger.error(
                    f"HTML parse error: {error}, response: {page.text}")
                continue

            if len(tables) < 4:
                continue

            components = tables[3].copy().dropna(axis=1, how="any")
            # Render the code as a zero-padded six-digit string.
            components["品种代码"] = components["品种代码"].apply(
                lambda raw_code: f"{raw_code:06d}")

            etf_id = f"etf_sz_{etf_code}"
            components = components[["品种代码", "品种名称"]].copy().rename(
                columns={"品种代码": "stock_code", "品种名称": "stock_name"})

            components["entity_id"] = etf_id
            components["entity_type"] = "etf"
            components["exchange"] = "sz"
            components["code"] = etf_code
            components["name"] = row["证券简称"]
            components["timestamp"] = now_pd_timestamp()

            components["stock_id"] = components["stock_code"].apply(
                china_stock_code_to_id)
            components["id"] = components["stock_id"].apply(
                lambda stock_id: f"{etf_id}_{stock_id}")

            df_to_db(data_schema=self.data_schema,
                     df=components,
                     provider=self.provider)
            self.logger.info(f'{row["证券简称"]} - {etf_code} 成分股抓取完成...')

            self.sleep()
    def download_sh_etf_component(self, df: pd.DataFrame):
        """Download and store the constituent stocks of the ETFs in ``df``.

        ETF_CLASS values: 1. single-market ETF, 2. cross-market ETF,
        3. cross-border ETF, 5. bond ETF, 6. gold ETF. Only classes "1" and
        "2" are processed.

        For each selected ETF the constituent list is requested, the
        ``result`` entry of the decoded response is turned into a DataFrame
        and written through ``df_to_db``. An ETF whose response carries no
        constituents is logged and skipped instead of aborting the whole run.

        :param df: ETF list data; the columns ``ETF_CLASS``, ``FUND_ID`` and
            ``FUND_NAME`` are read, plus ``ETF_TYPE`` after
            ``self.populate_sh_etf_type`` has been applied.
        :return: None
        """
        query_url = (
            "http://query.sse.com.cn/infodisplay/queryConstituentStockInfo.do?"
            "isPagination=false&type={}&etfClass={}")

        etf_df = df[(df["ETF_CLASS"] == "1") | (df["ETF_CLASS"] == "2")]
        etf_df = self.populate_sh_etf_type(etf_df)

        for _, etf in etf_df.iterrows():
            url = query_url.format(etf["ETF_TYPE"], etf["ETF_CLASS"])
            response = requests.get(url, headers=DEFAULT_SH_ETF_LIST_HEADER)
            response_dict = demjson3.decode(response.text)
            response_df = pd.DataFrame(response_dict.get("result", []))

            etf_code = etf["FUND_ID"]

            # An empty result yields a DataFrame without columns; selecting
            # the instrument columns from it would raise KeyError.
            if response_df.empty:
                self.logger.warning(
                    f'{etf["FUND_NAME"]} - {etf_code} returned no constituents, skipping')
                self.sleep()
                continue

            etf_id = f"etf_sh_{etf_code}"
            response_df = response_df[["instrumentId",
                                       "instrumentName"]].copy()
            response_df.rename(columns={
                "instrumentId": "stock_code",
                "instrumentName": "stock_name"
            },
                               inplace=True)

            response_df["entity_id"] = etf_id
            response_df["entity_type"] = "etf"
            response_df["exchange"] = "sh"
            response_df["code"] = etf_code
            response_df["name"] = etf["FUND_NAME"]
            response_df["timestamp"] = now_pd_timestamp()

            response_df["stock_id"] = response_df["stock_code"].apply(
                china_stock_code_to_id)
            response_df["id"] = response_df["stock_id"].apply(
                lambda x: f"{etf_id}_{x}")

            df_to_db(data_schema=self.data_schema,
                     df=response_df,
                     provider=self.provider)
            self.logger.info(f'{etf["FUND_NAME"]} - {etf_code} 成分股抓取完成...')

            self.sleep()
Exemple #5
0
def get_cn_index_stock(code, timestamp, name=None):
    """Fetch the constituent stocks of an index for the month of ``timestamp``.

    The request URL is built from the module-level ``url`` template, the index
    ``code`` and ``timestamp`` formatted with ``TIME_FORMAT_MON``. The
    exchange of the index is fixed to ``"sz"``.

    :param code: index code used in the request and in the generated ids.
    :param timestamp: point in time selecting which month to query.
    :param name: optional index name copied into every row.
    :return: a DataFrame with one row per constituent, or None when the
        response carries no data or no rows.
    """
    entity_type = "index"
    exchange = "sz"
    entity_id = f"{entity_type}_{exchange}_{code}"
    data_str = to_time_str(timestamp, TIME_FORMAT_MON)
    resp = requests.get(url.format(code, data_str), headers=DEFAULT_HEADER)
    data = _get_resp_data(resp)
    if not data:
        return None
    # Reuse the payload extracted above instead of extracting it a second time.
    results = data["rows"]

    the_list = []
    for result in results:
        # Sample row (secname and trade are returned in Chinese):
        # date: 1614268800000
        # dateStr: "2021-02-26"
        # freeMarketValue: 10610.8
        # indexcode: "399370"
        # market: null
        # seccode: "600519"
        # secname: "<stock name>"
        # totalMarketValue: 26666.32
        # trade: "<industry name>"
        # weight: 10.01
        stock_code = result["seccode"]
        stock_name = result["secname"]
        stock_id = china_stock_code_to_id(stock_code)

        the_list.append(
            {
                "id": "{}_{}_{}".format(entity_id, result["dateStr"], stock_id),
                "entity_id": entity_id,
                "entity_type": entity_type,
                "exchange": exchange,
                "code": code,
                "name": name,
                "timestamp": to_pd_timestamp(result["dateStr"]),
                "stock_id": stock_id,
                "stock_code": stock_code,
                "stock_name": stock_name,
                "proportion": value_to_pct(result["weight"], 0),
                "market_cap": value_multiply(result["freeMarketValue"], 100000000, 0),
            }
        )
    if not the_list:
        return None
    return pd.DataFrame.from_records(the_list)
Exemple #6
0
def get_cn_index_stock(code, timestamp, name=None):
    """Fetch the constituent stocks of an index for the month of ``timestamp``.

    The request URL is built from the module-level ``url`` template, the index
    ``code`` and ``timestamp`` formatted with ``TIME_FORMAT_MON``. The
    exchange of the index is fixed to ``'sz'``.

    :param code: index code used in the request and in the generated ids.
    :param timestamp: point in time selecting which month to query.
    :param name: optional index name copied into every row.
    :return: a DataFrame with one row per constituent, or None when the
        response carries no data or no rows.
    """
    entity_type = 'index'
    exchange = 'sz'
    entity_id = f'{entity_type}_{exchange}_{code}'
    data_str = to_time_str(timestamp, TIME_FORMAT_MON)
    resp = requests.get(url.format(code, data_str), headers=DEFAULT_HEADER)
    data = _get_resp_data(resp)
    if not data:
        return None
    # Reuse the payload extracted above instead of extracting it a second time.
    results = data['rows']

    the_list = []
    for result in results:
        # Sample row (secname and trade are returned in Chinese):
        # date: 1614268800000
        # dateStr: "2021-02-26"
        # freeMarketValue: 10610.8
        # indexcode: "399370"
        # market: null
        # seccode: "600519"
        # secname: "<stock name>"
        # totalMarketValue: 26666.32
        # trade: "<industry name>"
        # weight: 10.01
        stock_code = result['seccode']
        stock_name = result['secname']
        stock_id = china_stock_code_to_id(stock_code)

        the_list.append({
            'id': '{}_{}_{}'.format(entity_id, result['dateStr'], stock_id),
            'entity_id': entity_id,
            'entity_type': entity_type,
            'exchange': exchange,
            'code': code,
            'name': name,
            'timestamp': to_pd_timestamp(result['dateStr']),
            'stock_id': stock_id,
            'stock_code': stock_code,
            'stock_name': stock_name,
            'proportion': value_to_pct(result['weight'], 0),
            'market_cap': value_multiply(result['freeMarketValue'], 100000000, 0)
        })
    if not the_list:
        return None
    return pd.DataFrame.from_records(the_list)
Exemple #7
0
    def record(self, entity, start, end, size, timestamps):
        """Query the stock holdings published for ``entity`` and persist them.

        Rows of ``finance.FUND_PORTFOLIO_STOCK`` with ``pub_date`` on or after
        ``start`` and ``code`` equal to ``entity.code`` are fetched, reshaped
        and written through ``df_to_db``. Nothing is written when
        ``pd_is_not_null`` rejects the query result.

        Columns returned by the query: id, code, period_start, period_end,
        pub_date, report_type_id, report_type, rank, symbol, name, shares,
        market_cap, proportion.

        :param entity: the fund/ETF entity; ``code`` is read here and the
            entity is handed to ``portfolio_relate_stock``.
        :param start: lower bound for ``pub_date``.
        :param end: not used by this method.
        :param size: not used by this method.
        :param timestamps: not used by this method.
        :return: None
        """
        holdings = run_query(
            table="finance.FUND_PORTFOLIO_STOCK",
            conditions=f"pub_date#>=#{to_time_str(start)}&code#=#{entity.code}",
            parse_dates=None,
        )
        if not pd_is_not_null(holdings):
            return None

        holdings["timestamp"] = pd.to_datetime(holdings["pub_date"])
        holdings.rename(columns={"symbol": "stock_code", "name": "stock_name"},
                        inplace=True)
        # The sample data shows proportion as a percentage (e.g. 7.09); scale
        # it to a fraction.
        holdings["proportion"] = holdings["proportion"].mul(0.01)

        # presumably adds the entity columns (entity_id is read below) — verify
        # against portfolio_relate_stock
        holdings = portfolio_relate_stock(holdings, entity)

        holdings["stock_id"] = holdings["stock_code"].apply(
            china_stock_code_to_id)
        # New id: entity id, stock id, publication date and original row id
        # joined with underscores.
        key_columns = ["entity_id", "stock_id", "pub_date", "id"]
        holdings["id"] = holdings[key_columns].apply(
            lambda row: "_".join(row.astype(str)), axis=1)
        holdings["report_date"] = pd.to_datetime(holdings["period_end"])
        holdings["report_period"] = holdings["report_type"].apply(
            jq_to_report_period)

        df_to_db(df=holdings,
                 data_schema=self.data_schema,
                 provider=self.provider,
                 force_update=self.force_update)

        last_pub_date = holdings.iloc[-1]["pub_date"]
        self.logger.info(
            f"persist etf {entity.code} portfolio success {last_pub_date}"
        )
        return None
    def record(self, entity, start, end, size, timestamps):
        """Fetch the member stocks of a block and persist them.

        Requests pages 1 through 4 of ``self.category_stocks_url`` for
        ``entity.code``, decodes each response body with ``demjson`` and
        stores one row per listed stock through ``df_to_db`` with
        ``force_update=True``. Paging stops early when a response body is the
        literal string ``'null'``.

        :param entity: block entity; ``id``, ``code``, ``name``, ``exchange``
            and ``category`` are read from it.
        :param start: not used by this method.
        :param end: not used by this method.
        :param size: not used by this method.
        :param timestamps: not used by this method.
        :return: None
        """
        block_id = entity.id
        for page in range(1, 5):
            # The request is intentionally outside the ``try`` so the handler
            # below can always reference ``resp``.
            resp = requests.get(
                self.category_stocks_url.format(page, entity.code))
            try:
                if resp.text is None or resp.text == 'null':
                    break
                category_jsons = demjson.decode(resp.text)
                the_list = []
                for category in category_jsons:
                    stock_code = category['code']
                    stock_id = china_stock_code_to_id(stock_code)
                    the_list.append({
                        'id': '{}_{}'.format(block_id, stock_id),
                        'entity_id': block_id,
                        'entity_type': 'block',
                        'exchange': entity.exchange,
                        'code': entity.code,
                        'name': entity.name,
                        'timestamp': now_pd_timestamp(),
                        'stock_id': stock_id,
                        'stock_code': stock_code,
                        'stock_name': category['name'],
                    })
                if the_list:
                    df = pd.DataFrame.from_records(the_list)
                    df_to_db(data_schema=self.data_schema,
                             df=df,
                             provider=self.provider,
                             force_update=True)

                self.logger.info('finish recording BlockStock:{},{}'.format(
                    entity.category, entity.name))

            except Exception as e:
                # The message needs one %s per argument; without placeholders
                # the logging module fails to format the record and the error
                # details are lost.
                self.logger.error("error:%s,resp.text:%s", e, resp.text)
            self.sleep()
    def record(self, entity, start, end, size, timestamps):
        """Query the stock holdings published for a fund and persist them.

        Entities with a truthy ``end_date`` are skipped. Otherwise rows of
        ``finance.FUND_PORTFOLIO_STOCK`` with ``pub_date`` on or after
        ``start`` are fetched in a loop; after each saved batch ``start``
        moves to the newest ``timestamp`` seen. The loop ends when the query
        yields nothing usable, when ``df_to_db`` reports 0 saved rows, or once
        the newest timestamp falls in the current or previous calendar year.

        Columns returned by the query: id, code, period_start, period_end,
        pub_date, report_type_id, report_type, rank, symbol, name, shares,
        market_cap, proportion.

        :param entity: the fund entity; ``end_date``, ``code`` and ``name``
            are read here and the entity is handed to
            ``portfolio_relate_stock``.
        :param start: lower bound for ``pub_date`` of the first query.
        :param end: not used by this method.
        :param size: not used by this method.
        :param timestamps: not used by this method.
        :return: None
        """
        # Skip entities that already have an end date (delisted).
        if entity.end_date:
            return None

        redundant_times = 1
        while redundant_times > 0:
            holdings = run_query(
                table='finance.FUND_PORTFOLIO_STOCK',
                conditions=f'pub_date#>=#{to_time_str(start)}&code#=#{entity.code}',
                parse_dates=None)
            holdings = holdings.dropna()
            if not pd_is_not_null(holdings):
                return None

            holdings['timestamp'] = pd.to_datetime(holdings['pub_date'])
            holdings.rename(columns={'symbol': 'stock_code', 'name': 'stock_name'},
                            inplace=True)
            # The sample data shows proportion as a percentage (e.g. 7.09);
            # scale it to a fraction.
            holdings['proportion'] = holdings['proportion'].mul(0.01)

            # presumably adds the entity columns (entity_id is read below) —
            # verify against portfolio_relate_stock
            holdings = portfolio_relate_stock(holdings, entity)

            holdings['stock_id'] = holdings['stock_code'].apply(
                china_stock_code_to_id)
            # New id: entity id, stock id, publication date and original row
            # id joined with underscores.
            key_columns = ['entity_id', 'stock_id', 'pub_date', 'id']
            holdings['id'] = holdings[key_columns].apply(
                lambda row: '_'.join(row.astype(str)), axis=1)
            holdings['report_date'] = pd.to_datetime(holdings['period_end'])
            holdings['report_period'] = holdings['report_type'].apply(
                jq_to_report_period)

            saved = df_to_db(df=holdings,
                             data_schema=self.data_schema,
                             provider=self.provider,
                             force_update=self.force_update)

            # Nothing new was stored: no non-duplicate data is available.
            if saved == 0:
                return None

            last_pub_date = holdings.iloc[-1]['pub_date']
            self.logger.info(
                f"persist fund {entity.code}({entity.name}) portfolio success {last_pub_date}"
            )
            latest = holdings['timestamp'].max()

            # Once the data reaches the current or previous year, use up the
            # remaining retry budget so the loop ends.
            if latest.year >= now_pd_timestamp().year - 1:
                redundant_times -= 1
            start = latest

        return None