Beispiel #1
0
    def record(self, entity, start, end, size, timestamps):
        """Query ``finance.STK_HK_HOLD_INFO`` per timestamp and persist the rows.

        One query is issued for every element of ``timestamps``, filtered by
        ``link_id == entity.code`` and ``day == to_time_str(timestamp)``.
        Results accepted by ``pd_is_not_null`` are reshaped and written with
        ``df_to_db``; all other results are skipped.

        Args:
            entity: Object whose ``code`` attribute is used as the ``link_id``
                filter value.
            start: Not used by this implementation.
            end: Not used by this implementation.
            size: Not used by this implementation.
            timestamps: Iterable of timestamps; each one triggers one query.

        Returns:
            None.
        """
        for timestamp in timestamps:
            df = run_query(
                table="finance.STK_HK_HOLD_INFO",
                conditions=
                f"link_id#=#{entity.code}&day#=#{to_time_str(timestamp)}")
            # The result is deliberately not echoed to stdout: printing a whole
            # frame for every timestamp is debugging noise in a recorder.

            if not pd_is_not_null(df):
                continue

            df = df.rename(columns={
                "day": "timestamp",
                "link_id": "holder_code",
                "link_name": "holder_name",
            })
            df["timestamp"] = pd.to_datetime(df["timestamp"])

            # entity_id must be derived from the full code, before the suffix
            # after the first "." is stripped from the code column.
            df["entity_id"] = df["code"].apply(
                lambda x: to_entity_id(entity_type="stock", jq_code=x))
            df["code"] = df["code"].apply(lambda x: x.split(".")[0])

            # id format: {holder_name}_{entity_id}_{timestamp}
            # Built positionally so no row-wise DataFrame.apply is needed.
            df["id"] = [
                "{}_{}_{}".format(holder_name, entity_id,
                                  to_time_str(day, fmt=TIME_FORMAT_DAY))
                for holder_name, entity_id, day in zip(
                    df["holder_name"], df["entity_id"], df["timestamp"])
            ]

            df_to_db(df=df,
                     data_schema=self.data_schema,
                     provider=self.provider,
                     force_update=self.force_update)
Beispiel #2
0
    def run(self):
        """Fetch ``finance.FUND_MAIN_INFO`` and persist it as ``FundDetail``.

        The query result is reshaped into entity rows (``timestamp``,
        ``list_date``, ``entity_id``, ``id``, ``entity_type``, ``exchange``,
        ``code``, ``category``), written with ``df_to_db`` and finally
        ``logout()`` is called.

        Returns:
            None.
        """
        # Fetch the fund list.
        df = finance.run_query(query(finance.FUND_MAIN_INFO))

        # Expose the index as a leading ``entity_id`` column; its values are
        # replaced further down.
        df.index.name = 'entity_id'
        df = df.reset_index()

        # Listing date: ``start_date`` becomes ``timestamp`` and is mirrored
        # into ``list_date``.
        df = df.rename(columns={'start_date': 'timestamp'})
        df['timestamp'] = pd.to_datetime(df['timestamp'])
        df['list_date'] = df['timestamp']
        df['end_date'] = pd.to_datetime(df['end_date'])

        def _fund_entity_id(main_code):
            # Normalise the raw code first, then convert it to an entity id.
            return to_entity_id(entity_type='fund',
                                jq_code=normalize_code(main_code))

        df['entity_id'] = df.main_code.apply(_fund_entity_id)

        entity_ids = df['entity_id']
        df['id'] = entity_ids
        df['entity_type'] = 'fund'
        df['exchange'] = entity_ids.apply(get_entity_exchange)
        df['code'] = entity_ids.apply(get_entity_code)
        df['category'] = 'fund'

        df_to_db(df,
                 data_schema=FundDetail,
                 provider=self.provider,
                 force_update=self.force_update)

        self.logger.info("persist etf list success")
        logout()
Beispiel #3
0
    def record(self, entity, start, end, size, timestamps):
        """Persist ``finance.STK_HK_HOLD_INFO`` rows for each given timestamp.

        For each timestamp the table is queried with the conditions
        ``link_id == entity.code`` and ``day == to_time_str(timestamp)``.
        When ``pd_is_not_null`` accepts the result, the columns are renamed,
        ``entity_id``/``code``/``id`` are derived and the frame is passed to
        ``df_to_db``.

        Args:
            entity: Object providing the ``code`` used in the query condition.
            start: Not used by this implementation.
            end: Not used by this implementation.
            size: Not used by this implementation.
            timestamps: Iterable of timestamps to query.

        Returns:
            None.
        """

        def _row_id(row):
            # id format: {holder_name}_{entity_id}_{timestamp}
            return "{}_{}_{}".format(
                row['holder_name'], row['entity_id'],
                to_time_str(row['timestamp'], fmt=TIME_FORMAT_DAY))

        for timestamp in timestamps:
            df = run_query(
                table='finance.STK_HK_HOLD_INFO',
                conditions=
                f'link_id#=#{entity.code}&day#=#{to_time_str(timestamp)}')
            # No debug dump of the frame here; the recorder should not write
            # query results to stdout on every iteration.

            if pd_is_not_null(df):
                df.rename(columns={
                    'day': 'timestamp',
                    'link_id': 'holder_code',
                    'link_name': 'holder_name'
                },
                          inplace=True)
                df['timestamp'] = pd.to_datetime(df['timestamp'])

                # Derive entity_id from the full code before truncating it.
                df['entity_id'] = df['code'].apply(
                    lambda x: to_entity_id(entity_type='stock', jq_code=x))
                df['code'] = df['code'].apply(lambda x: x.split('.')[0])

                df['id'] = df[['holder_name', 'entity_id',
                               'timestamp']].apply(_row_id, axis=1)

                df_to_db(df=df,
                         data_schema=self.data_schema,
                         provider=self.provider,
                         force_update=self.force_update)
Beispiel #4
0
    def run(self):
        """Download the fund master table and store it under ``FundDetail``.

        Reads ``finance.FUND_MAIN_INFO``, derives the entity columns from
        ``main_code`` and ``start_date``, writes the frame through
        ``df_to_db`` and then calls ``logout()``.

        Returns:
            None.
        """
        # Fetch the fund list; the index becomes the first column, named
        # ``entity_id`` (its values are overwritten below).
        funds = finance.run_query(query(finance.FUND_MAIN_INFO))
        funds = funds.rename_axis('entity_id').reset_index()

        # Listing date.
        funds = funds.rename(columns={'start_date': 'timestamp'})
        funds['timestamp'] = pd.to_datetime(funds['timestamp'])
        funds['list_date'] = funds['timestamp']
        funds['end_date'] = pd.to_datetime(funds['end_date'])

        normalized_codes = funds.main_code.apply(normalize_code)
        funds['entity_id'] = normalized_codes.apply(
            lambda code: to_entity_id(entity_type='fund', jq_code=code))

        funds['id'] = funds['entity_id']
        funds['entity_type'] = 'fund'
        funds['exchange'] = funds['entity_id'].apply(get_entity_exchange)
        funds['code'] = funds['entity_id'].apply(get_entity_code)
        funds['category'] = 'fund'

        df_to_db(funds,
                 data_schema=FundDetail,
                 provider=self.provider,
                 force_update=self.force_update)

        self.logger.info("persist etf list success")
        logout()
    def record(self, entity, start, end, size, timestamps):
        """Record the stocks that ``c.sector`` reports for a ``gics`` block.

        Entities whose ``block_type`` is not ``'gics'`` are ignored. The
        entries of ``c.sector(...).Data`` are split into items containing
        ``.SH``/``.SZ`` (treated as stock codes) and all remaining items
        (treated as stock names); one row per code is persisted with
        ``df_to_db`` using ``force_update=True``.

        Args:
            entity: Block entity; ``block_type``, ``code``, ``exchange``,
                ``name``, ``id`` and ``category`` are read.
            start: Not used by this implementation.
            end: Not used by this implementation.
            size: Not used by this implementation.
            timestamps: Not used by this implementation.

        Returns:
            None.
        """
        if entity.block_type != 'gics':
            return None

        industry_stocks = c.sector(entity.code,
                                   to_time_str(now_pd_timestamp()))
        if len(industry_stocks.Data) == 0:
            return None

        # NOTE(review): this assumes Data carries one name per code so that
        # both lists line up positionally - confirm against the data source.
        codes = [i for i in industry_stocks.Data if '.SH' in i or '.SZ' in i]
        names = [
            i for i in industry_stocks.Data
            if '.SH' not in i and '.SZ' not in i
        ]
        # Stop before building the frame: with no matching codes the two
        # lists have different lengths and the DataFrame constructor would
        # raise, so an emptiness check placed after it can never take effect.
        if not codes:
            return None

        df = pd.DataFrame({"stock": codes, "stock_name": names})
        df["stock_id"] = df.stock.apply(
            lambda x: to_entity_id(x, "stock").lower())
        # Third "_"-separated field of the stock id.
        df["stock_code"] = df.stock_id.str.split("_", expand=True)[2]
        df["code"] = entity.code
        df["exchange"] = entity.exchange
        df["name"] = entity.name
        df["timestamp"] = now_pd_timestamp()
        df["entity_id"] = entity.id
        df["block_type"] = entity.block_type
        df["entity_type"] = "block"
        # id format: {entity_id}_{stock_id}
        df["id"] = df["entity_id"] + "_" + df["stock_id"]

        df_to_db(data_schema=self.data_schema,
                 df=df,
                 provider=self.provider,
                 force_update=True)

        self.logger.info('finish recording BlockStock:{},{}'.format(
            entity.category, entity.name))
    def record(self, entity, start, end, size, timestamps):
        """Query ``finance.STK_HK_HOLD_INFO`` per timestamp and store the rows.

        Each timestamp produces one query filtered on
        ``link_id == entity.code`` and ``day == to_time_str(timestamp)``.
        Results accepted by ``pd_is_not_null`` are reshaped and written with
        ``df_to_db`` for ``Region.CHN``.

        Args:
            entity: Object whose ``code`` is matched against ``link_id``.
            start: Not used by this implementation.
            end: Not used by this implementation.
            size: Not used by this implementation.
            timestamps: Iterable of timestamps to query.

        Returns:
            None.
        """

        def _stock_entity_id(jq_code):
            return to_entity_id(entity_type=EntityType.Stock, jq_code=jq_code)

        def _domain_id(row):
            # id format: {holder_name}_{entity_id}_{timestamp}
            day = to_time_str(row['timestamp'], fmt=TIME_FORMAT_DAY)
            return "{}_{}_{}".format(row['holder_name'], row['entity_id'],
                                     day)

        for timestamp in timestamps:
            hold_query = jq_query(finance.STK_HK_HOLD_INFO).filter(
                finance.STK_HK_HOLD_INFO.link_id == entity.code,
                finance.STK_HK_HOLD_INFO.day == to_time_str(timestamp))
            df = finance.run_query(hold_query)

            if not pd_is_not_null(df):
                continue

            df = df.rename(columns={
                'day': 'timestamp',
                'link_id': 'holder_code',
                'link_name': 'holder_name',
            })
            df['timestamp'] = pd.to_datetime(df['timestamp'])

            # entity_id is computed from the full code; afterwards only the
            # part before the first "." is kept in the code column.
            full_codes = df['code']
            df['entity_id'] = full_codes.apply(_stock_entity_id)
            df['code'] = full_codes.apply(lambda x: x.split('.')[0])

            id_parts = df[['holder_name', 'entity_id', 'timestamp']]
            df['id'] = id_parts.apply(_domain_id, axis=1)

            df_to_db(df=df,
                     region=Region.CHN,
                     data_schema=self.data_schema,
                     provider=self.provider,
                     force_update=self.force_update)
Beispiel #7
0
    def record(self, entity, start, end, size, timestamps):
        """Record the current member stocks of a block entity.

        ``get_industry_stocks`` is tried first; if it raises, the lookup
        falls back to ``get_concept_stocks``. One row per returned stock is
        persisted through ``df_to_db`` with ``force_update=True``.

        Args:
            entity: Block entity; ``code``, ``block_type``, ``name``,
                ``exchange``, ``id`` and ``category`` are read.
            start: Not used by this implementation.
            end: Not used by this implementation.
            size: Not used by this implementation.
            timestamps: Not used by this implementation.

        Returns:
            None.
        """
        try:
            industry_stocks = get_industry_stocks(entity.code, date=now_pd_timestamp())
        except Exception:
            # Only ordinary errors trigger the fallback; KeyboardInterrupt and
            # SystemExit are no longer swallowed as a bare ``except`` did.
            industry_stocks = get_concept_stocks(entity.code, date=now_pd_timestamp())
        if len(industry_stocks) == 0:
            return None

        df = pd.DataFrame({"stock": industry_stocks})
        df["stock_id"] = df.stock.apply(lambda x: to_entity_id(x, "stock"))
        # Third "_"-separated field of the stock id.
        df["stock_code"] = df.stock_id.str.split("_", expand=True)[2]
        # One ``get_data`` lookup per stock to resolve its name.
        df["stock_name"] = df.stock_id.apply(
            lambda x: get_data(data_schema=Stock, entity_id=x, provider='joinquant').name)
        df["block_type"] = entity.block_type
        df["code"] = entity.code
        df["name"] = entity.name
        df["exchange"] = entity.exchange
        df["timestamp"] = now_pd_timestamp()
        df["entity_id"] = entity.id
        df["entity_type"] = "block"
        # id format: {entity_id}_{stock_id}
        df["id"] = df["entity_id"] + "_" + df["stock_id"]

        # The frame has one row per stock and the stock list is non-empty
        # here, so no further emptiness check is needed before persisting.
        df_to_db(data_schema=self.data_schema, df=df, provider=self.provider,
                 force_update=True)

        self.logger.info('finish recording BlockStock:{},{}'.format(entity.category, entity.name))
    def to_zvt_entity(self, df, entity_type, category=None):
        """Reshape a raw listing frame into entity rows.

        The index is exposed as ``entity_id`` and converted with
        ``to_entity_id``; ``start_date`` becomes ``timestamp``/``list_date``;
        ``exchange``, ``code`` and ``name`` are derived. For
        ``entity_type == 'etf'`` the underlying index is additionally looked
        up through ``c.css`` inside a ``c.start``/``c.stop`` session.

        Note:
            ``df.index.name`` of the passed frame is set to ``'entity_id'``
            as a side effect; all other changes happen on a new frame.

        Args:
            df: Frame indexed by the raw code, with ``start_date``,
                ``end_date`` and ``display_name`` columns.
            entity_type: Entity type passed to ``to_entity_id`` and stored in
                the ``entity_type`` column.
            category: Optional value for a ``category`` column; the column is
                only added when this is truthy.

        Returns:
            The reshaped frame.
        """
        df.index.name = 'entity_id'
        df = df.reset_index()
        # Listing date
        df.rename(columns={'start_date': 'timestamp'}, inplace=True)
        df['timestamp'] = pd.to_datetime(df['timestamp'])
        df['list_date'] = df['timestamp']
        df['end_date'] = pd.to_datetime(df['end_date'])

        df['entity_id'] = df['entity_id'].apply(lambda x: to_entity_id(entity_type=entity_type, jq_code=x))
        df['id'] = df['entity_id']
        df['entity_type'] = entity_type
        df['exchange'] = df['entity_id'].apply(lambda x: get_entity_exchange(x))
        df['code'] = df['entity_id'].apply(lambda x: get_entity_code(x))
        df['name'] = df['display_name']
        if entity_type == 'etf':
            # ETF: look up the underlying index
            df['choice_code'] = df.apply(lambda x: x.code + '.' + x.exchange.upper(), axis=1)
            c.start("ForceLogin=1", '')
            try:
                df['underlying_index_code'] = df.apply(
                    lambda x: c.css(x.choice_code, "BMINDEXCODE", "Rank=1").Data, axis=1)
            finally:
                # Always close the session, even when a lookup fails.
                c.stop()

            # First element of the first value of each returned mapping.
            df['index_codes'] = df['underlying_index_code'].apply(lambda x: list(x.values())[0][0])
            code_parts = df['index_codes'].apply(lambda x: str(x).split('.'))
            # Exchange is the part after the first "."; None when there is none.
            df['index_exchange'] = code_parts.apply(lambda x: x[1] if len(x) > 1 else None)
            df['index_code'] = code_parts.apply(lambda x: x[0])
            df['underlying_index_code'] = df.apply(
                lambda x: 'index_' + x.index_exchange.lower() + '_' + x.index_code if x.index_exchange else None,
                axis=1)
        if category:
            df['category'] = category

        return df
Beispiel #9
0
    def run(self):
        """Incrementally fetch ``finance.FUND_MAIN_INFO`` and persist ``Fund`` rows.

        For each operate mode id the loop repeatedly queries a date window
        that starts at the newest stored ``Fund.timestamp`` (or
        ``'2000-01-01'`` when nothing is stored) and spans
        ``365 * year_count`` days, capped at the current time. The window is
        widened whenever a query yields nothing or does not reach the
        window's end year. The loop ends once the window end falls on the
        current date.

        Returns:
            None.
        """
        # Fetch by category.
        # Code      Fund operate mode
        # 401001    open-end fund
        # 401002    closed-end fund
        # 401003    QDII
        # 401004    FOF
        # 401005    ETF
        # 401006    LOF
        for operate_mode_id in (401001, 401002, 401005):
            year_count = 2
            reached_today = False
            while not reached_today:
                latest = Fund.query_data(
                    region=self.region,
                    filters=[Fund.operate_mode_id == operate_mode_id],
                    order=Fund.timestamp.desc(),
                    limit=1,
                    return_type='domain')
                start_timestamp = latest[0].timestamp if latest else '2000-01-01'

                end_timestamp = min(
                    next_date(start_timestamp, 365 * year_count),
                    now_pd_timestamp(self.region))

                df = jq_run_query(
                    table='finance.FUND_MAIN_INFO',
                    conditions=
                    f'operate_mode_id#=#{operate_mode_id}&start_date#>=#{to_time_str(start_timestamp)}&start_date#<=#{to_time_str(end_timestamp)}',
                    parse_dates=['start_date', 'end_date'],
                    dtype={'main_code': str})

                has_rows = pd_is_not_null(df)
                # Widen the next window when nothing came back or the newest
                # start_date lies in a year before the window's end year.
                if not has_rows or df['start_date'].max().year < end_timestamp.year:
                    year_count += 1

                if has_rows:
                    df = df.rename(columns={'start_date': 'timestamp'})
                    df['timestamp'] = pd.to_datetime(df['timestamp'])
                    df['list_date'] = df['timestamp']
                    df['end_date'] = pd.to_datetime(df['end_date'])

                    df['code'] = df['main_code']

                    def _fund_entity_id(jq_code):
                        return to_entity_id(entity_type='fund', jq_code=jq_code)

                    df['entity_id'] = df['code'].apply(_fund_entity_id)
                    df['id'] = df['entity_id']
                    df['entity_type'] = 'fund'
                    df['exchange'] = 'sz'
                    df_to_db(df,
                             ref_df=None,
                             region=self.region,
                             data_schema=Fund,
                             provider=self.provider)
                    self.logger.info(
                        f'persist fund {operate_mode_id} list success {start_timestamp} to {end_timestamp}'
                    )

                reached_today = is_same_date(end_timestamp,
                                             now_pd_timestamp(self.region))
Beispiel #10
0
    def run(self):
        """Fetch ``finance.FUND_MAIN_INFO`` window by window and store ``Fund`` rows.

        Per operate mode id, each pass queries funds whose ``start_date``
        lies between the newest stored ``Fund.timestamp`` (``"2000-01-01"``
        if none) and that start plus ``365 * year_count`` days, capped at the
        current time. ``year_count`` grows when a pass returns nothing or
        stops short of the window's end year; the loop stops when the window
        end is the current date.

        Returns:
            None.
        """
        # Fetch by category.
        # Code      Fund operate mode
        # 401001    open-end fund
        # 401002    closed-end fund
        # 401003    QDII
        # 401004    FOF
        # 401005    ETF
        # 401006    LOF
        for operate_mode_id in (401001, 401002, 401005):
            year_count = 2
            while True:
                newest = Fund.query_data(
                    filters=[Fund.operate_mode_id == operate_mode_id],
                    order=Fund.timestamp.desc(),
                    limit=1,
                    return_type="domain",
                )
                if newest:
                    start_timestamp = newest[0].timestamp
                else:
                    start_timestamp = "2000-01-01"

                window_end = next_date(start_timestamp, 365 * year_count)
                end_timestamp = min(window_end, now_pd_timestamp())

                df = run_query(
                    table="finance.FUND_MAIN_INFO",
                    conditions=
                    f"operate_mode_id#=#{operate_mode_id}&start_date#>=#{to_time_str(start_timestamp)}&start_date#<=#{to_time_str(end_timestamp)}",
                    parse_dates=["start_date", "end_date"],
                    dtype={"main_code": str},
                )

                if pd_is_not_null(df):
                    # Data stopped before the window's end year: look further
                    # ahead on the next pass.
                    if df["start_date"].max().year < end_timestamp.year:
                        year_count = year_count + 1

                    df = df.rename(columns={"start_date": "timestamp"})
                    listed_on = pd.to_datetime(df["timestamp"])
                    df["timestamp"] = listed_on
                    df["list_date"] = listed_on
                    df["end_date"] = pd.to_datetime(df["end_date"])

                    df["code"] = df["main_code"]
                    df["entity_id"] = df["code"].apply(
                        lambda x: to_entity_id(entity_type="fund", jq_code=x))
                    df["id"] = df["entity_id"]
                    df["entity_type"] = "fund"
                    df["exchange"] = "sz"
                    df_to_db(df,
                             data_schema=Fund,
                             provider=self.provider,
                             force_update=self.force_update)
                    self.logger.info(
                        f"persist fund {operate_mode_id} list success {start_timestamp} to {end_timestamp}"
                    )
                else:
                    # Nothing returned for this window: widen it.
                    year_count = year_count + 1

                if is_same_date(end_timestamp, now_pd_timestamp()):
                    break
    def format(self, entity, df):
        """Normalise a holder frame in place and return it.

        Renames ``day``/``link_id``/``link_name``, replaces missing values in
        numeric columns with 0, parses ``timestamp``, derives ``entity_id``
        from ``code``, sets ``provider``, keeps only the part of ``code``
        before the first ``"."`` and assigns ``id`` from
        ``self.generate_domain_id``.

        Args:
            entity: Passed through to ``self.generate_domain_id``.
            df: Frame to normalise; it is modified in place.

        Returns:
            The same frame object, after modification.
        """
        column_names = {
            'day': 'timestamp',
            'link_id': 'holder_code',
            'link_name': 'holder_name',
        }
        df.rename(columns=column_names, inplace=True)

        # Overwrite the numeric columns with their NaN-free (0-filled) version.
        numeric_filled = df.select_dtypes(include=[np.number]).fillna(0)
        df.update(numeric_filled)

        df['timestamp'] = pd.to_datetime(df['timestamp'])

        def _stock_entity_id(jq_code):
            return to_entity_id(entity_type=EntityType.Stock, jq_code=jq_code)

        def _strip_suffix(jq_code):
            return jq_code.split('.')[0]

        # entity_id needs the full code, so it is derived before truncation.
        df['entity_id'] = df['code'].apply(_stock_entity_id)
        df['provider'] = self.provider.value
        df['code'] = df['code'].apply(_strip_suffix)

        df['id'] = self.generate_domain_id(entity, df)
        return df
    def to_zvt_entity(self, df, entity_type, category=None):
        """Reshape a raw listing frame into entity rows.

        The index is exposed as ``entity_id``, lower-cased and converted with
        ``to_entity_id``; ``start_date`` becomes ``timestamp``/``list_date``;
        ``exchange`` and ``code`` are derived from the entity id; missing
        ``end_date`` values are replaced by ``pd.to_datetime("22000101")``.

        Note:
            ``df.index.name`` of the passed frame is set to ``'entity_id'``
            as a side effect; all other changes happen on a new frame.

        Args:
            df: Frame indexed by the raw code, with ``start_date`` and
                ``end_date`` columns.
            entity_type: Entity type passed to ``to_entity_id`` and stored in
                the ``entity_type`` column.
            category: Optional value for a ``category`` column; the column is
                only added when this is truthy.

        Returns:
            The reshaped frame.
        """
        df.index.name = 'entity_id'
        df = df.reset_index()
        # Listing date
        df.rename(columns={'start_date': 'timestamp'}, inplace=True)
        df['timestamp'] = pd.to_datetime(df['timestamp'])
        df['list_date'] = df['timestamp']
        df['end_date'] = pd.to_datetime(df['end_date'])
        # Lower-case the raw code, then convert it to an entity id.
        df['entity_id'] = df['entity_id'].apply(
            lambda x: to_entity_id(entity_type=entity_type, jq_code=x.lower()))
        df['id'] = df['entity_id']
        df['entity_type'] = entity_type
        df['exchange'] = df['entity_id'].apply(lambda x: get_entity_exchange(x))
        df['code'] = df['entity_id'].apply(lambda x: get_entity_code(x))
        # Assign the result back instead of calling fillna(inplace=True) on
        # the selected column: the chained in-place form acts on a temporary
        # object and leaves the frame unchanged under pandas Copy-on-Write.
        df['end_date'] = df['end_date'].fillna(pd.to_datetime("22000101"))
        if category:
            df['category'] = category

        return df
    def to_zvt_entity(self, df, entity_type: EntityType, category=None):
        """Reshape a raw listing frame into entity rows.

        The index is exposed as ``entity_id`` and converted with
        ``to_entity_id``; ``start_date`` becomes ``timestamp``/``list_date``;
        ``entity_type`` stores ``entity_type.value``; ``exchange``, ``code``
        and ``name`` are derived.

        Note:
            ``df.index.name`` of the passed frame is set to ``'entity_id'``
            as a side effect; all other changes happen on a new frame.

        Args:
            df: Frame indexed by the raw code, with ``start_date``,
                ``end_date`` and ``display_name`` columns.
            entity_type: Entity type passed to ``to_entity_id``.
            category: Optional value for a ``category`` column; the column is
                only added when this is truthy.

        Returns:
            The reshaped frame.
        """
        df.index.name = 'entity_id'
        entities = df.reset_index().rename(columns={'start_date': 'timestamp'})

        # Listing date
        entities['timestamp'] = pd.to_datetime(entities['timestamp'])
        entities['list_date'] = entities['timestamp']
        entities['end_date'] = pd.to_datetime(entities['end_date'])

        def _as_entity_id(jq_code):
            return to_entity_id(entity_type=entity_type, jq_code=jq_code)

        entities['entity_id'] = entities['entity_id'].apply(_as_entity_id)
        entities['id'] = entities['entity_id']
        entities['entity_type'] = entity_type.value
        entities['exchange'] = entities['entity_id'].apply(get_entity_exchange)
        entities['code'] = entities['entity_id'].apply(get_entity_code)
        entities['name'] = entities['display_name']

        if category:
            entities['category'] = category

        return entities
Beispiel #14
0
    def to_zvt_entity(self, df, entity_type, category=None):
        """Reshape a raw listing frame (with a ``code`` column) into entity rows.

        The ``code`` column is moved to the front as ``entity_id`` and
        converted with ``to_entity_id``; ``start_date`` becomes
        ``timestamp``/``list_date``; ``exchange``, ``code`` and ``name`` are
        derived. The passed frame itself is not modified.

        Args:
            df: Frame with ``code``, ``start_date``, ``end_date`` and
                ``display_name`` columns.
            entity_type: Entity type passed to ``to_entity_id`` and stored in
                the ``entity_type`` column.
            category: Optional value for a ``category`` column; the column is
                only added when this is truthy.

        Returns:
            The reshaped frame.
        """
        # Move ``code`` into the index, rename it and bring it back as the
        # leading ``entity_id`` column.
        result = df.set_index("code").rename_axis("entity_id").reset_index()

        # Listing date
        result = result.rename(columns={"start_date": "timestamp"})
        result["timestamp"] = pd.to_datetime(result["timestamp"])
        result["list_date"] = result["timestamp"]
        result["end_date"] = pd.to_datetime(result["end_date"])

        def _as_entity_id(jq_code):
            return to_entity_id(entity_type=entity_type, jq_code=jq_code)

        result["entity_id"] = result["entity_id"].apply(_as_entity_id)
        result["id"] = result["entity_id"]
        result["entity_type"] = entity_type
        result["exchange"] = result["entity_id"].apply(get_entity_exchange)
        result["code"] = result["entity_id"].apply(get_entity_code)
        result["name"] = result["display_name"]

        if category:
            result["category"] = category

        return result