def exchange_suspend_info(ref_date, force_update=False):
    """Scrape exchange suspension/resumption records and store them.

    When ``force_update`` is False the scrape starts the day after the most
    recently stored date (``find_latest_date() + 1``); otherwise it covers
    ``ref_date`` only. Business days are filtered against the SSE calendar,
    duplicates are dropped on (停(复)牌时间, 证券代码), and the result is
    written to the ``suspend_info`` table.
    """
    begin = ref_date if force_update else find_latest_date() + dt.timedelta(days=1)
    frames = []
    for day in pd.date_range(begin, ref_date):
        if not isBizDay('china.sse', day):
            continue
        frames.append(suspend_info(day.strftime('%Y-%m-%d')))
        spyder_logger.info('Scraping finished for date {0}'.format(day))

    if not frames:
        spyder_logger.info('No data is available for {0}'.format(ref_date))
        return

    merged = pd.concat(frames)
    merged.drop_duplicates(['停(复)牌时间', '证券代码'], inplace=True)
    if not merged.empty:
        insert_table(merged,
                     ['effectiveDate', 'instrumentID', 'instrumentName',
                      'status', 'reason', 'stopTime'],
                     'suspend_info',
                     exchange_db_settings)
def load_fund_holding(start_date, end_date):
    """Scrape fund stock-holding tables between ``start_date`` and ``end_date``.

    One paginated query is issued per publication stamp returned by
    ``date_stamps``. Pagination stops when the server repeats the previous
    page or reports '未查询到相关数据!'.

    Returns
    -------
    pd.DataFrame
        De-duplicated holdings (keyed on 基金代码/基金简称/股票代码), or an
        empty DataFrame when nothing was scraped.

    Fixes over the previous revision:
    - the per-stamp loop variable no longer shadows the ``end_date``
      parameter, so the "no data" warning reports the caller's true range;
    - ``previous_page`` is updated on every iteration (not only when tables
      are present), so a table-less response can no longer cause an
      infinite pagination loop.
    """
    session = login()
    query_url_template = 'http://simudata.howbuy.com/profile/favouriteStocks.htm?' \
        'jjdm5=&zqdm=&endDate={0}&orderBy=cgsl&orderRule=Desc&page={1}'
    stamps = date_stamps(start_date, end_date)
    datas = []
    for stamp in stamps:
        page = 0
        previous_page = None
        while True:
            page += 1
            query_url = query_url_template.format(stamp, page)
            info_data = session.post(query_url)
            soup = BeautifulSoup(info_data.text, 'lxml')
            error_message = soup.find('div', attrs={'class': 'iocn'})
            if error_message:
                raise ValueError(error_message.text)
            # Server repeats the last page when paging past the end.
            if soup == previous_page:
                break
            previous_page = soup
            tables = soup.find_all('table')
            if tables:
                target_table = tables[1]
                if target_table.tbody.td.text == '未查询到相关数据!':
                    break
                fund_data = parse_table(target_table)
                datas.append(fund_data)
            spyder_logger.info("Page No. {0:4d} is finished.".format(page))
        spyder_logger.info(
            'Publication Date : {0} is finished for fund holding'.format(
                stamp))
    if datas:
        total_table = pd.concat(datas)
        total_table.drop_duplicates(['基金代码', '基金简称', '股票代码'], inplace=True)
        return total_table[[
            '基金代码', '基金简称', '截止日期', '持股数量(万股)', '持股比例(%)',
            '变动数量(万股)', '股票代码', '股票简称'
        ]]
    else:
        spyder_logger.warning("No any data got between {0} and {1}".format(
            start_date, end_date))
        return pd.DataFrame()
def load_fund_index(start_month=200601, end_month=202201):
    """Scrape the Howbuy fund index series between two YYYYMM months.

    Pages through each index code until the server repeats the previous page
    or reports '未查询到相关数据!'. Returns the combined table de-duplicated
    on (统计月份, 指数代码) with a reset index, or an empty DataFrame when no
    rows were scraped.
    """
    session = login()
    url_pattern = 'http://simudata.howbuy.com/profile/howbuyIndex.htm?staDate={0}' \
        '&orderRule=Desc&endDate={1}&page={2}&smzs={3}'
    index_codes = ['HB0001', 'HB0011', 'HB0012', 'HB0014', 'HB0015',
                   'HB0016', 'HB0017', 'HB0018', 'HB001b', 'HB001d']
    collected = []
    for code in index_codes:
        page = 0
        last_soup = None
        while True:
            page += 1
            response = session.post(url_pattern.format(start_month, end_month, page, code))
            soup = BeautifulSoup(response.text, 'lxml')
            err = soup.find('div', attrs={'class': 'iocn'})
            if err:
                raise ValueError(err.text)
            target = soup.find_all('table')[1]
            if soup == last_soup or target.tbody.td.text == '未查询到相关数据!':
                break
            collected.append(parse_table(target))
            last_soup = soup
        spyder_logger.info('Fund index:{0} is finished.'.format(code))

    if not collected:
        return pd.DataFrame()
    merged = pd.concat(collected)
    merged.drop_duplicates(['统计月份', '指数代码'], inplace=True)
    return merged.reset_index(drop=True)
def exchange_announcement_info(ref_date):
    """Fetch exchange announcements for ``ref_date`` and persist them.

    Rows are de-duplicated on the announcement URL before being written to
    the ``announcement_info`` table. Logs and returns early when the scrape
    yields nothing.
    """
    table = announcement_info(ref_date.strftime('%Y-%m-%d'))
    if table.empty:
        spyder_logger.info('No new data is available for {0}'.format(ref_date))
        return
    table.drop_duplicates(['url'], inplace=True)
    if table.empty:
        return
    insert_table(table,
                 ['reportDate', 'instrumentID', 'title', 'url',
                  'updateTime', 'exchangePlace'],
                 'announcement_info',
                 exchange_db_settings)
def load_howbuy_fund_type(ref_date):
    """Scrape Howbuy fund NAV/type listings founded on or after ``ref_date``.

    Pages are ordered by inception date descending, so scraping stops once a
    page's oldest row (its last row) predates ``ref_date``, or when the
    server repeats the previous page.

    Returns
    -------
    pd.DataFrame
        Columns 基金代码/基金简称/基金管理人/基金经理/成立日期/好买策略/
        复权单位净值/净值日期, de-duplicated on 基金代码, or an empty
        DataFrame when nothing qualified.

    Idiom fixes over the previous revision: ``fund_data.empty`` instead of
    ``len(fund_data) == 0``, ``iloc[-1]`` instead of
    ``iloc[len(fund_data) - 1]``, and a corrected local variable name
    (``query_url_template``).
    """
    session = login()
    query_url_template = 'http://simudata.howbuy.com/profile/newJjjz.htm?orderBy=clrq' \
        '&orderByDesc=true&jjdm=&jldm=&glrm=&cllx=qb&zzxs=qb&syMin=&syMax=&' \
        'page={0}&perPage=30'
    full_table = []
    page = 0
    previous_page = None
    while True:
        page += 1
        query_url = query_url_template.format(page)
        info_data = session.post(query_url)
        soup = BeautifulSoup(info_data.text, 'lxml')
        error_message = soup.find('div', attrs={'class': 'iocn'})
        if error_message:
            raise ValueError(error_message.text)
        # Server repeats the last page when paging past the end.
        if soup == previous_page:
            break
        previous_page = soup
        tables = soup.find_all('table')
        target_table = tables[1]
        fund_data = parse_table(target_table)
        fund_data = fund_data[fund_data['成立日期'] != 0]
        # Last row is the oldest on the page (descending inception order);
        # once it predates ref_date, no later page can qualify.
        if fund_data.empty or fund_data.iloc[-1]['成立日期'] < ref_date:
            break
        full_table.append(fund_data)
        spyder_logger.info("Page No. {0:4d} is finished.".format(page))
    if full_table:
        total_table = pd.concat(full_table)
        total_table.drop_duplicates(['基金代码'], inplace=True)
        total_table = total_table[(total_table['净值日期'] != 0)
                                  & (total_table['成立日期'] != 0)]
        total_table = total_table[total_table['成立日期'] >= ref_date]
        return total_table[[
            '基金代码', '基金简称', '基金管理人', '基金经理', '成立日期',
            '好买策略', '复权单位净值', '净值日期'
        ]]
    else:
        return pd.DataFrame()
def load_howbuy_style_return(start_month=200001, end_month=202101):
    """Scrape monthly strategy-style returns from Howbuy.

    Pages through the strategies endpoint between two YYYYMM months,
    stopping when the server repeats the previous page or reports
    '未查询到相关数据!'. Returns the combined table de-duplicated on
    (统计月份, 好买策略), or an empty DataFrame when nothing was scraped.
    """
    session = login()
    url_pattern = 'http://simudata.howbuy.com/profile/strategies.htm?staDate={0}' \
        '&cllx=qb&endDate={1}&page={2}&syl=j1y'
    column_names = ['No.', '统计月份', '好买策略', '最大值', '最小值',
                    '中位数', '均值', '沪深300同期收益率']
    collected = []
    page = 0
    last_soup = None
    while True:
        page += 1
        response = session.post(url_pattern.format(start_month, end_month, page))
        soup = BeautifulSoup(response.text, 'lxml')
        err = soup.find('div', attrs={'class': 'iocn'})
        if err:
            raise ValueError(err.text)
        target = soup.find_all('table')[1]
        if soup == last_soup or target.tbody.td.text == '未查询到相关数据!':
            break
        last_soup = soup
        collected.append(parse_table(target, col_level=2, col_names=column_names))
        spyder_logger.info("Page No. {0:4d} is finished.".format(page))

    if not collected:
        return pd.DataFrame()
    merged = pd.concat(collected)
    merged.drop_duplicates(['统计月份', '好买策略'], inplace=True)
    return merged