def acquire_fund_base():
    """Crawl base info for every fund that exists in the snapshot table but
    not yet in the base table, and insert it into the database.

    Spawns 4 worker threads via ``bootstrap_thread``; each worker logs into
    Morningstar, pages through its assigned slice of the snapshot records
    and scrapes code, name, category, company and inception date.

    Funds whose detail page cannot be opened, or which have no inception
    date (``'-'``), are collected in ``error_funds`` and printed at the end.
    """
    lock = Lock()
    each_fund_query = FundQuery()
    each_fund_insert = FundInsert()
    # Total number of snapshot rows still missing a base record.
    record_total = each_fund_query.get_fund_count_from_snapshot_no_exist()
    idWorker = IdWorker()
    print('record_total', record_total)
    # Codes of funds whose detail page was abnormal; reported at the end.
    error_funds = []

    def crawlData(start, end):
        """Worker body: crawl snapshot rows in [start, end) in pages of 10."""
        login_url = 'https://www.morningstar.cn/membership/signin.aspx'
        chrome_driver = login_morning_star(login_url, False)
        page_start = start
        page_limit = 10
        # Page through the slice of the fund list assigned to this worker.
        while page_start < end:
            results = each_fund_query.get_fund_from_snapshot_table_no_exist(
                page_start, page_limit)
            for record in results:
                each_fund = FundSpider(record[0], record[1], record[2],
                                       chrome_driver)
                # Open the fund's detail page on Morningstar.
                is_normal = each_fund.go_fund_url()
                if is_normal is False:
                    # `with lock` releases the lock even if append raises
                    # (the bare acquire/release pair did not).
                    with lock:
                        error_funds.append(each_fund.fund_code)
                    continue
                each_fund.get_fund_base_info()
                # Drop funds that have no inception date.
                if each_fund.found_date == '-':
                    with lock:
                        error_funds.append(each_fund.fund_code)
                    continue
                # The snowflake id generator is shared between workers:
                # serialize id generation.
                with lock:
                    snow_flake_id = idWorker.get_id()
                base_dict = {
                    'id': snow_flake_id,
                    'fund_code': each_fund.fund_code,
                    'morning_star_code': each_fund.morning_star_code,
                    'fund_name': each_fund.fund_name,
                    'fund_cat': each_fund.fund_cat,
                    'company': each_fund.company,
                    'found_date': each_fund.found_date
                }
                each_fund_insert.insert_fund_base_info(base_dict)
            page_start = page_start + page_limit
            print('page_start', page_start)
        chrome_driver.close()

    bootstrap_thread(crawlData, record_total, 4)
    print('error_funds', error_funds)
def get_fund_list(cookie_str=None):
    """Crawl every page of the Morningstar CN fund screener and append each
    fund row (code, Morningstar code, name, category, 3y/5y star rating,
    YTD return) to ``../output/fund_morning_star.csv``.

    :param cookie_str: optional cookie string from an already-authenticated
        session; when given, login is skipped and the cookies are injected
        into the driver instead.
    """
    from selenium import webdriver
    options = webdriver.ChromeOptions()
    options.add_argument("--no-sandbox")
    # NOTE(review): `chrome_options=` is deprecated in Selenium 4 (use
    # `options=`), as are the `find_element_by_xpath` calls below.
    chrome_driver = webdriver.Chrome('./chromedriver/chromedriver.exe',
                                     chrome_options=options)
    chrome_driver.set_page_load_timeout(12000)  # guard against a page that never finishes loading
    morning_fund_selector_url = "https://www.morningstar.cn/fundselect/default.aspx"  # "https://cn.morningstar.com/quickrank/default.aspx"
    # Translated note: simulated login supports two modes:
    #   1. install cookies of an already logged-in session;
    #   2. sign in with account/password/captcha (captcha OCR is ~30%
    #      accurate; recognition is retried on failure).
    """
    模拟登录,支持两种方式:
    1. 设置已经登录的cookie
    2. 输入账号,密码,验证码登录(验证码识别正确率30%,识别识别支持重试)
    """
    if cookie_str:
        set_cookies(chrome_driver, morning_fund_selector_url, cookie_str)
    else:
        morning_cookies = ""
        # NOTE(review): morning_cookies was just set to "" above, so this
        # condition is always true and the else-arm below is dead code.
        if morning_cookies == "":
            login_status = login_site(chrome_driver, morning_fund_selector_url)
            if login_status:
                print('login success')
                sleep(3)
            else:
                print('login fail')
                exit()
            # Capture the site cookies after a successful login.
            morning_cookies = chrome_driver.get_cookies()
        else:
            chrome_driver.get(morning_fund_selector_url)  # reopen the page to crawl
            print(chrome_driver.get_cookies())  # print the cookies that were installed
    # Starting page number; Morningstar shows a fixed 25 funds per page.
    page_num = 1
    page_count = 25
    page_num_total = math.ceil(
        int(
            chrome_driver.find_element_by_xpath(
                '/html/body/form/div[8]/div/div[4]/div[3]/div[2]/span').text)
        / page_count)
    result_dir = '../output/'
    output_head = '代码' + ',' + '晨星专属号' + ',' + '名称' + ',' + \
        '类型' + ',' + '三年评级' + ',' + '五年评级' + ',' + '今年回报率' + '\n'
    # Write the CSV header (only when starting from the first page).
    if page_num == 1:
        with open(result_dir + 'fund_morning_star.csv', 'w+') as csv_file:
            csv_file.write(output_head)
    while page_num <= page_num_total:
        # The pager shows pages in blocks of 10; the remainder tells us
        # whether we are inside the final (shorter) block.
        remainder = page_num_total % 10
        # a[12] is the "next" link except within the final block of pages.
        num = (remainder + 2) if page_num > (page_num_total - remainder) else 12
        xpath_str = '/html/body/form/div[8]/div/div[4]/div[3]/div[3]/div[1]/a[%s]' % (
            num)
        print('page_num', page_num)
        # Wait until the highlighted (red/bold) pager number equals page_num,
        # i.e. the page we expect has actually loaded.
        WebDriverWait(chrome_driver, timeout=600).until(
            text_to_be_present_in_element(
                "/html/body/form/div[8]/div/div[4]/div[3]/div[3]/div[1]/span[@style='margin-right:5px;font-weight:Bold;color:red;']",
                str(page_num), xpath_str))
        sleep(1)
        # Per-page accumulators for the scraped columns.
        id_list = []                  # snowflake ids
        code_list = []                # fund codes
        morning_star_code_list = []   # Morningstar-specific codes
        name_list = []                # fund names
        fund_cat = []                 # fund categories
        fund_rating_3 = []            # Morningstar 3-year rating
        fund_rating_5 = []            # Morningstar 5-year rating
        rate_of_return = []           # YTD return (%)
        # Parse the current page's HTML with BeautifulSoup.
        data = chrome_driver.page_source
        bs = BeautifulSoup(data, 'lxml')
        class_list = ['gridItem', 'gridAlternateItem']  # data rows alternate between these two CSS classes
        # Pull every row of both classes into the parallel lists above.
        for i in range(len(class_list)):
            for tr in bs.find_all('tr', {'class': class_list[i]}):
                # One snowflake id per scraped row.
                worker = IdWorker()
                id_list.append(worker.get_id())
                tds_text = tr.find_all('td', {'class': "msDataText"})
                tds_nume = tr.find_all('td', {'class': "msDataNumeric"})
                # Fund code cell (first text column's anchor).
                code_a_element = tds_text[0].find_all('a')[0]
                code_list.append(code_a_element.string)
                # Extract the Morningstar code from the anchor's href.
                current_morning_code = re.findall(
                    r'(?<=/quicktake/)(\w+)$',
                    code_a_element.get('href')).pop(0)
                # Morningstar's own identifier for the fund.
                morning_star_code_list.append(current_morning_code)
                name_list.append(tds_text[1].find_all('a')[0].string)
                # Fund category.
                fund_cat.append(tds_text[2].string)
                # 3-year rating, decoded from the star image URL.
                rating = get_star_count(tds_text[3].find_all('img')[0]['src'])
                fund_rating_3.append(rating)
                # 5-year rating.
                rating = get_star_count(tds_text[4].find_all('img')[0]['src'])
                fund_rating_5.append(rating)
                # YTD return (%); '-' on the site means no data.
                return_value = tds_nume[
                    3].string if tds_nume[3].string != '-' else None
                rate_of_return.append(return_value)
        print('数据准备完毕')
        fund_df = pd.DataFrame({
            'id': id_list,
            'fund_code': code_list,
            'morning_star_code': morning_star_code_list,
            'fund_name': name_list,
            'fund_cat': fund_cat,
            'fund_rating_3': fund_rating_3,
            'fund_rating_5': fund_rating_5,
            'rate_of_return': rate_of_return
        })
        # DB upsert statement; the executemany below is commented out, so this
        # variant only writes the CSV.
        sql_insert = "replace into fund_morning_star(`id`, `fund_code`,`morning_star_code`, `fund_name`, `fund_cat`, `fund_rating_3`, `fund_rating_5`, `rate_of_return`) values(%s, %s, %s, %s, %s, %s, %s, %s)"
        # print('fund_df', fund_df)
        fund_list = fund_df.values.tolist()
        # cursor.executemany(sql_insert, fund_list)
        # connect.commit()
        print('fund_list', fund_list)
        # Append this page's rows to the CSV.
        with open(result_dir + 'fund_morning_star.csv', 'a') as csv_file:
            for fund_item in fund_list:
                output_line = ', '.join(str(x) for x in fund_item) + '\n'
                csv_file.write(output_line)
        # Locate and click the "next page" link computed above.
        next_page = chrome_driver.find_element_by_xpath(xpath_str)
        next_page.click()
        page_num += 1
    chrome_driver.close()
    print('end')
def get_fund_list(page_index):
    """Crawl the Morningstar CN fund screener starting at ``page_index`` and
    upsert every fund row into the snapshot table named by the
    ``snapshot_table_name`` environment variable, mirroring each row to
    ``./output/<snapshot_table_name>.csv``.

    Relies on module-level ``cursor`` / ``connect_instance`` for the DB and
    on ``login_morning_star`` for an authenticated driver.

    :param page_index: 1-based page to start from (1 also rewrites the CSV
        header).
    """
    morning_fund_selector_url = "https://www.morningstar.cn/fundselect/default.aspx"
    chrome_driver = login_morning_star(morning_fund_selector_url, False)
    # Morningstar's fixed page size; total pages = ceil(total funds / 25).
    page_count = 25
    page_total = math.ceil(
        int(
            chrome_driver.find_element_by_xpath(
                '/html/body/form/div[8]/div/div[4]/div[3]/div[2]/span').text)
        / page_count)
    result_dir = './output/'
    output_head = '代码' + ',' + '晨星专属号' + ',' + '名称' + ',' + \
        '类型' + ',' + '三年评级' + ',' + '五年评级' + ',' + '今年回报率' + '\n'
    # Output CSV is named after the target snapshot table.
    env_snapshot_table_name = os.getenv('snapshot_table_name')
    output_file_name = env_snapshot_table_name + ".csv"
    # Write the CSV header only when starting from the first page.
    if page_index == 1:
        with open(result_dir + output_file_name, 'w+') as csv_file:
            csv_file.write(output_head)
    while page_index <= page_total:
        # The pager shows pages in blocks of 10; the remainder tells us
        # whether we are inside the final (shorter) block.
        remainder = page_total % 10
        # a[12] is the "next" link except within the final block of pages.
        num = (remainder + 2) if page_index > (page_total - remainder) else 12
        xpath_str = '/html/body/form/div[8]/div/div[4]/div[3]/div[3]/div[1]/a[%s]' % (
            num)
        print('page_index', page_index)
        # Wait until the highlighted (red/bold) pager number equals
        # page_index, i.e. the expected page has actually loaded.
        WebDriverWait(chrome_driver, timeout=600).until(
            text_to_be_present_in_element(
                "/html/body/form/div[8]/div/div[4]/div[3]/div[3]/div[1]/span[@style='margin-right:5px;font-weight:Bold;color:red;']",
                str(page_index), xpath_str))
        sleep(1)
        # Per-page accumulators for the scraped columns.
        id_list = []                  # snowflake ids
        code_list = []                # fund codes
        morning_star_code_list = []   # Morningstar-specific codes
        name_list = []                # fund names
        fund_cat = []                 # fund categories
        fund_rating_3 = []            # Morningstar 3-year rating
        fund_rating_5 = []            # Morningstar 5-year rating
        rate_of_return = []           # YTD return (%)
        # Parse the current page's HTML with BeautifulSoup.
        data = chrome_driver.page_source
        bs = BeautifulSoup(data, 'lxml')
        class_list = ['gridItem', 'gridAlternateItem']  # data rows alternate between these two CSS classes
        # Pull every row of both classes into the parallel lists above.
        for i in range(len(class_list)):
            for tr in bs.find_all('tr', {'class': class_list[i]}):
                # One snowflake id per scraped row.
                worker = IdWorker()
                id_list.append(worker.get_id())
                tds_text = tr.find_all('td', {'class': "msDataText"})
                tds_nume = tr.find_all('td', {'class': "msDataNumeric"})
                # Fund code cell (first text column's anchor).
                code_a_element = tds_text[0].find_all('a')[0]
                code_list.append(code_a_element.string)
                # Extract the Morningstar code from the anchor's href.
                current_morning_code = re.findall(
                    r'(?<=/quicktake/)(\w+)$',
                    code_a_element.get('href')).pop(0)
                # Morningstar's own identifier for the fund.
                morning_star_code_list.append(current_morning_code)
                name_list.append(tds_text[1].find_all('a')[0].string)
                # Fund category.
                fund_cat.append(tds_text[2].string)
                # 3-year rating, decoded from the star image URL.
                rating = get_star_count(tds_text[3].find_all('img')[0]['src'])
                fund_rating_3.append(rating)
                # 5-year rating.
                rating = get_star_count(tds_text[4].find_all('img')[0]['src'])
                fund_rating_5.append(rating)
                # YTD return (%); '-' on the site means no data.
                return_value = tds_nume[
                    3].string if tds_nume[3].string != '-' else None
                rate_of_return.append(return_value)
        print('数据准备完毕')
        fund_df = pd.DataFrame({
            'id': id_list,
            'fund_code': code_list,
            'morning_star_code': morning_star_code_list,
            'fund_name': name_list,
            'fund_cat': fund_cat,
            'fund_rating_3': fund_rating_3,
            'fund_rating_5': fund_rating_5,
            'rate_of_return': rate_of_return
        })
        # Upsert into the env-named snapshot table: ratings and YTD return
        # are refreshed on duplicate key.
        # NOTE(review): table name comes from an env var concatenated into
        # SQL — safe only as long as the env var is trusted.
        env_snapshot_table_name = os.getenv('snapshot_table_name')
        sql_insert = "INSERT INTO " + env_snapshot_table_name + \
            "(`id`, `fund_code`,`morning_star_code`, `fund_name`, `fund_cat`, `fund_rating_3`, `fund_rating_5`, `rate_of_return`) VALUES(%s, %s, %s, %s, %s, %s, %s, %s) ON DUPLICATE KEY UPDATE fund_rating_3=VALUES(fund_rating_3), fund_rating_5=VALUES(fund_rating_5), rate_of_return=VALUES(rate_of_return);"
        # print('fund_df', fund_df)
        fund_list = fund_df.values.tolist()
        cursor.executemany(sql_insert, fund_list)
        connect_instance.commit()
        # print('fund_list', fund_list)
        # Mirror this page's rows to the CSV.
        with open(result_dir + output_file_name, 'a') as csv_file:
            for fund_item in fund_list:
                output_line = ', '.join(str(x) for x in fund_item) + '\n'
                csv_file.write(output_line)
        # Locate and click the "next page" link computed above.
        next_page = chrome_driver.find_element_by_xpath(xpath_str)
        next_page.click()
        sleep(3)
        page_index += 1
    chrome_driver.close()
    print('end')
connect_instance = connect() cursor = connect_instance.cursor() if __name__ == '__main__': # 获取数据库的基金列表 env_snapshot_table_name = os.getenv('snapshot_table_name') sql_count = "SELECT count(*) FROM " + env_snapshot_table_name + \ " WHERE fund_code NOT IN (SELECT fund_code FROM fund_morning_base);" cursor.execute(sql_count) count = cursor.fetchone() # 获取记录条数 print('count', count[0]) login_url = 'https://www.morningstar.cn/membership/signin.aspx' chrome_driver = login_morning_star(login_url, True) IdWorker = IdWorker() page_limit = 10 record_total = count[0] page_start = 0 error_funds = [] # 一些异常的基金详情页,如果发现记录该基金的code # 遍历从基金列表的单支基金 while (page_start < record_total): # 从fund_morning_snapshot_2021_q1 查出 fund_morning_base 中不存在的基金 sql = "SELECT fund_code, morning_star_code, fund_name FROM " + env_snapshot_table_name + \ " WHERE fund_code NOT IN (SELECT fund_code FROM fund_morning_base) ORDER BY fund_code LIMIT %s, %s" cursor.execute(sql, [page_start, page_limit]) # 执行sql语句 results = cursor.fetchall() # 获取查询的所有记录 for record in results: each_fund = FundSpider(record[0], record[1], record[2], chrome_driver) # 从晨星网上更新信息
def crawlData(start, end):
    """Thread worker: crawl quarterly ("season") data for non-money-market
    funds in rows [start, end) of the filtered fund list, and write manager,
    season and top-10 stock-holding records to the database.

    Uses module-level shared state defined elsewhere in the file: ``lock``,
    ``cursor``, ``connect_instance``, ``result_dir``, ``error_funds`` and
    ``IdWorker`` (which appears to be rebound to an *instance* at module
    level, hence ``IdWorker.get_id()`` below — TODO confirm).
    """
    chrome_driver = login()
    morning_cookies = chrome_driver.get_cookies()
    page_start = start
    page_limit = 10
    while(page_start < end):
        # Select funds joined with their snapshot, excluding money-market /
        # pure-bond / target-date / short-bond categories, best-rated first.
        # (%% escapes % for the DB driver's format-style paramstyle.)
        sql = "SELECT t.fund_code,\
        t.morning_star_code, t.fund_name, t.fund_cat \
        FROM fund_morning_base as t \
        LEFT JOIN fund_morning_snapshot as f ON f.fund_code = t.fund_code \
        WHERE t.fund_cat NOT LIKE '%%货币%%' \
        AND t.fund_cat NOT LIKE '%%纯债基金%%' \
        AND t.fund_cat NOT LIKE '目标日期' \
        AND t.fund_cat NOT LIKE '%%短债基金%%' \
        ORDER BY f.fund_rating_5 DESC,f.fund_rating_3 DESC, \
        t.fund_cat, t.fund_code LIMIT %s, %s"
        # The DB cursor is shared between threads: serialize access.
        lock.acquire()
        cursor.execute(
            sql, [page_start, page_limit])  # run the page query
        results = cursor.fetchall()  # fetch all rows of this page
        lock.release()
        for record in results:
            sleep(1)
            print(current_thread().getName(), 'record-->', record)
            each_fund = FundSpider(
                record[0], record[1], record[2], chrome_driver, morning_cookies)
            is_normal = each_fund.go_fund_url()
            # If the fund detail page cannot be opened, log the fund to the
            # error CSV and skip it.
            if is_normal == False:
                lock.acquire()
                error_funds.append(each_fund.fund_code)
                fund_infos = [each_fund.fund_code, each_fund.morning_star_code,
                              each_fund.fund_name, record[3], page_start,
                              '页面跳转有问题']
                with open(result_dir + 'fund_morning_season_error.csv', 'a') as csv_file:
                    output_line = ', '.join(str(x) for x in fund_infos) + '\n'
                    csv_file.write(output_line)
                lock.release()
                continue
            # Scrape the fund page sections.
            each_fund.get_fund_season_info()     # quarterly base data
            each_fund.get_fund_manager_info()    # fund manager section
            each_fund.get_fund_morning_rating()  # Morningstar star rating
            each_fund.get_fund_qt_rating()       # risk rating
            # Only fetch asset composition when there is a stock position.
            if each_fund.stock_position['total'] != '0.00' and each_fund.total_asset != None:
                each_fund.get_asset_composition_info()
            # If the spider caught an anomaly while scraping, record it.
            if each_fund._is_trigger_catch == True:
                lock.acquire()
                fund_infos = [each_fund.fund_code, each_fund.morning_star_code,
                              each_fund.fund_name, record[3],
                              each_fund.stock_position['total'], page_start,
                              each_fund._catch_detail]
                with open(result_dir + 'fund_morning_season_catch.csv', 'a') as csv_file:
                    output_line = ', '.join(str(x) for x in fund_infos) + '\n'
                    csv_file.write(output_line)
                lock.release()
            # Database writes: one shared snowflake id per fund record.
            lock.acquire()
            snow_flake_id = IdWorker.get_id()
            lock.release()
            # Fund manager record (only when the page exposed a manager id).
            if each_fund.manager.get('id'):
                manager_dict = {
                    'id': snow_flake_id,
                    'manager_id': each_fund.manager.get('id'),
                    'name': each_fund.manager.get('name'),
                    'brife': each_fund.manager.get('brife')
                }
                manager_sql_insert = generate_insert_sql(
                    manager_dict, 'fund_morning_manager',
                    ['id', 'manager_id', 'name'])
                lock.acquire()
                cursor.execute(manager_sql_insert, tuple(manager_dict.values()))
                connect_instance.commit()
                lock.release()
            # Quarterly record. TODO: compare against the data-update-time field.
            season_dict = {
                'id': snow_flake_id,
                'quarter_index': each_fund.quarter_index,
                'fund_code': each_fund.fund_code,
                'investname_style': each_fund.investname_style,
                'total_asset': each_fund.total_asset,
                'manager_id': each_fund.manager.get('id'),
                'manager_start_date': each_fund.manager.get('start_date'),
                'three_month_retracement': each_fund.three_month_retracement,
                'june_month_retracement': each_fund.june_month_retracement,
                'risk_statistics_alpha': each_fund.risk_statistics.get('alpha'),
                'risk_statistics_beta': each_fund.risk_statistics.get('beta'),
                'risk_statistics_r_square': each_fund.risk_statistics.get('r_square'),
                'risk_assessment_standard_deviation': each_fund.risk_assessment.get('standard_deviation'),
                'risk_assessment_risk_coefficient': each_fund.risk_assessment.get('risk_coefficient'),
                'risk_assessment_sharpby': each_fund.risk_assessment.get('sharpby'),
                'risk_rating_2': each_fund.risk_rating.get(2),
                'risk_rating_3': each_fund.risk_rating.get(3),
                'risk_rating_5': each_fund.risk_rating.get(5),
                'risk_rating_10': each_fund.risk_rating.get(10),
                'stock_position_total': each_fund.stock_position.get('total'),
                'stock_position_ten': each_fund.stock_position.get('ten'),
                'bond_position_total': each_fund.bond_position.get('total'),
                'bond_position_five': each_fund.bond_position.get('five'),
                'morning_star_rating_3': each_fund.morning_star_rating.get(3),
                'morning_star_rating_5': each_fund.morning_star_rating.get(5),
                'morning_star_rating_10': each_fund.morning_star_rating.get(10),
            }
            season_sql_insert = generate_insert_sql(
                season_dict, 'fund_morning_season',
                ['id', 'quarter_index', 'fund_code'])
            lock.acquire()
            cursor.execute(season_sql_insert, tuple(season_dict.values()))
            connect_instance.commit()
            lock.release()
            # Top-10 stock holdings, only when there is a stock position.
            stock_position_total = each_fund.stock_position.get(
                'total', '0.00')
            if float(stock_position_total) > 0:
                stock_dict = {
                    'id': snow_flake_id,
                    'quarter_index': each_fund.quarter_index,
                    'fund_code': each_fund.fund_code,
                    'stock_position_total': each_fund.stock_position.get('total'),
                }
                # Flatten each holding into top_stock_<i>_{code,name,portion,market} columns.
                for index in range(len(each_fund.ten_top_stock_list)):
                    temp_stock = each_fund.ten_top_stock_list[index]
                    prefix = 'top_stock_' + str(index) + '_'
                    code_key = prefix + 'code'
                    stock_dict[code_key] = temp_stock['stock_code']
                    name_key = prefix + 'name'
                    stock_dict[name_key] = temp_stock['stock_name']
                    portion_key = prefix + 'portion'
                    stock_dict[portion_key] = temp_stock['stock_portion']
                    market_key = prefix + 'market'
                    stock_dict[market_key] = temp_stock['stock_market']
                stock_sql_insert = generate_insert_sql(
                    stock_dict, 'fund_morning_stock_info',
                    ['id', 'quarter_index', 'fund_code'])
                lock.acquire()
                # print('stock_sql_insert', stock_sql_insert)
                cursor.execute(stock_sql_insert, tuple(stock_dict.values()))
                connect_instance.commit()
                lock.release()
        # pprint(fundDict)
        page_start = page_start + page_limit
        print(current_thread().getName(), 'page_start', page_start)
        sleep(3)
    chrome_driver.close()
) return sql_insert if __name__ == '__main__': # 过滤没有股票持仓的基金 sql_count = "SELECT COUNT(1) FROM fund_morning_base \ LEFT JOIN fund_morning_snapshot ON fund_morning_snapshot.fund_code = fund_morning_base.fund_code \ WHERE fund_morning_base.fund_cat NOT LIKE '%%货币%%' \ AND fund_morning_base.fund_cat NOT LIKE '%%纯债基金%%' \ AND fund_morning_base.fund_cat NOT LIKE '目标日期' \ AND fund_morning_base.fund_cat NOT LIKE '%%短债基金%%'" cursor.execute(sql_count) count = cursor.fetchone() # 获取记录条数 print('count', count[0]) IdWorker = IdWorker() page_limit = 5 record_total = count[0] page_start = 0 error_funds = [] output_catch_head = '代码' + ',' + '晨星专属号' + ',' + '名称' + ',' + \ '类型' + ',' + '股票总仓位' + ',' + '页码' + ',' + '备注' + '\n' # 设置表头 result_dir = './output/' if page_start == 0: with open(result_dir + 'fund_morning_season_catch.csv', 'w+') as csv_file: csv_file.write(output_catch_head) output_catch_error = '代码' + ',' + '晨星专属号' + ',' + '名称' + ',' + \ '类型' + ',' + '页码' + ',' + '备注' + '\n' if page_start == 0:
def crawlData(start, end):
    """Thread worker (refactored variant): crawl quarterly data for funds in
    rows [start, end) of ``each_fund_query.select_quarter_fund`` and persist
    manager / quarterly / stock-holding records via ``FundInsert``.

    Skips funds whose page's quarter index does not match the expected one,
    and additionally propagates the scraped data to same-family funds (name
    without the trailing 'A'), fetching each sibling's total asset from an
    external platform.

    Uses module-level shared state defined elsewhere: ``lock``, ``IdWorker``
    (apparently rebound to an instance, hence ``IdWorker.get_id()`` — TODO
    confirm), ``each_fund_query`` and ``fund_csv``.
    """
    login_url = 'https://www.morningstar.cn/membership/signin.aspx'
    chrome_driver = login_morning_star(login_url, True)
    page_start = start
    page_limit = 10
    while (page_start < end):
        results = each_fund_query.select_quarter_fund(
            page_start, page_limit)
        for record in results:
            sleep(1)
            each_fund = FundSpider(record[0], record[1], record[2],
                                   chrome_driver)
            is_normal = each_fund.go_fund_url()
            # If the fund detail page cannot be opened, log it to the
            # abnormal-url CSV and skip this fund.
            if is_normal == False:
                # error_funds.append(each_fund.fund_code)
                fund_infos = [
                    each_fund.fund_code, each_fund.morning_star_code,
                    each_fund.fund_name, record[3], page_start, '页面跳转有问题'
                ]
                output_line = ', '.join(str(x) for x in fund_infos) + '\n'
                fund_csv.write_abnormal_url_fund(False, output_line)
                continue
            # Data-update time on the page; if it is not the expected
            # quarter, skip scraping the rest.
            quarter_index = each_fund.get_quarter_index()
            if quarter_index != each_fund.quarter_index:
                print('quarter_index', quarter_index, each_fund.update_date,
                      each_fund.fund_code, each_fund.fund_name)
                continue
            # Scrape the fund page sections.
            each_fund.get_fund_season_info()     # quarterly base data
            each_fund.get_fund_manager_info()    # fund manager section
            each_fund.get_fund_morning_rating()  # Morningstar star rating
            each_fund.get_fund_qt_rating()       # risk rating
            # Only fetch asset composition when there is a stock position.
            if each_fund.stock_position[
                    'total'] != '0.00' and each_fund.total_asset != None:
                each_fund.get_asset_composition_info()
            # If the spider caught an anomaly while scraping, record it in CSV.
            if each_fund._is_trigger_catch == True:
                fund_infos = [
                    each_fund.fund_code, each_fund.morning_star_code,
                    each_fund.fund_name, record[3],
                    each_fund.stock_position['total'], page_start,
                    each_fund._catch_detail
                ]
                output_line = ', '.join(str(x) for x in fund_infos) + '\n'
                fund_csv.write_season_catch_fund(False, output_line)
            # Database writes: one shared snowflake id per fund record.
            lock.acquire()
            snow_flake_id = IdWorker.get_id()
            lock.release()
            # Persist through the insert helper.
            fund_insert = FundInsert()
            # Fund manager record (only when the page exposed a manager id).
            if each_fund.manager.get('id'):
                manager_dict = {
                    'id': snow_flake_id,
                    'manager_id': each_fund.manager.get('id'),
                    'name': each_fund.manager.get('name'),
                    'brife': each_fund.manager.get('brife')
                }
                fund_insert.insert_fund_manger_info(manager_dict)
            # Quarterly record.
            quarterly_dict = {
                'id': snow_flake_id,
                'quarter_index': each_fund.quarter_index,
                'fund_code': each_fund.fund_code,
                'investname_style': each_fund.investname_style,
                'total_asset': each_fund.total_asset,
                'manager_id': each_fund.manager.get('id'),
                'manager_start_date': each_fund.manager.get('start_date'),
                'three_month_retracement': each_fund.three_month_retracement,
                'june_month_retracement': each_fund.june_month_retracement,
                'risk_statistics_alpha': each_fund.risk_statistics.get('alpha'),
                'risk_statistics_beta': each_fund.risk_statistics.get('beta'),
                'risk_statistics_r_square': each_fund.risk_statistics.get('r_square'),
                'risk_assessment_standard_deviation': each_fund.risk_assessment.get('standard_deviation'),
                'risk_assessment_risk_coefficient': each_fund.risk_assessment.get('risk_coefficient'),
                'risk_assessment_sharpby': each_fund.risk_assessment.get('sharpby'),
                'risk_rating_2': each_fund.risk_rating.get(2),
                'risk_rating_3': each_fund.risk_rating.get(3),
                'risk_rating_5': each_fund.risk_rating.get(5),
                'risk_rating_10': each_fund.risk_rating.get(10),
                'stock_position_total': each_fund.stock_position.get('total'),
                'stock_position_ten': each_fund.stock_position.get('ten'),
                'bond_position_total': each_fund.bond_position.get('total'),
                'bond_position_five': each_fund.bond_position.get('five'),
                'morning_star_rating_3': each_fund.morning_star_rating.get(3),
                'morning_star_rating_5': each_fund.morning_star_rating.get(5),
                'morning_star_rating_10': each_fund.morning_star_rating.get(10),
            }
            fund_insert.fund_quarterly_info(quarterly_dict)
            # Top-10 stock holdings, only when there is a stock position.
            stock_position_total = each_fund.stock_position.get(
                'total', '0.00')
            if float(stock_position_total) > 0:
                stock_dict = {
                    'id': snow_flake_id,
                    'quarter_index': each_fund.quarter_index,
                    'fund_code': each_fund.fund_code,
                    'stock_position_total': each_fund.stock_position.get('total'),
                }
                # Flatten each holding into top_stock_<i>_{code,name,portion,market} columns.
                for index in range(len(each_fund.ten_top_stock_list)):
                    temp_stock = each_fund.ten_top_stock_list[index]
                    prefix = 'top_stock_' + str(index) + '_'
                    code_key = prefix + 'code'
                    stock_dict[code_key] = temp_stock['stock_code']
                    name_key = prefix + 'name'
                    stock_dict[name_key] = temp_stock['stock_name']
                    portion_key = prefix + 'portion'
                    stock_dict[portion_key] = temp_stock['stock_portion']
                    market_key = prefix + 'market'
                    stock_dict[market_key] = temp_stock['stock_market']
                fund_insert.fund_stock_info(stock_dict)
            # Same-family funds: for an 'A' share class, replicate the record
            # for each sibling fund, with the sibling's own total asset.
            if each_fund.fund_name.endswith('A'):
                similar_name = each_fund.fund_name[0:-1]
                results = each_fund_query.select_similar_fund(
                    similar_name)  # all funds sharing the base name
                # Closed-end funds are looked up on a different platform.
                platform = 'zh_fund' if '封闭' in similar_name else 'ai_fund'
                for i in range(0, len(results)):
                    item = results[i]
                    item_code = item[0]
                    total_asset = get_total_asset(item_code, platform)
                    # Reuse the scraped dicts, swapping in the sibling's
                    # code, asset and a fresh (offset) id.
                    quarterly_dict['fund_code'] = item_code
                    quarterly_dict['total_asset'] = total_asset
                    quarterly_dict['id'] = snow_flake_id + i + 1
                    fund_insert.fund_quarterly_info(quarterly_dict)
                    if float(stock_position_total) > 0:
                        stock_dict['fund_code'] = item_code
                        stock_dict['id'] = snow_flake_id + i + 1
                        fund_insert.fund_stock_info(stock_dict)
        # pprint(fundDict)
        page_start = page_start + page_limit
        print(current_thread().getName(), 'page_start', page_start)
        sleep(3)
    chrome_driver.close()