def department_count_spider():
    """Scrape the "securities trading department listing statistics" table
    from eastmoney.com for the last 30 days and persist each row into the
    ``department_count`` table via SuperSpider.

    No parameters, no return value.  Side effects: HTTP requests, database
    select/delete/insert, and progress output via ``print``.
    """
    seen_names = set()  # names already saved this run (O(1) membership)
    department_count = SuperSpider(
        host='47.102.40.81',
        passwd='Abc12345',
        db='bryframe',
        table_name='department_count',
        field_list=('spider_date', 'up_date', 'name', 'list_time',
                    'buy_time', 'buy_sum', 'sell_time', 'sell_sum'))
    month_ago = department_count.date_ago(30)
    page = 1
    while True:
        try:
            json_data = department_count.get_html(
                f'http://data.eastmoney.com/DataCenter_V3/stock2016/TraderStatistic/pagesize=50,page={page},sortRule=-1,sortType=,startDate={month_ago},endDate={department_count.spider_date},gpfw=0,js=var%20data_tab_1.html?rt=25754789',
                'GB2312')
            data_list = department_count.json_to_py(json_data,
                                                    deal=True)['data']
        except Exception:  # was bare except: keep best-effort retry, but do not swallow SystemExit/KeyboardInterrupt
            print(f'第{page}页获取失败')
            page += 1
            continue
        # Stop on the first empty page, or at the hard safety cap of 500 pages.
        if not data_list or page == 500:
            break
        print(f'第{page}页')
        for data in data_list:
            department_count.up_date = department_count.spider_date
            department_count.name = data['SalesName']
            # Guard clause: a department already handled this run is a duplicate.
            if department_count.name in seen_names:
                print(f'{department_count.name}-数据重复')
                continue
            seen_names.add(department_count.name)
            # NOTE(review): scraped values are interpolated straight into SQL
            # (injection-prone); switch to parameterized queries if
            # SuperSpider.sql_search supports them.
            sql = f'select name from department_count where name="{department_count.name}" and spider_date="{department_count.spider_date}"'
            same_data = department_count.sql_search(sql)
            if same_data:
                # A row for this name/date already exists in the DB:
                # delete it so today's scrape replaces it.
                department_count.sql_search(
                    f'delete from department_count where name="{department_count.name}" and spider_date="{department_count.spider_date}"'
                )
                print(
                    f'重新爬取-{department_count.spider_date}-{department_count.name}'
                )
            department_count.list_time = department_count.to_null(
                data['UpCount'])
            department_count.buy_time = department_count.to_null(
                data['BCount'])
            department_count.buy_sum = department_count.to_null(
                data['SumActBMoney'])
            department_count.sell_time = department_count.to_null(
                data['SCount'])
            department_count.sell_sum = department_count.to_null(
                data['SumActSMoney'])
            department_count.data_save()
            print(
                f'证券营业部上榜统计:{department_count.up_date}-{department_count.name}-导入完成'
            )
        page += 1
    department_count.spider_end()
    print('end:证券营业部上榜统计')
def stock_count_spider():
    """Scrape the per-stock "dragon-tiger list" statistics from eastmoney.com
    for the last 30 days and persist each row into the ``stock_count`` table
    via SuperSpider.

    No parameters, no return value.  Side effects: HTTP requests, database
    select/delete/insert, and progress output via ``print``.
    """
    seen_keys = set()  # (up_date, code) pairs already saved this run
    stock_count = SuperSpider(host='47.102.40.81',
                              passwd='Abc12345',
                              db='bryframe',
                              table_name='stock_count',
                              field_list=('spider_date', 'up_date', 'code',
                                          'name', 'list_time', 'buy_sum',
                                          'sell_sum', 'buy_amount'))
    month_ago = stock_count.date_ago(30)
    page = 1
    while True:
        try:
            json_data = stock_count.get_html(
                f'http://data.eastmoney.com/DataCenter_V3/stock2016/StockStatistic/pagesize=50,page={page},sortRule=-1,sortType=,startDate={month_ago},endDate={stock_count.spider_date},gpfw=0,js=var%20data_tab_3.html?rt=25754758',
                'GB2312')
            data_list = stock_count.json_to_py(json_data, deal=True)['data']
        except Exception:  # was bare except: keep best-effort retry, but do not swallow SystemExit/KeyboardInterrupt
            print(f'第{page}页获取失败')
            page += 1
            continue
        # Stop on the first empty page, or at the hard safety cap of 500 pages.
        if not data_list or page == 500:
            break
        print(f'第{page}页')
        for data in data_list:
            stock_count.up_date = data['Tdate']
            stock_count.code = data['SCode']
            stock_count.name = data['SName']
            key = (stock_count.up_date, stock_count.code)
            # Guard clause: the same stock on the same listing date is a duplicate.
            if key in seen_keys:
                print(
                    f'{stock_count.up_date}-{stock_count.code}-{stock_count.name}-数据重复'
                )
                continue
            seen_keys.add(key)
            # NOTE(review): scraped values are interpolated straight into SQL
            # (injection-prone); switch to parameterized queries if
            # SuperSpider.sql_search supports them.
            sql = f'select code from stock_count where code="{stock_count.code}" and spider_date="{stock_count.spider_date}" and up_date="{stock_count.up_date}"'
            same_data = stock_count.sql_search(sql)
            if same_data:
                # A row for this code/date already exists in the DB:
                # delete it so today's scrape replaces it.
                stock_count.sql_search(
                    f'delete from stock_count where code="{stock_count.code}" and spider_date="{stock_count.spider_date}" and up_date="{stock_count.up_date}"'
                )
                print(
                    f'重新爬取-{stock_count.spider_date}-{stock_count.code}-{stock_count.name}'
                )
            stock_count.list_time = stock_count.to_null(data['SumCount'])
            stock_count.buy_sum = stock_count.to_null(data['Bmoney'])
            stock_count.sell_sum = stock_count.to_null(data['Smoney'])
            stock_count.buy_amount = stock_count.to_null(data['JmMoney'])
            stock_count.data_save()
            print(
                f'个股龙虎榜统计:{stock_count.up_date}-{stock_count.code}-{stock_count.name}-导入完成'
            )
        page += 1
    stock_count.spider_end()
    print('end:个股龙虎榜统计')
def department_track_spider():
    """Scrape the institutional-seat buy/sell tracking table from
    eastmoney.com for the last 30 days and persist each row into the
    ``department_track`` table via SuperSpider.

    No parameters, no return value.  Side effects: HTTP requests, database
    inserts, and progress output via ``print``.  Unlike the sibling spiders,
    this one performs no duplicate check before saving.
    """
    department_track = SuperSpider(
        host='47.102.40.81',
        passwd='Abc12345',
        db='bryframe',
        table_name='department_track',
        field_list=('spider_date', 'up_date', 'code', 'name', 'list_time',
                    'buy_sum', 'buy_time', 'sell_time', 'buy_amount',
                    'up_down'))
    month_ago = department_track.date_ago(30)
    page = 1
    while True:
        try:
            # NOTE(review): this spider fetches via use_requests_to_html
            # while its siblings use get_html — presumably deliberate
            # (anti-scraping workaround?); confirm before unifying.
            json_data = department_track.use_requests_to_html(
                f'http://data.eastmoney.com/DataCenter_V3/stock2016/JgStatistic/pagesize=50,page={page},sortRule=-1,sortType=,startDate={month_ago},endDate={department_track.spider_date},gpfw=0,js=var%20data_tab_3.html?rt=25754592',
                'GB2312')
            data_list = department_track.json_to_py(json_data,
                                                    deal=True)['data']
        except Exception:  # was bare except: keep best-effort retry, but do not swallow SystemExit/KeyboardInterrupt
            print(f'第{page}页获取失败')
            page += 1
            continue
        # Stop on the first empty page, or at the hard safety cap of 500 pages.
        if not data_list or page == 500:
            break
        print(f'第{page}页')
        for data in data_list:
            department_track.up_date = department_track.spider_date
            department_track.code = data['SCode']
            department_track.name = data['SName']
            department_track.list_time = department_track.to_null(
                data['UPCount'])
            department_track.buy_sum = department_track.to_null(
                data['JGBMoney'])
            department_track.buy_time = department_track.to_null(
                data['JGBCount'])
            department_track.sell_time = department_track.to_null(
                data['JGSCount'])
            department_track.buy_amount = department_track.to_null(
                data['JGPBuy'])
            department_track.up_down = department_track.to_null(
                data['RChange1M'])
            department_track.data_save()
            print(
                f'机构席位买卖追踪:{department_track.up_date}-{department_track.code}-{department_track.name}-导入完成'
            )
        page += 1
    department_track.spider_end()
    print('end:机构席位买卖追踪')
def bonus_data_spider():
    """Scrape dividend/bonus-plan data from eastmoney.com for every reporting
    period within the last 365 days and persist each row into the
    ``bonus_data`` table via SuperSpider.

    No parameters, no return value.  Side effects: HTTP requests, database
    select/delete/insert, and progress output via ``print``.
    """
    bonus_data = SuperSpider(
        host='139.224.115.44',
        passwd='A9Vg+Dr*nP^fR=1V',
        db='bryframe3',
        table_name='bonus_data',
        field_list=('spider_date', 'bonus_report_date', 'code', 'name',
                    'cash_bonus_rate', 'transfer_rate', 'plan_announce_date',
                    'stock_register_date', 'remove_date', 'plan_scheduler',
                    'latest_announce_date'))
    # All reporting periods offered by the page's <select> dropdown.
    date_list = bonus_data.data_search(
        'http://data.eastmoney.com/yjfp/201812.html',
        '//select[@id="sel_bgq"]/option/text()', 'gb2312')
    year_ago_datetime = bonus_data.to_datetime(bonus_data.date_ago(365))
    # Keep only periods from the last year.  The early `break` assumes
    # date_list is ordered newest-first — TODO confirm against the page.
    recent_dates = []
    for aim_date in date_list:
        if year_ago_datetime <= bonus_data.to_datetime(str(aim_date)):
            recent_dates.append(aim_date)
        else:
            break
    for use_date in recent_dates:
        bonus_data.bonus_report_date = use_date
        page = 1
        while True:
            print(f'第{page}页')
            try:
                json_data = bonus_data.get_html(
                    f'http://data.eastmoney.com/DataCenter_V3/yjfp/getlist.ashx?js=var%20aTnZIWfZ&pagesize=50&page={page}&sr=-1&sortType=YAGGR&mtk=%C8%AB%B2%BF%B9%C9%C6%B1&filter=(ReportingPeriod=^{use_date}^)&rt=51742239',
                    'GB2312')
                data_list = bonus_data.json_to_py(json_data,
                                                  deal=True)['data']
            except Exception:  # was bare except: keep best-effort retry, but do not swallow SystemExit/KeyboardInterrupt
                print(f'第{page}页获取失败')
                page += 1
                continue
            # Stop on the first empty page, or at the hard cap of 500 pages.
            if not data_list or page == 500:
                break
            for data in data_list:
                bonus_data.code = data['Code']
                bonus_data.name = data['Name']
                bonus_data.latest_announce_date = bonus_data.to_null(
                    data['NoticeDate'][:10])
                # NOTE(review): scraped values are interpolated straight into
                # SQL (injection-prone); switch to parameterized queries if
                # SuperSpider.sql_search supports them.
                sql = f'select code from bonus_data where code="{bonus_data.code}" and spider_date="{bonus_data.spider_date}" and latest_announce_date="{bonus_data.latest_announce_date}"'
                same_data = bonus_data.sql_search(sql)
                if same_data:
                    # A row for this code/date already exists in the DB:
                    # delete it so today's scrape replaces it.
                    bonus_data.sql_search(
                        f'delete from bonus_data where code="{bonus_data.code}" and spider_date="{bonus_data.spider_date}" and latest_announce_date="{bonus_data.latest_announce_date}"'
                    )
                    print(
                        f'重新爬取-{bonus_data.spider_date}-{bonus_data.code}-{bonus_data.name}'
                    )
                bonus_data.plan_announce_date = bonus_data.to_null(
                    data['ResultsbyDate'][:10])
                bonus_data.stock_register_date = bonus_data.to_null(
                    data['GQDJR'][:10])
                bonus_data.remove_date = bonus_data.to_null(data['CQCXR'][:10])
                bonus_data.plan_scheduler = data['ProjectProgress']
                group_data = data['AllocationPlan']
                # Parse the free-text allocation plan; each regex takes the
                # first match, and "no match" falls through to a default.
                # (next(gen) replaces the unidiomatic gen.__next__().)
                try:
                    bonus_data.cash_bonus_rate = '10' + next(
                        bonus_data.re_find(r'派[\d\.]+',
                                           group_data)).group() + '元(含税)'
                except Exception:
                    bonus_data.cash_bonus_rate = 'null'
                try:
                    transfer_rate1 = next(
                        bonus_data.re_find(r'转[\d\.]+', group_data)).group()
                except Exception:
                    transfer_rate1 = ''
                try:
                    transfer_rate2 = next(
                        bonus_data.re_find(r'送[\d\.]+', group_data)).group()
                except Exception:
                    transfer_rate2 = ''
                if not transfer_rate1 and not transfer_rate2:
                    bonus_data.transfer_rate = 'null'
                else:
                    bonus_data.transfer_rate = '10' + transfer_rate2 + transfer_rate1
                bonus_data.data_save()
                print(
                    f'{bonus_data.bonus_report_date}-{bonus_data.code}-{bonus_data.name}-导入完成'
                )
            page += 1
    bonus_data.spider_end()