def main():
    """Import the stock listing into ``t_jj_corp_info``, one page at a time.

    Posts the ``STK_CODE`` query to ``API_URL`` with an increasing ``page``
    number and inserts one row per returned item: a fresh id from
    ``get_uuid()`` plus the item's ``COMCODE``, ``STOCKCODE`` and
    ``STOCKSNAME``.  Paging stops at the first empty page, or at the first
    page whose request, JSON decoding or insert raises an exception.
    """
    page_index = 1
    imported = 0
    while True:
        try:
            data = {
                'url': '/db/STK_CODE/full=2&filter-STATUS_TYPE-str=正常上市&filter-STK_TYPE-str=A股&zip=Gzip&skip=0&limit=20',
                'page': page_index,
                'skip': 0,
                'limit': 20,
                'id': 'd4574'
            }
            resp = requests.post(API_URL, data=data, headers=getHeaders())
            items = json.loads(resp.text)['data']
            # An empty page means there is nothing left to import; without
            # this check the loop would request ever-higher pages forever.
            if not items:
                break
            for item in items:
                row_id = get_uuid()
                corp_code = item['COMCODE']
                stock_code = item['STOCKCODE']
                bond_short = item['STOCKSNAME']
                # NOTE(review): the values are interpolated straight into the
                # SQL text; switch to a parameterized query if execute_sql
                # supports one.
                sql = "INSERT INTO t_jj_corp_info (id, corp_code, stock_code, bond_short) values('{0}','{1}', '{2}', '{3}')".format(
                    row_id, corp_code, stock_code, bond_short)
                execute_sql(sql)
                imported += 1
                print('股票代码:{0},股票名称:{1}'.format(stock_code, bond_short))
            # Report the rows actually inserted rather than page_index * 20,
            # which overstates the total when a page is only partly filled.
            print('已导入{0}条数据'.format(imported))
            page_index += 1
        except Exception as exc:
            # Any failure ends the import (best effort), but the reason is
            # reported, and KeyboardInterrupt/SystemExit are no longer
            # swallowed as they were with ``except BaseException``.
            print('第{0}页导入中止:{1!r}'.format(page_index, exc))
            break
    print('导入数据成功!')
def main():
    """Update company details for every stock returned by ``get_list()``.

    The stock code (index 1 of each row) is collected, the codes are split
    with ``partition(codes, 20)`` (presumably into groups of 20 - confirm
    against the helper) and each group is joined with commas.  One
    ``PSTK_COM_INFO`` request is sent per group and every item in the
    response's ``data`` is passed to ``do_update``.
    """
    codes = [record[1] for record in get_list()]
    # All batch strings are built before the first request is sent.
    batches = [','.join(group) for group in list(partition(codes, 20))]

    updated = 0  # running count across all batches
    for batch in batches:
        payload = {
            'url': '/base/PSTK_COM_INFO/full=2&filter-STOCKCODE-in-str={0}&zip=Gzip&skip=0&limit=20'.format(
                batch),
            'page': 1,
            'skip': 0,
            'limit': 20,
            'id': '52',
        }
        response = requests.post(API_URL, data=payload, headers=getHeaders())
        print('请求数据股票代码:{}'.format(batch))
        entries = json.loads(response.text)['data']
        for entry in entries:
            do_update(entry)
            updated = updated + 1
            print('成功更新第{}条数据, 股票代码:{},股票名称:{}'.format(
                updated, entry['STOCKCODE'], entry['A_STOCKSNAME']))
    print('全部更新完成!')
def main():
    """Crawl ``STK_INCOME_GEN`` data for each company and store it in one batch.

    For every row from ``get_corp_list()`` (id, stock code, stock name at
    indexes 0-2) a single request is posted to ``API_URL``.  When the decoded
    response has ``code`` 200, its ``data`` is passed through ``filter_data``
    and each resulting item is added to the pending SQL list with
    ``append_sql_list``; otherwise the company is skipped.  The accumulated
    statements are handed to ``batch_execute`` after the loop, and elapsed
    time is reported through ``TimeDuration``.
    """
    timer = TimeDuration()
    timer.start()
    companies = get_corp_list()
    pending_sql = []
    stock_index = STOCK_INDEX

    for company in companies:
        corp_id, stock_code, stock_name = company[0], company[1], company[2]
        stock_index += 1

        payload = {
            'url': '/db/STK_INCOME_GEN/full=2&filter-A_STOCKCODE-str={}&filter-RPT_SRC-str=年报&filter-RPT_TYPE-str=合并&filter-RPT_DATE-gte-dt=20201231&zip=Gzip&field=RPT_DATE,ENDDATE,A_STOCKCODE,P110101,P150101,P110205&skip=0&limit=20'.format(
                stock_code),
            'page': 1,
            'skip': 0,
            'limit': 20,
            'id': 'd6664',
        }
        print('{} {},准备爬取第{}支股票,股票代码:{}, 股票名称:{},已耗时:{}'.format(
            datetime.datetime.now(), REPORT_NAME, stock_index, stock_code,
            stock_name, timer.getTillNow()))

        response = requests.post(API_URL, data=payload, headers=getHeaders())
        reply = json.loads(response.text)
        remaining = LIMIT_SIZE + STOCK_INDEX - stock_index

        if reply['code'] == 200:
            print('{} 成功爬取数据,当前剩余{}支股票,已耗时:{}'.format(
                datetime.datetime.now(), remaining, timer.getTillNow()))
            for entry in filter_data(reply['data']):
                append_sql_list(pending_sql, corp_id, entry)
            # The one-second pause only follows a successful fetch.
            time.sleep(1)
        else:
            print('{} 爬取失败,数据为空,当前剩余{}支股票,已耗时:{}'.format(
                datetime.datetime.now(), remaining, timer.getTillNow()))

    batch_execute(pending_sql)
    print('成功更新{}数据到第{}支股票'.format(REPORT_NAME, stock_index))
    timer.stop()
    timer.printDurationInfo()
def main():
    """Crawl all pages of ``STK_NON_EXTR_ITEM`` data per company and store them.

    For every row from ``get_corp_list()`` (id, stock code, stock name and
    company code at indexes 0-3) the endpoint is requested page by page.
    Paging for a company ends when the response body is not valid JSON, when
    the decoded ``code`` is not 200, or when a page carries no data.  The
    collected items go through ``filter_data`` and are added to the pending
    SQL list with ``append_sql_list``; the list is handed to
    ``batch_execute`` once all companies have been processed.
    """
    duration = TimeDuration()
    duration.start()
    rows = get_corp_list()
    sql_list = []
    stock_index = STOCK_INDEX
    for row in rows:
        corp_id = row[0]
        stock_code = row[1]
        stock_name = row[2]
        corp_code = row[3]
        stock_index += 1
        print('{} {},准备爬取第{}支股票,股票代码:{}, 股票名称:{},已耗时:{}'.format(
            datetime.now(), REPORT_NAME, stock_index, stock_code, stock_name,
            duration.getTillNow()))
        # The query depends only on the company, so build it once instead of
        # once per page.
        url = '/db/STK_NON_EXTR_ITEM/full=2&filter-COMCODE-int={}&filter-RPT_SRC-str=年报&filter-RPT_DATE-gte-dt=20201231&zip=Gzip&skip=0&limit=20'.format(
            corp_code)
        page_index = 1
        array = []
        while True:
            data = {
                'url': url,
                'page': page_index,
                'skip': 0,
                'limit': 20,
                'id': 'd14163'
            }
            resp = requests.post(API_URL, data=data, headers=getHeaders())
            try:
                json_data = json.loads(resp.text)
            except ValueError:
                # A body that is not JSON ends paging for this company.
                # json.JSONDecodeError is a ValueError; catching only that
                # lets KeyboardInterrupt/SystemExit propagate.
                break
            if json_data['code'] != 200:
                remain_count = LIMIT_SIZE + STOCK_INDEX - stock_index
                print('{} 爬取失败,数据为空,当前剩余{}支股票,已耗时:{}'.format(
                    datetime.now(), remain_count, duration.getTillNow()))
                break
            page_items = json_data['data']
            # A 200 response without data would otherwise keep the loop
            # requesting further pages forever (or crash on ``None``).
            if not page_items:
                break
            print('{} 成功爬取第{}页数据'.format(datetime.now(), page_index))
            array.extend(page_items)
            page_index += 1
        remain_count = LIMIT_SIZE + STOCK_INDEX - stock_index
        print('{} 成功爬取数据,当前剩余{}支股票,已耗时:{}'.format(
            datetime.now(), remain_count, duration.getTillNow()))
        if not array:
            continue
        new_list = filter_data(array)
        for item in new_list:
            append_sql_list(sql_list, corp_id, item)
        # Pause only after a company that actually yielded data.
        time.sleep(1)
    batch_execute(sql_list)
    print('成功更新{}数据到第{}支股票'.format(REPORT_NAME, stock_index))
    duration.stop()
    duration.printDurationInfo()