def get_data(self, place_dalian, variety, variety_id, contract_id, year, month, day, path): form_data = { 'memberDealPosiQuotes.variety': '{}'.format(variety_id), 'memberDealPosiQuotes.trade_type': '0', 'year': year, 'month': '{}'.format(month), 'day': '{}'.format(day), 'contract.contract_id': '{}'.format(contract_id), 'contract.variety_id': '{}'.format(variety_id), } try: req = requests.post( "http://www.dce.com.cn/publicweb/quotesdata/memberDealPosiQuotes.html", data=form_data, headers=headers()) if str(req) != '<Response [200]>': print( str(req) + '{}.{}.{}无法获取URL,响应内容长度为零,重新进行获取!'.format( year, int(month) + 1, day)) self.get_data(place_dalian, variety, variety_id, contract_id, year, month, day, path) else: try: html = req.text.encode(req.encoding).decode('utf-8') except (URLError, HTTPError) as e: print("Error: {}".format(e)) try: bsObj = BeautifulSoup(html, "html.parser") except AttributeError as e: print("Error" + e) try: tab = bsObj.findAll('table') if len(tab) > 1: ###### tab1 = tab[1] tds = tab1.findAll('tr') tr_index = len(tds) - 1 if tr_index > 1: # 如果当天有数据,就进行获取 # 创建workbook(即excel文件) workbook = xlwt.Workbook() # 创建表 worksheet = workbook.add_sheet('Sheet1') first_col = worksheet.col(0) first_col.width = 256 * 16 seco_col = worksheet.col(1) seco_col.width = 256 * 16 third_col = worksheet.col(2) third_col.width = 256 * 16 four_col = worksheet.col(3) four_col.width = 256 * 16 five_col = worksheet.col(4) five_col.width = 256 * 16 six_col = worksheet.col(5) six_col.width = 256 * 16 seve_col = worksheet.col(6) seve_col.width = 256 * 16 eight_col = worksheet.col(7) eight_col.width = 256 * 16 nine_col = worksheet.col(8) nine_col.width = 256 * 16 ten_col = worksheet.col(9) ten_col.width = 256 * 16 elev_col = worksheet.col(10) elev_col.width = 256 * 16 twel_col = worksheet.col(11) twel_col.width = 256 * 16 thritteen_col = worksheet.col(12) thritteen_col.width = 256 * 16 fourteen_col = worksheet.col(13) fourteen_col.width = 256 * 16 fifteen_col = worksheet.col(14) fifteen_col.width = 256 * 16 sixteen_col = worksheet.col(15) sixteen_col.width = 256 * 16 # 写标题行数据 worksheet.write(0, 0, '日期') worksheet.write(0, 1, '交易所') worksheet.write(0, 2, '品种') worksheet.write(0, 3, '合约') worksheet.write(0, 4, '成交量名次') worksheet.write(0, 5, '成交量会员简称') worksheet.write(0, 6, '成交量') worksheet.write(0, 7, '成交量增减') worksheet.write(0, 8, '持买单量名次') worksheet.write(0, 9, '持买单量会员简称') worksheet.write(0, 10, '持买单量') worksheet.write(0, 11, '持买单量增减') worksheet.write(0, 12, '持卖单量名次') worksheet.write(0, 13, '持卖单量会员简称') worksheet.write(0, 14, '持卖单量') worksheet.write(0, 15, '持卖单量增减') # 写列表内容数据 for trIter in tds[ 1:tr_index]: # 下标为0的那一行为表格标题,已写好,无需遍历 trIter = trIter.findAll( 'td') # 只需收集非表格标题行的列表内容 # print(len(trIter)) tempList = [] for item in trIter: tempList.append(item.string) if tempList[0:4] or tempList[4:8] or tempList[ 8:12]: worksheet.write( int(tempList[0]) if tempList[0] != '\xa0' else (int(tempList[4]) if tempList[4] != '\xa0' else (int(tempList[8]) if tempList[8] != '\xa0' else tr_index - 1)), 0, '{}/{}/{}'.format( year, int(month) + 1, day)) worksheet.write( int(tempList[0]) if tempList[0] != '\xa0' else (int(tempList[4]) if tempList[4] != '\xa0' else (int(tempList[8]) if tempList[8] != '\xa0' else tr_index - 1)), 1, '{}'.format(place_dalian)) worksheet.write( int(tempList[0]) if tempList[0] != '\xa0' else (int(tempList[4]) if tempList[4] != '\xa0' else (int(tempList[8]) if tempList[8] != '\xa0' else tr_index - 1)), 2, '{}'.format(variety)) worksheet.write( int(tempList[0]) if tempList[0] != '\xa0' else (int(tempList[4]) if tempList[4] != '\xa0' else (int(tempList[8]) if tempList[8] != '\xa0' else tr_index - 1)), 3, '{}'.format(contract_id)) worksheet.write( int(tempList[0]) if tempList[0] != '\xa0' else (int(tempList[4]) if tempList[4] != '\xa0' else (int(tempList[8]) if tempList[8] != '\xa0' else tr_index - 1)), 4, str(tempList[0]).replace('\xa0', '')) worksheet.write( int(tempList[0]) if tempList[0] != '\xa0' else (int(tempList[4]) if tempList[4] != '\xa0' else (int(tempList[8]) if tempList[8] != '\xa0' else tr_index - 1)), 5, str(tempList[1]).replace('\xa0', '')) worksheet.write( int(tempList[0]) if tempList[0] != '\xa0' else (int(tempList[4]) if tempList[4] != '\xa0' else (int(tempList[8]) if tempList[8] != '\xa0' else tr_index - 1)), 6, str(tempList[2]).replace('\xa0', '')) worksheet.write( int(tempList[0]) if tempList[0] != '\xa0' else (int(tempList[4]) if tempList[4] != '\xa0' else (int(tempList[8]) if tempList[8] != '\xa0' else tr_index - 1)), 7, str(tempList[3]).replace('\xa0', '')) worksheet.write( int(tempList[0]) if tempList[0] != '\xa0' else (int(tempList[4]) if tempList[4] != '\xa0' else (int(tempList[8]) if tempList[8] != '\xa0' else tr_index - 1)), 8, str(tempList[4]).replace('\xa0', '')) worksheet.write( int(tempList[0]) if tempList[0] != '\xa0' else (int(tempList[4]) if tempList[4] != '\xa0' else (int(tempList[8]) if tempList[8] != '\xa0' else tr_index - 1)), 9, str(tempList[5]).replace('\xa0', '')) worksheet.write( int(tempList[0]) if tempList[0] != '\xa0' else (int(tempList[4]) if tempList[4] != '\xa0' else (int(tempList[8]) if tempList[8] != '\xa0' else tr_index - 1)), 10, str(tempList[6]).replace('\xa0', '')) worksheet.write( int(tempList[0]) if tempList[0] != '\xa0' else (int(tempList[4]) if tempList[4] != '\xa0' else (int(tempList[8]) if tempList[8] != '\xa0' else tr_index - 1)), 11, str(tempList[7]).replace('\xa0', '')) worksheet.write( int(tempList[0]) if tempList[0] != '\xa0' else (int(tempList[4]) if tempList[4] != '\xa0' else (int(tempList[8]) if tempList[8] != '\xa0' else tr_index - 1)), 12, str(tempList[8]).replace('\xa0', '')) worksheet.write( int(tempList[0]) if tempList[0] != '\xa0' else (int(tempList[4]) if tempList[4] != '\xa0' else (int(tempList[8]) if tempList[8] != '\xa0' else tr_index - 1)), 13, str(tempList[9]).replace('\xa0', '')) worksheet.write( int(tempList[0]) if tempList[0] != '\xa0' else (int(tempList[4]) if tempList[4] != '\xa0' else (int(tempList[8]) if tempList[8] != '\xa0' else tr_index - 1)), 14, str(tempList[10]).replace('\xa0', '')) worksheet.write( int(tempList[0]) if tempList[0] != '\xa0' else (int(tempList[4]) if tempList[4] != '\xa0' else (int(tempList[8]) if tempList[8] != '\xa0' else tr_index - 1)), 15, str(tempList[11]).replace('\xa0', '')) workbook.save('{}\\{}.{}.{}.xls'.format( path, year, int(month) + 1, day)) print('{}.{}.{}的({}-{})数据已采集到并保存为excel文件'.format( year, int(month) + 1, day, variety, contract_id)) finally: pass except (RemoteDisconnected, IncompleteRead, ProtocolError, ConnectionError, ChunkedEncodingError): print('网络异常,重新发送请求!') self.get_data(place_dalian, variety, variety_id, contract_id, year, month, day, path) time.sleep(random.randint(0, 1))
def scraw_shanghai_data(self, place_shanghai, variety, contract_id, year, month, day, url, path): req = requests.get(url.format(year, month, day), headers=headers()) if req.status_code == 200: try: jsonData = req.json() except (URLError, HTTPError, JSONDecodeError) as e: print('Error:{}'.format(e)) if len(jsonData['o_cursor']) > 10: # 创建一个空的excel文件 workbook = xlwt.Workbook() # 创建表 worksheet = workbook.add_sheet('Sheet1') first_col = worksheet.col(0) first_col.width = 256 * 16 secon_col = worksheet.col(1) secon_col.width = 256 * 16 third_col = worksheet.col(2) third_col.width = 256 * 16 four_col = worksheet.col(3) four_col.width = 256 * 16 five_col = worksheet.col(4) five_col.width = 256 * 16 six_col = worksheet.col(5) six_col.width = 256 * 16 seven_col = worksheet.col(6) seven_col.width = 256 * 16 eight_col = worksheet.col(7) eight_col.width = 256 * 16 nine_col = worksheet.col(8) nine_col.width = 256 * 16 ten_col = worksheet.col(9) ten_col.width = 256 * 16 elev_col = worksheet.col(10) elev_col.width = 256 * 16 twel_col = worksheet.col(11) twel_col.width = 256 * 16 thritteen_col = worksheet.col(12) thritteen_col.width = 256 * 16 fourteen_col = worksheet.col(13) fourteen_col.width = 256 * 16 fifteen_col = worksheet.col(14) fifteen_col.width = 256 * 16 sixteen_col = worksheet.col(15) sixteen_col.width = 256 * 16 # 写标题行数据 worksheet.write(0, 0, '日期') worksheet.write(0, 1, '交易所') worksheet.write(0, 2, '品种') worksheet.write(0, 3, '合约') worksheet.write(0, 4, '成交量名次') worksheet.write(0, 5, '成交量会员简称') worksheet.write(0, 6, '成交量') worksheet.write(0, 7, '成交量增减') worksheet.write(0, 8, '持买单量名次') worksheet.write(0, 9, '持买单量会员简称') worksheet.write(0, 10, '持买单量') worksheet.write(0, 11, '持买单量增减') worksheet.write(0, 12, '持卖单量名次') worksheet.write(0, 13, '持卖单量会员简称') worksheet.write(0, 14, '持卖单量') worksheet.write(0, 15, '持卖单量增减') for i in range(len(jsonData['o_cursor'])): if jsonData['o_cursor'][i]['INSTRUMENTID'].replace( ' ', '') == contract_id: if jsonData['o_cursor'][i]['RANK'] != -1 and jsonData[ 'o_cursor'][i]['RANK'] != 0 and jsonData[ 'o_cursor'][i]['RANK'] != 999: worksheet.write( int(jsonData['o_cursor'][i]['RANK']), 0, '{}/{}/{}'.format(year, month, day)) # 日期 worksheet.write(jsonData['o_cursor'][i]['RANK'], 1, place_shanghai) # 交易所 worksheet.write(jsonData['o_cursor'][i]['RANK'], 2, variety) # 品种 worksheet.write(jsonData['o_cursor'][i]['RANK'], 3, contract_id) # 合约 worksheet.write( jsonData['o_cursor'][i]['RANK'], 4, jsonData['o_cursor'][i]['RANK']) # 成交量名次 worksheet.write( jsonData['o_cursor'][i]['RANK'], 5, jsonData['o_cursor'][i]['PARTICIPANTABBR1'], ) # 成交量会员简称 worksheet.write( jsonData['o_cursor'][i]['RANK'], 6, jsonData['o_cursor'][i]['CJ1']) # 成交量 worksheet.write( jsonData['o_cursor'][i]['RANK'], 7, jsonData['o_cursor'][i]['CJ1_CHG']) # 成交量增减 worksheet.write( jsonData['o_cursor'][i]['RANK'], 8, jsonData['o_cursor'][i]['RANK']) # 持买单量名次 worksheet.write(jsonData['o_cursor'][i]['RANK'], 9, jsonData['o_cursor'][i] ['PARTICIPANTABBR2']) # 持买单量会员简称 worksheet.write( jsonData['o_cursor'][i]['RANK'], 10, jsonData['o_cursor'][i]['CJ2']) # 持买单量 worksheet.write( jsonData['o_cursor'][i]['RANK'], 11, jsonData['o_cursor'][i]['CJ2_CHG']) # 持买单量增减 worksheet.write( jsonData['o_cursor'][i]['RANK'], 12, jsonData['o_cursor'][i]['RANK']) # 持卖单量名次 worksheet.write(jsonData['o_cursor'][i]['RANK'], 13, jsonData['o_cursor'][i] ['PARTICIPANTABBR3']) # 持卖单量会员简称 worksheet.write( jsonData['o_cursor'][i]['RANK'], 14, jsonData['o_cursor'][i]['CJ3']) # 持卖单量 worksheet.write( jsonData['o_cursor'][i]['RANK'], 15, jsonData['o_cursor'][i]['CJ3_CHG']) # 持卖单量增减 workbook.save('{}\\{}.{}.{}.xls'.format( path, year, month, day)) print('{}.{}.{}的({}-{})数据已采集到并保存为excel文件'.format( year, month, day, variety, contract_id)) time.sleep(random.randint(0, 1))
def get_data(self, place_zhengzhou, variety, contract_id, year, month, day, path): req = requests.get( 'http://www.czce.com.cn/portal/DFSStaticFiles/Future/{}/{}{}{}/FutureDataHolding.htm' .format(year, year, month, day), headers=headers()) if str(req) != '<Response [200]>': pass else: try: html = req.text.encode(req.encoding).decode('gbk') except (HTTPError, URLError) as e: print("Error: {}".format(e)) try: bsObj = BeautifulSoup(html, 'html.parser') except AttributeError as e: print("Error: {}".format(e)) tab = bsObj.findAll('table') if len(tab) > 0: tab1 = tab[1] tr = tab1.findAll('tr') contract_index = 0 for trItem in tr: contract_index += 1 if trItem.text.replace('\n', '').replace('\xa0', '').find( '品种:{}'.format(variety)) == True: break workbook = xlwt.Workbook() # 创建表 worksheet = workbook.add_sheet('Sheet1') first_col = worksheet.col(0) first_col.width = 256 * 16 seco_col = worksheet.col(1) seco_col.width = 256 * 16 third_col = worksheet.col(2) third_col.width = 256 * 16 four_col = worksheet.col(3) four_col.width = 256 * 16 five_col = worksheet.col(4) five_col.width = 256 * 16 six_col = worksheet.col(5) six_col.width = 256 * 16 seve_col = worksheet.col(6) seve_col.width = 256 * 16 eight_col = worksheet.col(7) eight_col.width = 256 * 16 nine_col = worksheet.col(8) nine_col.width = 256 * 16 ten_col = worksheet.col(9) ten_col.width = 256 * 16 elev_col = worksheet.col(10) elev_col.width = 256 * 16 twel_col = worksheet.col(11) twel_col.width = 256 * 16 thritteen_col = worksheet.col(12) thritteen_col.width = 256 * 16 fourteen_col = worksheet.col(13) fourteen_col.width = 256 * 16 fifteen_col = worksheet.col(14) fifteen_col.width = 256 * 16 sixteen_col = worksheet.col(15) sixteen_col.width = 256 * 16 # 写标题行数据 worksheet.write(0, 0, '日期') worksheet.write(0, 1, '交易所') worksheet.write(0, 2, '品种') worksheet.write(0, 3, '合约') worksheet.write(0, 4, '成交量名次') worksheet.write(0, 5, '成交量会员简称') worksheet.write(0, 6, '成交量') worksheet.write(0, 7, '成交量增减') worksheet.write(0, 8, '持买单量名次') worksheet.write(0, 9, '持买单量会员简称') worksheet.write(0, 10, '持买单量') worksheet.write(0, 11, '持买单量增减') worksheet.write(0, 12, '持卖单量名次') worksheet.write(0, 13, '持卖单量会员简称') worksheet.write(0, 14, '持卖单量') worksheet.write(0, 15, '持卖单量增减') for trIter in tr[contract_index + 1:]: tempList = [] trIter = trIter.findAll('td') for item in trIter: tempList.append(item.string) if tempList[0] == '合计': break if tempList[0:9]: worksheet.write(int(tempList[0]), 0, '{}/{}/{}'.format(year, month, day)) worksheet.write(int(tempList[0]), 1, place_zhengzhou) worksheet.write(int(tempList[0]), 2, variety) worksheet.write(int(tempList[0]), 3, contract_id) worksheet.write(int(tempList[0]), 4, tempList[0]) worksheet.write(int(tempList[0]), 5, tempList[1]) worksheet.write(int(tempList[0]), 6, tempList[2]) worksheet.write(int(tempList[0]), 7, tempList[3]) worksheet.write(int(tempList[0]), 8, tempList[0]) worksheet.write(int(tempList[0]), 9, tempList[4]) worksheet.write(int(tempList[0]), 10, tempList[5]) worksheet.write(int(tempList[0]), 11, tempList[6]) worksheet.write(int(tempList[0]), 12, tempList[0]) worksheet.write(int(tempList[0]), 13, tempList[7]) worksheet.write(int(tempList[0]), 14, tempList[8]) worksheet.write(int(tempList[0]), 15, tempList[9]) workbook.save('{}\\{}.{}.{}-{}.xls'.format( path, year, month, day, contract_id)) print('{}.{}.{}的({}-{})数据已采集到并保存为excel文件'.format( year, month, day, variety, contract_id)) time.sleep(random.randint(0, 1))