def getlpk(id):
    """Fetch the 'yazhi' analysis page for a match from odds.500.com and extract one value.

    The first character of ``id`` is dropped and the remainder is inserted
    into the page URL.  The page is decoded as gb2312 (undecodable bytes are
    ignored) and parsed; for the first row of ``#datatb`` that contains more
    than two ``.pl_table_data`` cells, the text of the second cell is taken,
    its first space-separated token is passed to ``dealPankou`` and that
    result is returned.

    Args:
        id: Match identifier string.  ``None`` or ``''`` short-circuits.
            (The name shadows the builtin but is kept for caller
            compatibility.)

    Returns:
        Whatever ``dealPankou`` returns for the extracted token, or ``None``
        when ``id`` is empty or no suitable row is found.

    Raises:
        urllib.error.URLError: propagated from ``urlopen`` on network/HTTP
            failure.
    """
    if id is None or id == '':
        return None

    match_id = id[1:]
    url = 'http://odds.500.com/fenxi/yazhi-' + match_id + '.shtml'

    # Accept-Encoding is not requested here (it was disabled in the original
    # code), so the body normally arrives uncompressed; the decompression
    # branches below are kept as a fallback.
    headers = {
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3',
        'Accept-Language': 'zh-CN,zh;q=0.9',
        'Cache-Control': 'max-age=0',
        'Connection': 'keep-alive',
        'Cookie': '__utmz=63332592.1563862138.1.1.utmcsr=baidu|utmccn=(organic)|utmcmd=organic; bdshare_firstime=1563862141629; ck_RegFromUrl=http%3A//live.500.com/; sdc_session=1565686167748; Hm_lvt_4f816d475bb0b9ed640ae412d6b42cab=1563862133,1565686168; __utmc=63332592; ck_RegUrl=live.500.com; __utma=63332592.1207027776.1563862138.1565761436.1566199533.7; motion_id=1566200339532_0.8839423026115882; Hm_lpvt_4f816d475bb0b9ed640ae412d6b42cab=1566200939; WT_FPC=id=undefined:lv=1566200939417:ss=1566199531890; sdc_userflag=1566199531894::1566200939423::2; CLICKSTRN_ID=223.68.192.125-1563862133.804552::0FC4B13DEF23459EB431241528984585; __utmt=1; __utmb=63332592.2.10.1566199533',
        'Host': 'odds.500.com',
        'Upgrade-Insecure-Requests': '1',
        'User-Agent': "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3809.100 Safari/537.36",
    }
    req = urllib.request.Request(url, headers=headers)

    # Open the prepared Request (not the bare URL) so the headers above are
    # actually sent, and close the connection deterministically.
    with urllib.request.urlopen(req) as res:
        content = res.read()
        encoding = res.info().get('Content-Encoding')

    if encoding == 'gzip':
        content = gzipData(content)
    elif encoding == 'deflate':
        content = deflate(content)
    content = content.decode("gb2312", 'ignore')

    soup = BeautifulSoup(content, 'html.parser')
    for tr in soup.select("#datatb tbody tr"):
        tds = tr.select('.pl_table_data tbody tr td')
        if len(tds) > 2:
            # Only the first qualifying row is used.
            return dealPankou(tds[1].get_text().strip().split(' ')[0])
    return None
def parseData(bisaiDate, leixings):
    """Scrape finished-match rows from live.500.com and group them by category.

    NOTE(review): this file contains a second ``parseData`` definition further
    down; if both are at module level the later one replaces this one at
    import time, so this version would never be called.  Confirm which one is
    intended.

    Args:
        bisaiDate: String appended to the ``e=`` query parameter of the URL.
        leixings: Container of category names to keep; a row is kept when
            either of the two names returned by ``changeleixing`` for that
            row is in it.

    Returns:
        dict mapping a category name to a list of row dicts with the keys
        ``id``, ``zhudui``, ``kedui``, ``zhubifen``, ``kebifen`` and
        ``linchangpankou``.  A row is listed under its first category name
        and, when the second name is non-empty, under that one as well.

    Raises:
        urllib.error.URLError: propagated from ``urlopen``.
        IndexError / KeyError: if a kept row lacks one of the expected
            elements or the ``id`` attribute.
    """
    url = 'http://live.500.com/wanchang.php?e=' + bisaiDate
    headers = {
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3',
        'Accept-Language': 'zh-CN,zh;q=0.9',
        # Compressed responses are handled below via gzipData / deflate.
        'Accept-Encoding': 'gzip,deflate',
        'Cache-Control': 'max-age=0',
        'Connection': 'keep-alive',
        'Cookie': '__utmz=63332592.1563862138.1.1.utmcsr=baidu|utmccn=(organic)|utmcmd=organic; bdshare_firstime=1563862141629; ck_RegFromUrl=http%3A//live.500.com/; sdc_session=1565686167748; Hm_lvt_4f816d475bb0b9ed640ae412d6b42cab=1563862133,1565686168; __utmc=63332592; ck_RegUrl=live.500.com; __utma=63332592.1207027776.1563862138.1565761436.1566199533.7; motion_id=1566200339532_0.8839423026115882; Hm_lpvt_4f816d475bb0b9ed640ae412d6b42cab=1566200939; WT_FPC=id=undefined:lv=1566200939417:ss=1566199531890; sdc_userflag=1566199531894::1566200939423::2; CLICKSTRN_ID=223.68.192.125-1563862133.804552::0FC4B13DEF23459EB431241528984585; __utmt=1; __utmb=63332592.2.10.1566199533',
        'Host': 'live.500.com',
        'Upgrade-Insecure-Requests': '1',
        'User-Agent': "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3809.100 Safari/537.36",
        'Content-Type': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3',
    }
    req = urllib.request.Request(url, headers=headers)

    # Open the prepared Request (not the bare URL) so the headers above are
    # actually sent, and close the connection deterministically.
    with urllib.request.urlopen(req) as res:
        content = res.read()
        encoding = res.info().get('Content-Encoding')

    if encoding == 'gzip':
        content = gzipData(content)
    elif encoding == 'deflate':
        content = deflate(content)

    soup = BeautifulSoup(content, 'html.parser')
    grouped = {}
    for tr in soup.select('#table_match tbody tr'):
        # Rows carrying a 'parentid' attribute are skipped.
        if 'parentid' in tr.attrs:
            continue

        leixing, leixing2 = changeleixing(
            tr.select('.ssbox_01 a')[0].get_text().strip())
        # Keep the row only if at least one of its category names is wanted.
        if leixing not in leixings and (leixing2 == ''
                                        or leixing2 not in leixings):
            continue

        team_spans = tr.select('.p_lr01 a span')
        zhudui = changeTeamName(team_spans[0].get_text().strip())
        kedui = changeTeamName(team_spans[1].get_text().strip())
        zhubifen = tr.select('.clt1')[0].get_text().strip()
        kebifen = tr.select('.clt3')[0].get_text().strip()
        linchangpankou = dealPankou(
            tr.select('.fhuise')[0].get_text().strip())
        # Rows without both score cells filled in are ignored.
        if zhubifen == '' or kebifen == '':
            continue

        element = {
            'id': tr.attrs['id'],
            'zhudui': zhudui,
            'kedui': kedui,
            'zhubifen': zhubifen,
            'kebifen': kebifen,
            'linchangpankou': linchangpankou,
        }
        # The same dict object is shared between both category lists.
        grouped.setdefault(leixing, []).append(element)
        if leixing2 != '':
            grouped.setdefault(leixing2, []).append(element)
    return grouped
def parseData(bisaiDate, leixings):
    """Scrape finished-match rows from live.leisu.com and group them by category.

    NOTE(review): an earlier ``parseData`` definition exists in this file; if
    both are at module level, this later one is the one that takes effect.

    Args:
        bisaiDate: Value for the ``date=`` query parameter; converted with
            ``str()``.
        leixings: Container of category names to keep; a row is kept when
            either of the two names returned by ``changeleixing`` for that
            row is in it.

    Returns:
        dict mapping a category name to a list of row dicts with the keys
        ``zhudui``, ``kedui``, ``linchangpankou``, ``zhubifen`` and
        ``kebifen``.  A row is listed under its first category name and, when
        the second name is non-empty, under that one as well.  Rows for which
        ``dealPankou`` returns ``None`` are dropped.

    Raises:
        urllib.error.URLError: propagated from ``urlopen``.
        IndexError: if a kept row lacks one of the expected elements or its
            score text contains no ``-``.
    """
    url = 'https://live.leisu.com/wanchang?date=' + str(bisaiDate)
    # The original header set also carried a hard-coded If-None-Match ETag.
    # Now that the headers are really transmitted, a matching ETag would make
    # the server answer 304, which urllib raises as HTTPError and for which
    # there is no cached body to fall back on, so that header is omitted.
    headers = {
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3',
        'Accept-Language': 'zh-CN,zh;q=0.9',
        # Compressed responses are handled below via gzipData / deflate.
        'Accept-Encoding': 'gzip,deflate',
        'Cache-Control': 'max-age=0',
        'Connection': 'keep-alive',
        'Cookie': 'acw_tc=2f61f27c15662043202395048e4426defba93f49a45f4cf8db50cc5b36835a; lang=; PHPSESSID=rv690j4lh7gqsuh29qrrhs3qb3; Hm_lvt_63b82ac6d9948bad5e14b1398610939a=1566204298,1566204368,1566204377; SERVERID=4ab2f7c19b72630dd03ede01228e3e61|1566974507|1566974422; Hm_lpvt_63b82ac6d9948bad5e14b1398610939a=1566974507',
        'Host': 'live.leisu.com',
        'Sec-Fetch-Mode': 'navigate',
        'Sec-Fetch-Site': 'none',
        'Sec-Fetch-User': '?1',
        'Upgrade-Insecure-Requests': '1',
        'User-Agent': "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3809.100 Safari/537.36",
    }
    req = urllib.request.Request(url, headers=headers)

    # Open the prepared Request (not the bare URL) so the headers above are
    # actually sent, and close the connection deterministically.
    with urllib.request.urlopen(req) as res:
        content = res.read()
        encoding = res.info().get('Content-Encoding')

    if encoding == 'gzip':
        content = gzipData(content)
    elif encoding == 'deflate':
        content = deflate(content)

    soup = BeautifulSoup(content, 'html.parser')
    grouped = {}
    for tr in soup.select('li .clearfix-row'):
        leixing, leixing2 = changeleixing(
            tr.select('.event-name span')[0].get_text().strip())
        # Keep the row only if at least one of its category names is wanted.
        if leixing not in leixings and (leixing2 == ''
                                        or leixing2 not in leixings):
            continue

        zhudui = changeTeamName(
            tr.select('.lab-team-home .name')[0].get_text().strip())
        kedui = changeTeamName(
            tr.select('.lab-team-away .name')[0].get_text().strip())
        # The score cell text is split on '-' into the two score values;
        # query the cell once instead of once per side.
        score_parts = tr.select(
            '.lab-score .color-red')[0].get_text().strip().split('-')
        zhubifen = score_parts[0]
        kebifen = score_parts[1]
        linchangpankou = dealPankou(
            tr.select('.lab-ratel')[0].get_text().strip())
        if linchangpankou is None:
            continue

        element = {
            'zhudui': zhudui,
            'kedui': kedui,
            'linchangpankou': linchangpankou,
            'zhubifen': zhubifen,
            'kebifen': kebifen,
        }
        # The same dict object is shared between both category lists.
        grouped.setdefault(leixing, []).append(element)
        if leixing2 != '':
            grouped.setdefault(leixing2, []).append(element)
    return grouped