Beispiel #1
0
def getlpk(id):
    """Download the ``yazhi`` analysis page for one match and extract a value.

    The first character of *id* is dropped and the remainder is used to build
    the odds.500.com page URL.  The page is requested with browser-like
    headers, decoded as GB2312 and parsed with BeautifulSoup.

    Args:
        id: Match identifier string.  Its first character is discarded
            (presumably a one-letter prefix -- confirm against callers).

    Returns:
        ``None`` when *id* is ``None`` or empty, or when no row of the
        ``#datatb`` table holds more than two nested data cells.  Otherwise
        the result of ``dealPankou`` applied to the first space-separated
        token of the second nested cell of the first qualifying row.
    """
    if id is None or id == '':
        return None

    tempUrl = 'http://odds.500.com/fenxi/yazhi-' + id[1:] + '.shtml'
    # NOTE(review): the Cookie below is a hard-coded captured browser session.
    # Accept-Encoding is intentionally not sent (it was disabled in the
    # original code); the decompression branches further down only act if the
    # server compresses the body anyway.
    headers = {
        'Accept':
        'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3',
        'Accept-Language': 'zh-CN,zh;q=0.9',
        'Cache-Control': 'max-age=0',
        'Connection': 'keep-alive',
        'Cookie':
        '__utmz=63332592.1563862138.1.1.utmcsr=baidu|utmccn=(organic)|utmcmd=organic; bdshare_firstime=1563862141629; ck_RegFromUrl=http%3A//live.500.com/; sdc_session=1565686167748; Hm_lvt_4f816d475bb0b9ed640ae412d6b42cab=1563862133,1565686168; __utmc=63332592; ck_RegUrl=live.500.com; __utma=63332592.1207027776.1563862138.1565761436.1566199533.7; motion_id=1566200339532_0.8839423026115882; Hm_lpvt_4f816d475bb0b9ed640ae412d6b42cab=1566200939; WT_FPC=id=undefined:lv=1566200939417:ss=1566199531890; sdc_userflag=1566199531894::1566200939423::2; CLICKSTRN_ID=223.68.192.125-1563862133.804552::0FC4B13DEF23459EB431241528984585; __utmt=1; __utmb=63332592.2.10.1566199533',
        'Host': 'odds.500.com',
        'Upgrade-Insecure-Requests': '1',
        'User-Agent':
        "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3809.100 Safari/537.36",
    }
    req = urllib.request.Request(tempUrl, headers=headers)

    # Open the prepared Request (not the bare URL) so the headers above are
    # actually sent; the context manager closes the connection afterwards.
    with urllib.request.urlopen(req) as res:
        content = res.read()
        encoding = res.info().get('Content-Encoding')

    if encoding == 'gzip':
        content = gzipData(content)
    elif encoding == 'deflate':
        content = deflate(content)
    # Undecodable bytes are dropped rather than raising.
    content = content.decode("gb2312", 'ignore')

    soup = BeautifulSoup(content, 'html.parser')
    for tr in soup.select("#datatb tbody tr"):
        tds = tr.select('.pl_table_data tbody tr td')
        if len(tds) > 2:
            # Only the first qualifying row is used.
            return dealPankou(tds[1].get_text().strip().split(' ')[0])
    return None
Beispiel #2
0
def parseData(bisaiDate, leixings):
    """Scrape the live.500.com ``wanchang`` page for a date and group its rows.

    The page is requested with browser-like headers, optionally decompressed
    and parsed with BeautifulSoup.  Each ``#table_match`` row that passes the
    filters below becomes one dict; the dicts are grouped by the category
    name(s) that ``changeleixing`` returns for the row.

    Args:
        bisaiDate: String appended to the URL as the ``e`` query parameter.
        leixings: Container of category names to keep.  A row is kept when
            either name returned by ``changeleixing`` is in it.

    Returns:
        dict mapping category name to a list of row dicts with the keys
        ``id``, ``zhudui``, ``kedui``, ``zhubifen``, ``kebifen`` and
        ``linchangpankou``.  When ``changeleixing`` yields a non-empty second
        name, the *same* dict object is also listed under that name.

    Raises:
        IndexError: If a processed row lacks one of the expected elements.
    """
    tempUrl = 'http://live.500.com/wanchang.php?e=' + bisaiDate
    # NOTE(review): the Cookie below is a hard-coded captured browser session.
    # The original also set a 'Content-Type' header carrying the Accept value;
    # it is omitted because this GET request has no body to describe.
    headers = {
        'Accept':
        'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3',
        'Accept-Language': 'zh-CN,zh;q=0.9',
        'Accept-Encoding': 'gzip,deflate',
        'Cache-Control': 'max-age=0',
        'Connection': 'keep-alive',
        'Cookie':
        '__utmz=63332592.1563862138.1.1.utmcsr=baidu|utmccn=(organic)|utmcmd=organic; bdshare_firstime=1563862141629; ck_RegFromUrl=http%3A//live.500.com/; sdc_session=1565686167748; Hm_lvt_4f816d475bb0b9ed640ae412d6b42cab=1563862133,1565686168; __utmc=63332592; ck_RegUrl=live.500.com; __utma=63332592.1207027776.1563862138.1565761436.1566199533.7; motion_id=1566200339532_0.8839423026115882; Hm_lpvt_4f816d475bb0b9ed640ae412d6b42cab=1566200939; WT_FPC=id=undefined:lv=1566200939417:ss=1566199531890; sdc_userflag=1566199531894::1566200939423::2; CLICKSTRN_ID=223.68.192.125-1563862133.804552::0FC4B13DEF23459EB431241528984585; __utmt=1; __utmb=63332592.2.10.1566199533',
        'Host': 'live.500.com',
        'Upgrade-Insecure-Requests': '1',
        'User-Agent':
        "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3809.100 Safari/537.36",
    }
    req = urllib.request.Request(tempUrl, headers=headers)

    # Open the prepared Request (not the bare URL) so the headers above are
    # actually sent; the context manager closes the connection afterwards.
    with urllib.request.urlopen(req) as res:
        content = res.read()
        encoding = res.info().get('Content-Encoding')

    if encoding == 'gzip':
        content = gzipData(content)
    elif encoding == 'deflate':
        content = deflate(content)

    soup = BeautifulSoup(content, 'html.parser')
    grouped = {}

    for tr in soup.select('#table_match tbody tr'):
        # Rows carrying a 'parentid' attribute are skipped.
        if 'parentid' in tr.attrs:
            continue

        leixing, leixing2 = changeleixing(
            tr.select('.ssbox_01 a')[0].get_text().strip())
        # Keep the row only if at least one of its category names is wanted.
        if leixing not in leixings and (leixing2 == ''
                                        or leixing2 not in leixings):
            continue

        team_spans = tr.select('.p_lr01 a span')
        zhudui = changeTeamName(team_spans[0].get_text().strip())
        kedui = changeTeamName(team_spans[1].get_text().strip())

        zhubifen = tr.select('.clt1')[0].get_text().strip()
        kebifen = tr.select('.clt3')[0].get_text().strip()
        linchangpankou = dealPankou(tr.select('.fhuise')[0].get_text().strip())

        # Rows with an empty score on either side are dropped.
        if zhubifen == '' or kebifen == '':
            continue

        element = {
            'id': tr.attrs['id'],
            'zhudui': zhudui,
            'kedui': kedui,
            'zhubifen': zhubifen,
            'kebifen': kebifen,
            'linchangpankou': linchangpankou
        }
        grouped.setdefault(leixing, []).append(element)
        if leixing2 != '':
            grouped.setdefault(leixing2, []).append(element)

    return grouped
Beispiel #3
0
def parseData(bisaiDate, leixings):
    """Scrape the live.leisu.com ``wanchang`` page for a date and group its rows.

    The page is requested with browser-like headers, optionally decompressed
    and parsed with BeautifulSoup.  Each ``li .clearfix-row`` element that
    passes the filters below becomes one dict; the dicts are grouped by the
    category name(s) that ``changeleixing`` returns for the row.

    Args:
        bisaiDate: Value converted with ``str()`` and appended to the URL as
            the ``date`` query parameter.
        leixings: Container of category names to keep.  A row is kept when
            either name returned by ``changeleixing`` is in it.

    Returns:
        dict mapping category name to a list of row dicts with the keys
        ``zhudui``, ``kedui``, ``linchangpankou``, ``zhubifen`` and
        ``kebifen``.  When ``changeleixing`` yields a non-empty second name,
        the *same* dict object is also listed under that name.

    Raises:
        IndexError: If a processed row lacks one of the expected elements or
            its score text contains no ``-`` separator.
    """
    tempUrl = 'https://live.leisu.com/wanchang?date=' + str(bisaiDate)
    # NOTE(review): the Cookie below is a hard-coded captured browser session.
    # The original also prepared an 'If-None-Match' header with a fixed ETag.
    # It is omitted: there is no cached copy to fall back on, and a matching
    # ETag would yield "304 Not Modified", which urllib raises as HTTPError.
    headers = {
        'Accept':
        'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3',
        'Accept-Language': 'zh-CN,zh;q=0.9',
        'Accept-Encoding': 'gzip,deflate',
        'Cache-Control': 'max-age=0',
        'Connection': 'keep-alive',
        'Cookie':
        'acw_tc=2f61f27c15662043202395048e4426defba93f49a45f4cf8db50cc5b36835a; lang=; PHPSESSID=rv690j4lh7gqsuh29qrrhs3qb3; Hm_lvt_63b82ac6d9948bad5e14b1398610939a=1566204298,1566204368,1566204377; SERVERID=4ab2f7c19b72630dd03ede01228e3e61|1566974507|1566974422; Hm_lpvt_63b82ac6d9948bad5e14b1398610939a=1566974507',
        'Host': 'live.leisu.com',
        'Sec-Fetch-Mode': 'navigate',
        'Sec-Fetch-Site': 'none',
        'Sec-Fetch-User': '?1',
        'Upgrade-Insecure-Requests': '1',
        'User-Agent':
        "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3809.100 Safari/537.36",
    }
    req = urllib.request.Request(tempUrl, headers=headers)

    # Open the prepared Request (not the bare URL) so the headers above are
    # actually sent; the context manager closes the connection afterwards.
    with urllib.request.urlopen(req) as res:
        content = res.read()
        encoding = res.info().get('Content-Encoding')

    if encoding == 'gzip':
        content = gzipData(content)
    elif encoding == 'deflate':
        content = deflate(content)

    soup = BeautifulSoup(content, 'html.parser')
    grouped = {}

    for tr in soup.select('li .clearfix-row'):
        leixing, leixing2 = changeleixing(
            tr.select('.event-name span')[0].get_text().strip())
        # Keep the row only if at least one of its category names is wanted.
        if leixing not in leixings and (leixing2 == ''
                                        or leixing2 not in leixings):
            continue

        zhudui = changeTeamName(
            tr.select('.lab-team-home .name')[0].get_text().strip())
        kedui = changeTeamName(
            tr.select('.lab-team-away .name')[0].get_text().strip())

        # The score text is split on '-' into the two sides; select and split
        # once instead of once per side.
        score_parts = tr.select(
            '.lab-score .color-red')[0].get_text().strip().split('-')
        zhubifen = score_parts[0]
        kebifen = score_parts[1]

        linchangpankou = dealPankou(
            tr.select('.lab-ratel')[0].get_text().strip())
        # Rows for which dealPankou yields None are dropped.
        if linchangpankou is None:
            continue

        element = {
            'zhudui': zhudui,
            'kedui': kedui,
            'linchangpankou': linchangpankou,
            'zhubifen': zhubifen,
            'kebifen': kebifen
        }
        grouped.setdefault(leixing, []).append(element)
        if leixing2 != '':
            grouped.setdefault(leixing2, []).append(element)

    return grouped