Example #1
0
def getPageTotal():
    """Return the total number of pages advertised on the SSQ index page.

    Fetches the first index page, takes the last text node found directly
    under a ``td`` with ``valign='middle'`` (expected to look like "/25页")
    and returns the number it contains.

    Returns:
        int: the page total parsed from that text.

    Raises:
        IndexError: if the XPath query yields no text node.
        ValueError: if the text holds no digits after its first character.
    """
    import re  # local import: the file's top-level imports are not visible here

    pattern = u"//td[@valign='middle']/text()"
    url = SITE + "/kjxx/ssq/kjgg/index.shtml"
    page = spider.getPage(url)
    nodes = spider.getNodes(page, pattern)
    text = ''.join(nodes[-1]).strip()  # e.g. "/25页"

    # Skip the leading separator character, then take the first run of
    # digits. This avoids depending on how many bytes the trailing "页"
    # (and whatever follows it) occupies once the text is UTF-8 encoded.
    match = re.search(r'\d+', text[1:])
    if match is None:
        raise ValueError("no page total found in %r" % (text,))
    return int(match.group())
Example #2
0
def _getTime(page):
    """Extract the two header fields from a draw detail page.

    Looks up every ``span`` under ``li[@class='caizhong']`` and returns a
    dict with:

    * ``'no'``   -- text of the first span without its first and last
      character (presumably the issue number -- verify against the page).
    * ``'time'`` -- text of the second span with its first five characters
      removed (presumably the draw time -- verify against the page).
    """
    spans = spider.getNodes(page, u"//li[@class='caizhong']//span")
    issue = spans[0].text[1:-1]   # drop the surrounding first/last character
    drawTime = spans[1].text[5:]  # drop the five-character prefix
    return {'no': issue, 'time': drawTime}
Example #3
0
def getPageTotal():
    """Return the total number of pages advertised on the SSQ index page.

    Fetches the first index page, takes the last text node found directly
    under a ``td`` with ``valign='middle'`` (expected to look like "/25页")
    and returns the number it contains.

    Returns:
        int: the page total parsed from that text.

    Raises:
        IndexError: if the XPath query yields no text node.
        ValueError: if the text holds no digits after its first character.
    """
    import re  # local import: the file's top-level imports are not visible here

    pattern = u"//td[@valign='middle']/text()"
    url = SITE + "/kjxx/ssq/kjgg/index.shtml"
    page = spider.getPage(url)
    nodes = spider.getNodes(page, pattern)
    text = ''.join(nodes[-1]).strip()  # e.g. "/25页"

    # Skip the leading separator character, then take the first run of
    # digits. This avoids depending on how many bytes the trailing "页"
    # (and whatever follows it) occupies once the text is UTF-8 encoded.
    match = re.search(r'\d+', text[1:])
    if match is None:
        raise ValueError("no page total found in %r" % (text,))
    return int(match.group())
Example #4
0
def getIndexSSQ(index):
    """Collect the draw results linked from one SSQ index page.

    Args:
        index: 1-based page number. Page 1 maps to ``index.shtml``; any
            other page maps to ``index_<index - 1>.shtml``.

    Returns:
        list of dict: one entry per link matched by ``PATTERN_HERF``. Each
        entry has ``'red'`` (all but the last text matched by
        ``PATTERN_NODE``), ``'blue'`` (a list holding the last text) and
        the keys returned by ``_getTime`` for the same detail page.
    """
    offset = index - 1
    if offset == 0:
        listUrl = SITE + "/kjxx/ssq/kjgg/index.shtml"
    else:
        listUrl = SITE + "/kjxx/ssq/kjgg/index_%d.shtml" % offset

    listPage = spider.getPage(listUrl)
    draws = []
    for link in spider.getNodes(listPage, PATTERN_HERF):
        # The first 8 characters of the href are dropped before it is
        # appended to SITE.
        detailPage = spider.getPage(SITE + link.attrib['href'][8:])
        balls = [ball.text
                 for ball in spider.getNodes(detailPage, PATTERN_NODE)]

        # Fetch the issue-number information for this draw.
        issueInfo = _getTime(detailPage)

        # Merge the issue information into the draw's result entry.
        draw = {'red': balls[:-1], 'blue': balls[-1:]}
        draw.update(issueInfo)
        draws.append(draw)
    return draws
Example #5
0
def getIndexSSQ(index):
    """Return the draw records reachable from the given SSQ index page.

    ``index`` is the 1-based page number: page 1 is served from
    ``index.shtml`` and every other page from ``index_<index - 1>.shtml``.

    For each link matched by ``PATTERN_HERF`` the detail page is fetched
    and turned into a dict with ``'red'`` (every ``PATTERN_NODE`` text but
    the last), ``'blue'`` (a one-element list with the last text) plus the
    fields produced by ``_getTime``. The dicts are returned as a list in
    link order.
    """
    previous = index - 1
    path = ("/kjxx/ssq/kjgg/index.shtml" if previous == 0
            else "/kjxx/ssq/kjgg/index_%d.shtml" % previous)
    listing = spider.getPage(SITE + path)

    records = []
    for anchor in spider.getNodes(listing, PATTERN_HERF):
        # Only the part of the href after its first 8 characters is used.
        target = SITE + anchor.attrib['href'][8:]
        detail = spider.getPage(target)
        numbers = [n.text for n in spider.getNodes(detail, PATTERN_NODE)]

        # Issue-number information for this draw.
        meta = _getTime(detail)

        # Combine the numbers and the issue information into one record.
        record = dict(red=numbers[:-1], blue=numbers[-1:])
        record.update(meta)
        records.append(record)
    return records
Example #6
0
def _getTime(page):
    """Read the ``'no'`` and ``'time'`` fields from a draw detail page.

    Queries every ``span`` below ``li[@class='caizhong']``. ``'no'`` is the
    first span's text with its first and last character removed; ``'time'``
    is the second span's text with its first five characters removed.
    (Presumably the issue number and draw time -- confirm against the page.)
    """
    spanXpath = u"//li[@class='caizhong']//span"
    found = spider.getNodes(page, spanXpath)
    info = {}
    info['no'] = found[0].text[1:-1]
    info['time'] = found[1].text[5:]
    return info
Example #7
0
def getIndexUrlList(page):
    """Return absolute URLs for every ``a[@class='ulink']`` link in *page*.

    Each link's ``href`` is appended to ``SITE``. The attributes of the
    first matching link are also printed to stdout, so the call raises
    ``IndexError`` when the page contains no such link.
    """
    links = spider.getNodes(page, u"//a[@class='ulink']")
    # Single-argument print(...) emits the same text as the print statement.
    print(links[0].attrib)
    return [SITE + link.attrib['href'] for link in links]
Example #8
0
def getTotal(page):
    """Return the last ``option`` text of the ``sldd`` select as an int.

    The XPath collects the text of every ``option`` under
    ``select[@name='sldd']``; the final one is converted with ``int``.
    """
    optionTexts = spider.getNodes(
        page, u"//select[@name='sldd']//option/text()")
    lastOption = optionTexts[-1]
    return int(lastOption)
Example #9
0
def getDetailForOnePage(page):
    """Print the ``href`` of the first link found in a coloured table cell.

    Selects every ``a`` element below a ``td`` that carries a ``bgcolor``
    attribute and prints the ``href`` attribute of the first match.

    NOTE(review): as far as is visible here the function returns nothing
    and only prints -- it looks like an unfinished stub; confirm.
    """
    nodes = spider.getNodes(page, u"//td[@bgcolor]//a")
    # Raises IndexError when the query matches no link.
    print nodes[0].attrib['href']