Example #1
0
def getPageTotal():
    """Return the page count parsed from the ssq/kjgg index page.

    Fetches ``SITE + "/kjxx/ssq/kjgg/index.shtml"``, takes the last text
    node matched by the ``td[@valign='middle']`` XPath and parses the
    integer embedded in it.

    Returns:
        int: the number found between the first byte and the trailing
        seven bytes of the UTF-8 encoded, stripped pager text.

    Raises:
        IndexError: if the XPath lookup yields an empty sequence.
        ValueError: if the sliced text is not a valid integer.
    """
    xpath = u"//td[@valign='middle']/text()"
    index_url = SITE + "/kjxx/ssq/kjgg/index.shtml"
    matches = spider.getNodes(spider.getPage(index_url), xpath)

    # Per the original author the text looks like "/25" followed by the
    # CJK "page" character.
    pager_text = ''.join(matches[-1])
    pager_bytes = pager_text.encode('utf-8').strip()

    # Skip the leading byte and drop the trailing suffix, which the original
    # author states occupies seven bytes.
    # NOTE(review): a single CJK character is 3 bytes in UTF-8, so the suffix
    # presumably contains more than that one character - confirm against the
    # live page.
    stop = len(pager_bytes) - 7
    return int(pager_bytes[1:stop])
Example #2
0
def getPageTotal():
    """Read the total page count shown on the first ssq/kjgg listing page.

    The count is taken from the last text node returned for the
    ``td[@valign='middle']`` XPath on ``index.shtml``.

    Returns:
        int: the integer obtained after removing the first byte and the
        last seven bytes of the UTF-8 encoded, stripped text.

    Raises:
        IndexError: if no text node is returned for the XPath.
        ValueError: if the remaining bytes do not form an integer.
    """
    first_page = spider.getPage(SITE + "/kjxx/ssq/kjgg/index.shtml")
    texts = spider.getNodes(first_page, u"//td[@valign='middle']/text()")

    # Original author's example of this text: "/25" plus the CJK "page" char.
    pager = ''.join(texts[-1]).encode('utf-8').strip()

    # The original comment says the trailing suffix takes seven bytes.
    # NOTE(review): that exceeds one UTF-8 CJK character (3 bytes); verify
    # what else the suffix contains before touching this offset.
    digits = pager[1:len(pager) - 7]
    return int(digits)
Example #3
0
def getIndexSSQ(index):
    """Collect the draw records linked from one listing page.

    Args:
        index: 1-based listing page number. Page 1 maps to ``index.shtml``;
            any other value maps to ``index_<index - 1>.shtml``.

    Returns:
        list: one dict per link matched by ``PATTERN_HERF``. Each dict has
        ``'red'`` (text of every ``PATTERN_NODE`` match except the last) and
        ``'blue'`` (a list holding the text of the last match), updated with
        whatever ``_getTime`` returns for the item page.
    """
    offset = index - 1
    if offset == 0:
        listing_url = SITE + "/kjxx/ssq/kjgg/index.shtml"
    else:
        listing_url = SITE + "/kjxx/ssq/kjgg/index_%d.shtml" % offset

    listing_page = spider.getPage(listing_url)
    draws = []
    for link in spider.getNodes(listing_page, PATTERN_HERF):
        # The first 8 characters of the href are discarded before prefixing
        # SITE - presumably a relative-path prefix; verify against the page.
        detail_url = SITE + link.attrib['href'][8:]
        detail_page = spider.getPage(detail_url)
        balls = [ball.text for ball in spider.getNodes(detail_page, PATTERN_NODE)]

        # Fetch the issue-number information for this item page.
        issue_info = _getTime(detail_page)

        # Merge the issue information into the per-draw record.
        record = {'red': balls[:-1], 'blue': balls[-1:]}
        record.update(issue_info)
        draws.append(record)
    return draws
Example #4
0
def getIndexSSQ(index):
    """Return the draw records found through listing page ``index``.

    Args:
        index: listing page number; ``index - 1 == 0`` selects
            ``index.shtml``, otherwise ``index_<index - 1>.shtml`` is used.

    Returns:
        list: a dict for every ``PATTERN_HERF`` match on the listing page,
        with keys ``'red'`` and ``'blue'`` built from the ``PATTERN_NODE``
        matches on the linked page, plus the entries supplied by
        ``_getTime`` for that page.
    """
    def _listing_url(page_offset):
        # The first listing page has no numeric suffix in its file name.
        if page_offset == 0:
            return SITE + "/kjxx/ssq/kjgg/index.shtml"
        return SITE + "/kjxx/ssq/kjgg/index_%d.shtml" % page_offset

    listing = spider.getPage(_listing_url(index - 1))
    anchors = spider.getNodes(listing, PATTERN_HERF)

    collected = []
    for anchor in anchors:
        # href is used from its 9th character onward - presumably to strip a
        # relative prefix; confirm against the site's markup.
        href_tail = anchor.attrib['href'][8:]
        item_page = spider.getPage(SITE + href_tail)
        number_nodes = spider.getNodes(item_page, PATTERN_NODE)
        numbers = [n.text for n in number_nodes]

        # Issue-number information for this item page.
        issue = _getTime(item_page)

        # Combine the numbers with the issue information: every entry but
        # the last goes under 'red', the last one (as a list) under 'blue'.
        item = {'red': numbers[:-1], 'blue': numbers[-1:]}
        item.update(issue)
        collected.append(item)
    return collected
Example #5
0
def getPage(url):
    """Fetch ``url`` through ``spider.getPage`` with ``'gb2312'`` as second argument.

    Args:
        url: address handed unchanged to ``spider.getPage``.

    Returns:
        Whatever ``spider.getPage`` returns for this call.
    """
    # 'gb2312' is presumably the page encoding - confirm in spider.getPage.
    page = spider.getPage(url, 'gb2312')
    return page