Esempio n. 1
0
def Crawl_China_Title(response):
    """Collect headline anchors seen before the last stored link and save them.

    Walks every ``<a target="_blank" class="linkto">`` element of ``response``
    in document order. Iteration stops at the first anchor whose absolute URL
    (``"http://news.qq.com"`` + href) equals the value returned by
    ``DataOptionGet().get_Last_Link_CH()``. The hrefs and titles gathered
    before that point are handed to ``DataOptionSave().Chnews_Title_Save``
    as ``str(list_of_links), str(list_of_titles)``.

    Args:
        response: Object accepted by ``HtmlXPathSelector``.

    Returns:
        None. The only visible effect is the ``Chnews_Title_Save`` call,
        which is skipped when no anchor precedes the stored link.
    """
    hxs = HtmlXPathSelector(response)
    # Query the document once from the root. An XPath starting with "//" is
    # absolute even when run from a nested selector, so looping over every
    # <a> just to issue it (and leaving the result unset when the page has no
    # <a> at all) was both redundant and a crash on link-less pages.
    anchors = hxs.select("//a[@target='_blank' and @class='linkto']")
    latest_link = DataOptionGet().get_Last_Link_CH()

    base_url = "http://news.qq.com"
    new_titles = []
    new_links = []
    for anchor in anchors:
        # Extract per anchor with relative XPaths so each title stays paired
        # with its own href; two independent flat lists drift out of step as
        # soon as one anchor lacks text, has several text nodes, or no href.
        hrefs = anchor.select("@href").extract()
        if not hrefs:
            continue
        link = hrefs[0]
        # Presumably the page lists newest entries first, so everything after
        # the stored link was already saved -- TODO confirm against the site.
        if base_url + str(link) == latest_link:
            break
        texts = anchor.select("text()").extract()
        if not texts:
            continue
        new_titles.append(u"".join(texts))
        new_links.append(link)

    if new_links:
        DataOptionSave().Chnews_Title_Save(str(new_links), str(new_titles))