Beispiel #1
0
def Crawl_Sport_Title(response):
    """Save the sport headlines that precede the last stored link.

    Extracts the text and ``href`` of every anchor inside
    ``div[@class='dd_bt']``, walks the pairs in document order and stops at
    the first one whose prefixed URL equals the value returned by
    ``DataOptionGet.get_Last_Link_Sport()``. The pairs seen before that
    point are handed to ``DataOptionSave.Sport_Title_Save``.

    Args:
        response: The downloaded page, as accepted by ``HtmlXPathSelector``.

    Returns:
        None. Nothing is saved when no pair precedes the stored link.
    """
    hxs = HtmlXPathSelector(response)
    titles = hxs.select("//div[@class='dd_bt']//a/text()").extract()
    links = hxs.select("//div[@class='dd_bt']//a/@href").extract()
    latest_link = DataOptionGet().get_Last_Link_Sport()

    new_titles = []
    new_links = []
    # zip() stops at the shorter list, so a page where the two queries return
    # a different number of nodes no longer raises IndexError.
    for title, link in zip(titles, links):
        # Only the comparison uses the prefixed URL; the link is saved exactly
        # as extracted (presumably site-relative -- confirm against the page).
        if "http://www.chinanews.com" + str(link) == latest_link:
            break
        new_titles.append(title)
        new_links.append(link)

    if new_links:
        DataOptionSave().Sport_Title_Save(new_links, new_titles)
Beispiel #2
0
def Crawl_Entertainment_Title(response):
    """Save the entertainment headlines that precede the last stored link.

    Extracts the text and ``href`` of the ``h2`` anchors inside
    ``div[@class="news-item  img-news-item"]``, walks the pairs in document
    order and stops at the first link equal to the value returned by
    ``DataOptionGet.get_Last_Link_ET()``. The pairs seen before that point
    are handed to ``DataOptionSave.Entertainment_Title_Save``.

    Args:
        response: The downloaded page, as accepted by ``HtmlXPathSelector``.

    Returns:
        None. Nothing is saved when no pair precedes the stored link.
    """
    hxs = HtmlXPathSelector(response)
    # The class value contains two consecutive spaces; it must match the
    # attribute text exactly, so do not normalize it.
    titles = hxs.select("//div[@class=\"news-item  img-news-item\"]//h2//a/text()").extract()
    links = hxs.select("//div[@class=\"news-item  img-news-item\"]//h2//a/@href").extract()
    latest_link = DataOptionGet().get_Last_Link_ET()

    new_titles = []
    new_links = []
    # zip() stops at the shorter list, so a page where the two queries return
    # a different number of nodes no longer raises IndexError.
    for title, link in zip(titles, links):
        if str(link) == latest_link:
            break
        new_titles.append(title)
        new_links.append(link)

    if new_links:
        DataOptionSave().Entertainment_Title_Save(new_links, new_titles)
Beispiel #3
0
def Crawl_Community_Title(response):
    """Save the community headlines that precede the last stored link.

    Extracts the text and ``href`` of the list-item anchors inside
    ``div[@class="newsList"]``, walks the pairs in document order and stops
    at the first link equal to the value returned by
    ``DataOptionGet.get_Last_Link_CO()``. The pairs seen before that point
    are handed to ``DataOptionSave.Comu_Title_Save``.

    Args:
        response: The downloaded page, as accepted by ``HtmlXPathSelector``.

    Returns:
        None. Nothing is saved when no pair precedes the stored link.
    """
    hxs = HtmlXPathSelector(response)
    titles = hxs.select("//div[@class=\"newsList\"]//ul//li//a/text()").extract()
    links = hxs.select("//div[@class=\"newsList\"]//ul//li//a/@href").extract()
    latest_link = DataOptionGet().get_Last_Link_CO()

    new_titles = []
    new_links = []
    # zip() stops at the shorter list, so a page where the two queries return
    # a different number of nodes no longer raises IndexError.
    for title, link in zip(titles, links):
        if str(link) == latest_link:
            break
        new_titles.append(title)
        new_links.append(link)

    if new_links:
        DataOptionSave().Comu_Title_Save(new_links, new_titles)
Beispiel #4
0
def Crawl_Inter_Title(response):
    """Save the international headlines that precede the last stored link.

    Extracts the text and ``href`` of every anchor with ``target='_blank'``
    and ``class='linkto'``, walks the pairs in document order and stops at
    the first one whose prefixed URL equals the value returned by
    ``DataOptionGet.get_Last_Link_IN()``. The pairs seen before that point
    are handed to ``DataOptionSave.Inter_Title_Save``.

    Args:
        response: The downloaded page, as accepted by ``HtmlXPathSelector``.

    Returns:
        None. Nothing is saved when no pair precedes the stored link.
    """
    hxs = HtmlXPathSelector(response)
    latest_link = DataOptionGet().get_Last_Link_IN()

    # Both expressions start with "//", so they are evaluated from the
    # document root whatever node they are called on. Running them once on
    # the root selector gives the same result as running them on the first
    # <a> node, and also works on a page with no <a> at all, where the
    # title/link lists would otherwise never be assigned.
    titles = hxs.select("//a[@target='_blank' and @class='linkto']/text()").extract()
    links = hxs.select("//a[@target='_blank' and @class='linkto']/@href").extract()

    new_titles = []
    new_links = []
    # zip() stops at the shorter list, so a page where the two queries return
    # a different number of nodes no longer raises IndexError.
    for title, link in zip(titles, links):
        # Only the comparison uses the prefixed URL; the link is collected
        # exactly as extracted.
        if "http://news.qq.com" + str(link) == latest_link:
            break
        new_titles.append(title)
        new_links.append(link)

    if new_links:
        # NOTE(review): unlike the other crawlers in this file, the lists are
        # passed as their str() representation -- confirm that
        # Inter_Title_Save really expects strings rather than lists.
        DataOptionSave().Inter_Title_Save(str(new_links), str(new_titles))