Esempi in Python per DbTool

Linguaggio di programmazione: Python

Spazio dei nomi/nome del pacchetto: search_engine.dbtools

Classe/tipologia: DbTool

Esempi su hotexamples.com: 4

DbTool in Python: 4 esempi trovati. Questi sono i migliori esempi reali in Python di search_engine.dbtools.DbTool, estratti da progetti open source. Puoi valutarli per aiutarci a migliorare la qualità dei nostri esempi.

Metodi utilizzati di frequente

Mostra Nascondi

DbTool(1)

add_link_ref(1)

add_to_index(1)

commit(1)

Esempio n. 1

Mostra file

File: news_spider.py Progetto: siolag161/collective-intelligence

class NewsSpider(BaseSpider):
    """Spider that indexes newyorker.com pages and records their links.

    For each response, the text of ``<body>`` is passed to
    ``self.db.add_to_index`` and every followed ``.html`` link is passed to
    ``self.db.add_link_ref``. ``self.db.commit()`` is called from the
    ``spider_closed`` signal handler.
    """

    name = "newyorker"
    allowed_domains = ["newyorker.com"]
    start_urls = [
        "http://www.newyorker.com",
    ]

    def __init__(self):
        """Connect the spider-closed handler and create the DbTool."""
        dispatcher.connect(self.on_spider_closed, signals.spider_closed)
        self.db = DbTool('search_engine.sql')

    def parse(self, response):
        """Index the page body and yield a Request for each on-site link.

        Only anchors whose ``href`` contains ``".html"`` are considered.
        Relative hrefs are resolved against the URL of the current response.
        """
        hxs = HtmlXPathSelector(response)
        curr_url = response.url

        body = hxs.select('//body')
        if body:
            page_text = remove_tags(body.extract()[0])
            self.db.add_to_index(curr_url, page_text)

        hrefs = hxs.select('//a[contains(@href,".html")]/@href')
        if not hrefs:
            return

        for url in hrefs.extract():
            # Hrefs containing a single quote are skipped.
            # NOTE(review): presumably to protect string-built SQL in
            # DbTool -- confirm against DbTool.add_link_ref.
            if "'" in url:
                continue
            # Drop the fragment so the same page is not recorded twice.
            url = url.split('#')[0]
            if url[0:4] != 'http':
                # Resolve against the current page; this handles both
                # "/path/x.html" and "x.html" style hrefs.
                url = urlparse.urljoin(curr_url, url)
            host = urlparse.urlsplit(url)[1].split(':')[0]
            if host.startswith('www.newyorker.com'):
                # The URL itself (tags stripped) is stored as the link text.
                link_text = remove_tags(url)
                self.db.add_link_ref(curr_url, url, link_text)
                yield Request(url, self.parse)

    def on_spider_closed(self):
        """Commit pending database work when the spider closes."""
        self.db.commit()

Esempio n. 2

Mostra file

File: news_spider.py Progetto: siolag161/collective-intelligence

class NewsSpider(BaseSpider):
    """Crawl newyorker.com, indexing page text and link references.

    ``parse`` hands the ``<body>`` text of each response to
    ``self.db.add_to_index`` and each followed ``.html`` link to
    ``self.db.add_link_ref``; ``on_spider_closed`` calls
    ``self.db.commit()``.
    """

    name = "newyorker"
    allowed_domains = ["newyorker.com"]
    start_urls = [
        "http://www.newyorker.com",
    ]

    def __init__(self):
        """Connect the spider-closed handler and create the DbTool."""
        dispatcher.connect(self.on_spider_closed, signals.spider_closed)
        self.db = DbTool('search_engine.sql')

    def parse(self, response):
        """Index the page body and yield a Request for each on-site link.

        Only anchors whose ``href`` contains ``".html"`` are considered.
        Relative hrefs are resolved against the URL of the current response.
        """
        hxs = HtmlXPathSelector(response)
        curr_url = response.url

        txt = hxs.select('//body')
        if txt:
            txt = remove_tags(txt.extract()[0])
            self.db.add_to_index(curr_url, txt)

        urls = hxs.select('//a[contains(@href,".html")]/@href')
        if urls:
            for url in urls.extract():
                # Hrefs containing a single quote are skipped.
                # NOTE(review): presumably to protect string-built SQL in
                # DbTool -- confirm against DbTool.add_link_ref.
                if url.find("'") != -1:
                    continue
                # Strip the fragment part of the link.
                url = url.split('#')[0]
                if url[0:4] != 'http':
                    # Relative link: resolve it against the current page
                    # instead of gluing it onto a fixed prefix.
                    url = urlparse.urljoin(curr_url, url)
                netloc = urlparse.urlsplit(url)[1]
                if netloc.split(':')[0].startswith('www.newyorker.com'):
                    # The URL itself (tags stripped) is stored as link text.
                    link_text = remove_tags(url)
                    self.db.add_link_ref(curr_url, url, link_text)
                    yield Request(url, self.parse)

    def on_spider_closed(self):
        """Commit pending database work when the spider closes."""
        self.db.commit()

Esempio n. 3

Mostra file

File: news_spider.py Progetto: siolag161/collective-intelligence

    def __init__(self):
        """Connect the spider-closed handler and create the DbTool."""
        # Body indented with spaces only: the original mixed tabs with the
        # space-indented ``def`` line, which Python 3 rejects (TabError).
        dispatcher.connect(self.on_spider_closed, signals.spider_closed)
        self.db = DbTool('search_engine.sql')

Esempio n. 4

Mostra file

File: news_spider.py Progetto: siolag161/collective-intelligence

 def __init__(self):
     """Register the spider-closed handler, then build the DbTool."""
     close_handler = self.on_spider_closed
     dispatcher.connect(close_handler, signals.spider_closed)
     db_name = 'search_engine.sql'
     self.db = DbTool(db_name)