def get_title(text):
    """Return the substring between the first ':' and the next '.'.

    DBLP entry strings look like "<authors>: <title>. <venue ...>"; this
    extracts the title part (leading space included, exactly as in the raw
    text). If either marker is missing, the input is returned unchanged
    instead of raising ValueError.
    """
    try:
        after_colon = text[text.index(':') + 1:]
        return after_colon[:after_colon.index('.')]
    except ValueError:  # ':' or '.' not present in the string
        return text


def main():
    """Search DBLP for an author and print '<authors> -> <title>' per row."""
    # Imported lazily so the module (and get_title) can be imported even
    # when the project-local crawler dependency is unavailable.
    from crawler import Crawler

    localhost = False
    search_term = 'Giuseppe Vizzari'

    if localhost:
        dblp = Crawler(base_url='http://localhost', port=50001)
        dblp.visit('/dblp.html')
    else:
        dblp = Crawler(base_url='http://www.informatik.uni-trier.de', port=50001)
        dblp.visit('/~ley/db/indices/a-tree/index.html')

    # Fill the author search box and submit the form.
    dblp.search('//*[@name="author"]', search_term)

    trs = dblp.sess.xpath('//p[1]/table/tbody/tr')
    print(trs)
    for tr in trs:
        tds = tr.xpath('./td')
        if tds:
            # Column 0 holds the authors, column 2 the full entry string.
            print("%s -> %s" % (tds[0].text(), get_title(tds[2].text())))


if __name__ == '__main__':
    main()
def main():
    """Search Google Scholar for an author; print paper titles and cited-by links."""
    # Imported lazily so the module stays importable without the
    # project-local crawler dependency installed.
    from crawler import Crawler

    localhost = False
    search_term = 'Giuseppe Vizzari'

    if localhost:
        scholar = Crawler(port=50002)
        scholar.visit('http://localhost/scholar.htm')
    else:
        scholar = Crawler(base_url='http://scholar.google.com', port=50002)
        scholar.visit('/')

    # Fill the query box and submit the search form.
    scholar.search('//*[@name="q"]', search_term)

    # NOTE(review): new_page is never set back to True inside the loop, so
    # only the first result page is ever processed — pagination is a stub.
    new_page = True
    while new_page:
        new_page = False
        # Each result is wrapped in a div with class "gs_r".
        divs = scholar.sess.xpath('//*[@class="gs_r"]')
        for div in divs:
            papers = div.xpath('./*[@class="gs_rt"]/a')
            print("papers: %s" % papers)
            if papers:
                print(papers[0].text())
            # "Cited by N" links live in the gs_fl footer of each result.
            citedbies = div.xpath('./*[@class="gs_fl"]/a')
            print("citedbies: %s" % citedbies)
            if citedbies:
                print(citedbies[0]["href"])


if __name__ == '__main__':
    main()
def main():
    """Demonstrate the Crawler API: log in to GitHub, list repos, dump session state."""
    # Imported lazily so the module stays importable without the
    # project-local crawler / third-party bs4 dependencies installed.
    from crawler import Crawler
    from bs4 import BeautifulSoup

    crawler = Crawler()

    # Returns a list of the forms available on the login page.
    forms = crawler.get_forms('https://github.com/login')

    # Submits the login form (index 1 on this page).
    # NOTE(review): placeholder credentials — load real ones from config or
    # environment variables; never commit secrets to source control.
    crawler.submit(forms[1], {'login': '******', 'password': '******'})

    # A plain GET request; returns a response object.
    response = crawler.visit('https://github.com')

    # Parse the response body with BeautifulSoup's lxml backend.
    html = BeautifulSoup(response.text, 'lxml')

    # Scrape the repository names from the repo listing element.
    for repo in html.find(id='repo_listing').find_all('span', {'class': 'repo'}):
        print(repo.text)

    # Dump the session's cookies and headers for inspection.
    print('\n', crawler.get_cookies())
    print('\n', crawler.get_headers())


if __name__ == '__main__':
    main()