Python get_url Examples

Programming Language: Python

Namespace/Package Name: crawl_tools

Method/Function: get_url

Examples at hotexamples.com: 3

Python get_url - 3 examples found. These are the top-rated real-world Python examples of crawl_tools.get_url, extracted from open source projects. You can rate examples to help us improve their quality.

Example #1

Show file

File: nic_whois_crawler.py Project: ehsansh84/services

# Collection that receives one record per processed domain (see the insert
# call in the loop below). `db` is defined outside this excerpt.
col_processed = db['ir_domains_processed']

# Everything returned by col.find(); the loop below reads the 'domain' and
# 'whois' keys of each item. `col` is defined outside this excerpt.
domains = col.find()
# Result of col.count(); not referenced in the visible part of this excerpt.
domain_count = col.count()
# Progress counter: the loop prints it whenever i % 100 == 0.
# NOTE(review): no increment of `i` is visible in this excerpt -- confirm it
# is advanced in the truncated remainder, otherwise the check always passes.
i = 0
# Marker text searched for in domain['whois'] to compute `available`.
avail_string = 'ERROR:101: no entries found'

for domain in domains:
    if i % 100 == 0:
        print(i)
    whois = ''
    try:
        # print(domain['domain'])
        link = "http://whois.nic.ir/WHOIS?name=%s.ir" % domain['domain']
        # print(link)
        html = get_url(link)
        soap = BeautifulSoup(html, "html.parser")
        try:
            whois = soap.select(selector='pre')
        except Exception, e:
            print('*************ERROR*****************')
            print(e.message)
    except Exception, e:
        print('*******************')
        print(e.message)
        print('Error in domain: ' + domain['domain'])
    col.remove({'domain': domain['domain']})
    available = avail_string in domain['whois']
    col_processed.insert({
        'domain': domain['domain'],
        'whois': str(whois),

Example #2

Show file

# Collection that receives one record per processed domain (see the insert
# call in the loop below). `db` is defined outside this excerpt.
col_processed = db['ir_domains_processed']

# Everything returned by col.find(); the loop below reads the 'domain' and
# 'whois' keys of each item. `col` is defined outside this excerpt.
domains = col.find()
# Result of col.count(); not referenced in the visible part of this excerpt.
domain_count = col.count()
# Progress counter: the loop prints it whenever i % 100 == 0.
# NOTE(review): no increment of `i` is visible in this excerpt -- confirm it
# is advanced in the truncated remainder, otherwise the check always passes.
i = 0
# Marker text searched for in domain['whois'] to compute `available`.
avail_string = 'ERROR:101: no entries found'

for domain in domains:
    if i % 100 == 0:
        print(i)
    whois = ''
    try:
        # print(domain['domain'])
        link = "http://whois.nic.ir/WHOIS?name=%s.ir" % domain['domain']
        # print(link)
        html = get_url(link)
        soap = BeautifulSoup(html, "html.parser")
        try:
            whois = soap.select(selector='pre')
        except Exception, e:
            print('*************ERROR*****************')
            print(e.message)
    except Exception, e:
        print('*******************')
        print(e.message)
        print('Error in domain: ' + domain['domain'])
    col.remove({'domain': domain['domain']})
    available = avail_string in domain['whois']
    col_processed.insert({
        'domain': domain['domain'],
        'whois': str(whois),

Example #3

Show file

File: pubmed_crawler.py Project: ehsansh84/services



# Handle to the 'pubmed_articles' collection; it is not used in the lines
# shown here. `db` is defined outside this excerpt.
col = db['pubmed_articles']
# col_processed = db['ir_domains_processed']

# link = 'http://www.ncbi.nlm.nih.gov/pubmed/?term=article'
# PubMed URL queried with term=cancer.
# NOTE(review): `rource_link` looks like a typo for `source_link`; left as-is
# because code outside this excerpt may reference the name.
rource_link = 'http://www.ncbi.nlm.nih.gov/pubmed/?term=cancer'







# Fetch the page with get_url, then parse it. `doc` is first bound to the
# get_url result and then rebound to the BeautifulSoup object built from it.
doc = get_url(rource_link)
doc = BeautifulSoup(doc, "html.parser")
# Select every element matching the CSS class selector '.rprt'
# (presumably one per search result -- TODO confirm against the page markup).
links = doc.select('.rprt')
# NOTE(review): the loop body never uses `link`; it only calls print() once
# per matched element, which looks like a placeholder for real extraction.
for link in links:
    print()
# Report how many elements matched the selector.
print(len(links))
# links =
# domains = col.find()
# domain_count = col.count()
# i = 0
# for domain in domains:
#     if i % 100 == 0:
#         print(i)
#     whois = ''
#     try:
#         # print(domain['domain'])