Example #1
0
# Collection that receives one record per domain after its WHOIS lookup.
# NOTE(review): `db` and `col` are defined outside this fragment.
col_processed = db['ir_domains_processed']

# All documents currently in `col`; each one is read through its 'domain' key below.
domains = col.find()
# Number of documents in `col` (not used in the visible part of this fragment).
domain_count = col.count()
# Progress counter.
# NOTE(review): no increment is visible in this fragment; unless it happens in
# the truncated part, the progress print below fires on every iteration with 0.
i = 0
# Marker text searched for below to decide whether a domain is available.
avail_string = 'ERROR:101: no entries found'

for domain in domains:
    # Print the counter whenever it is a multiple of 100.
    if i % 100 == 0:
        print(i)
    # Default value so that str(whois) below still works when the lookup fails.
    whois = ''
    # NOTE(review): "except Exception, e" and e.message used below are
    # Python 2-only; this fragment does not parse under Python 3.
    try:
        # print(domain['domain'])
        # WHOIS page URL for "<domain>.ir".
        link = "http://whois.nic.ir/WHOIS?name=%s.ir" % domain['domain']
        # print(link)
        # get_url is defined elsewhere; presumably it returns the page HTML - TODO confirm.
        html = get_url(link)
        soap = BeautifulSoup(html, "html.parser")
        try:
            # Select the <pre> element(s) of the page; presumably they hold the
            # raw WHOIS text - TODO confirm.
            whois = soap.select(selector='pre')
        except Exception, e:
            print('*************ERROR*****************')
            print(e.message)
    except Exception, e:
        # Any failure while building the URL, fetching or parsing is only
        # reported; the loop carries on with whois still set to ''.
        print('*******************')
        print(e.message)
        print('Error in domain: ' + domain['domain'])
    # Runs whether or not the lookup succeeded: the domain is deleted from the
    # source collection before the processed record is written.
    col.remove({'domain': domain['domain']})
    # NOTE(review): this tests the 'whois' value stored on the source document,
    # not the `whois` fetched above - confirm this is intended. If the document
    # has no 'whois' key this presumably raises KeyError after the removal above.
    available = avail_string in domain['whois']
    # Store the result; the remainder of this call is cut off in this fragment.
    col_processed.insert({
        'domain': domain['domain'],
        'whois': str(whois),
Example #2
0
# Collection that receives one record per domain after its WHOIS lookup.
# NOTE(review): `db` and `col` are defined outside this fragment.
col_processed = db['ir_domains_processed']

# All documents currently in `col`; each one is read through its 'domain' key below.
domains = col.find()
# Number of documents in `col` (not used in the visible part of this fragment).
domain_count = col.count()
# Progress counter.
# NOTE(review): no increment is visible in this fragment; unless it happens in
# the truncated part, the progress print below fires on every iteration with 0.
i = 0
# Marker text searched for below to decide whether a domain is available.
avail_string = 'ERROR:101: no entries found'

for domain in domains:
    # Print the counter whenever it is a multiple of 100.
    if i % 100 == 0:
        print(i)
    # Default value so that str(whois) below still works when the lookup fails.
    whois = ''
    # NOTE(review): "except Exception, e" and e.message used below are
    # Python 2-only; this fragment does not parse under Python 3.
    try:
        # print(domain['domain'])
        # WHOIS page URL for "<domain>.ir".
        link = "http://whois.nic.ir/WHOIS?name=%s.ir" % domain['domain']
        # print(link)
        # get_url is defined elsewhere; presumably it returns the page HTML - TODO confirm.
        html = get_url(link)
        soap = BeautifulSoup(html, "html.parser")
        try:
            # Select the <pre> element(s) of the page; presumably they hold the
            # raw WHOIS text - TODO confirm.
            whois = soap.select(selector='pre')
        except Exception, e:
            print('*************ERROR*****************')
            print(e.message)
    except Exception, e:
        # Any failure while building the URL, fetching or parsing is only
        # reported; the loop carries on with whois still set to ''.
        print('*******************')
        print(e.message)
        print('Error in domain: ' + domain['domain'])
    # Runs whether or not the lookup succeeded: the domain is deleted from the
    # source collection before the processed record is written.
    col.remove({'domain': domain['domain']})
    # NOTE(review): this tests the 'whois' value stored on the source document,
    # not the `whois` fetched above - confirm this is intended. If the document
    # has no 'whois' key this presumably raises KeyError after the removal above.
    available = avail_string in domain['whois']
    # Store the result; the remainder of this call is cut off in this fragment.
    col_processed.insert({
        'domain': domain['domain'],
        'whois': str(whois),
Example #3
0


# Handle to the 'pubmed_articles' collection of `db` (`db` is defined outside
# this fragment; `col` is not used in the visible part of the script).
col = db['pubmed_articles']

# Search-results page to scrape: PubMed query for the term "cancer".
# An earlier variant queried 'http://www.ncbi.nlm.nih.gov/pubmed/?term=article'.
rource_link = 'http://www.ncbi.nlm.nih.gov/pubmed/?term=cancer'

# Fetch the page and parse it in a single step.
doc = BeautifulSoup(get_url(rource_link), "html.parser")

# Every element on the page that carries the CSS class "rprt".
links = doc.select('.rprt')

# One print() call per matched element, followed by the total match count.
for link in links:
    print()
print(len(links))
# links =
# domains = col.find()
# domain_count = col.count()
# i = 0
# for domain in domains:
#     if i % 100 == 0:
#         print(i)
#     whois = ''
#     try:
#         # print(domain['domain'])