from urllib.parse import urljoin, urlsplit

def normalize_link(link):
    """Normalize the link for crawling, or return None if it should be ignored.

    `url` (the page the link was found on), `local`, `external`, and the
    `common` helper module come from the enclosing scope.
    """
    if urlsplit(link).scheme in ('http', 'https', ''):
        if '#' in link:
            # strip the fragment to avoid crawling duplicates
            link = link[:link.index('#')]
        if url:
            # resolve relative links against the current page
            link = urljoin(url, link)
            if not local and common.same_domain(url, link):
                link = None  # local links not included
            elif not external and not common.same_domain(url, link):
                # elif avoids passing None to same_domain after the local check
                link = None  # external links not included
    else:
        link = None  # ignore mailto, etc
    return link
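For context, here is a minimal, self-contained sketch of how such a normalizer might be driven over the raw hrefs extracted from a page. The `get_links` wrapper, its signature, and the `same_domain` stand-in are illustrative assumptions, not part of the excerpt above:

from urllib.parse import urljoin, urlsplit

def same_domain(url1, url2):
    # stand-in for common.same_domain: compare hostnames only
    return urlsplit(url1).netloc.lower() == urlsplit(url2).netloc.lower()

def get_links(url, links, local=True, external=False):
    # hypothetical driver: normalize each raw href found on the page at `url`
    normalized = []
    for link in links:
        if urlsplit(link).scheme in ('http', 'https', ''):
            if '#' in link:
                link = link[:link.index('#')]  # strip fragment
            link = urljoin(url, link)  # resolve relative hrefs
            if not local and same_domain(url, link):
                continue  # skip same-domain links
            if not external and not same_domain(url, link):
                continue  # skip off-site links
            normalized.append(link)
    return normalized

print(get_links('http://example.com/a.html',
                ['/b.html#top', 'mailto:me@example.com', 'http://other.com/']))
# ['http://example.com/b.html']

With the default flags, relative links are resolved and kept, fragments are dropped, and non-HTTP schemes such as mailto are discarded.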
def valid(link):
    """Check whether this link should be crawled.

    `domain`, the `common` and `settings` modules, the download object `D`,
    and `self` come from the enclosing scope.
    """
    # check if a media file
    if common.get_extension(link) not in common.MEDIA_EXTENSIONS:
        # check if a proper HTTP link
        if link.lower().startswith('http'):
            # only crawl within website
            if common.same_domain(domain, link):
                # passes regex
                if self.allowed_urls.match(link) and not self.banned_urls.match(link):
                    # not blocked by robots.txt
                    if not self.robots or self.robots.can_fetch(settings.user_agent, link):
                        # allowed to recrawl
                        if self.crawl_existing or (D.cache and link not in D.cache):
                            return True
    return False
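The robots.txt check assumes `self.robots` exposes the same interface as the standard library's `urllib.robotparser.RobotFileParser`. A minimal sketch of preparing such a parser follows; the robots.txt URL, the user-agent string, and the test URL are placeholders, and `read()` fetches the file over the network:

from urllib import robotparser

robots = robotparser.RobotFileParser()
robots.set_url('http://example.com/robots.txt')
robots.read()

# same call shape as self.robots.can_fetch(settings.user_agent, link)
print(robots.can_fetch('MyCrawler', 'http://example.com/private/page.html'))

If no robots.txt is available, `valid` treats the link as fetchable via the `not self.robots` short-circuit, so the parser is optional.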