def fetchLinks(self, url):
    """Fetch the page at *url* and return a list of validated outgoing links.

    Only ``text/html`` responses are parsed; anything else (images, binaries,
    ...) is rejected.  Each ``<a href>`` is normalized/validated through
    ``URLValidator.formValidURL`` against the base *url*.

    :param url: absolute URL of the page to fetch.
    :return: list of valid absolute sub-URLs, or ``None`` when the page could
        not be fetched/parsed or yielded no valid links.
    """
    try:
        # Context manager guarantees the HTTP response is closed even when
        # parsing raises (the original leaked the connection).
        with urlopen(url) as response:
            # Header may be absent; fall back to '' so .strip() cannot
            # raise AttributeError on None.
            contenttype = (response.info()['Content-Type'] or '').strip().lower()
            # If the content-type is not text/html the url is rejected, as it
            # could be an image or something else that cannot be parsed.
            if not contenttype.startswith('text/html'):
                self.log.debug('rejected = ' + url + ' Content-Type = ' + contenttype)
                return None
            dom = parse(response).getroot()
    except urllib.error.HTTPError as e1:
        self.log.warning(''.join(['HTTP error for ', url]))
        # exc_info=True attaches the full current exception info; passing a
        # bare traceback object (as before) is not a supported exc_info value.
        self.log.debug(e1, exc_info=True)
        return None
    except Exception as e2:
        self.log.warning('cannot parse ' + url)
        self.log.debug(e2, exc_info=True)
        return None

    if dom is None:
        return None
    linksList = []
    # cssselect always returns a list of elements, so no per-item None checks
    # are needed.
    for link in dom.cssselect('a'):
        href = link.get('href')
        if href is None:
            continue
        suburl = URLValidator.formValidURL(url, href)
        if suburl is not None:
            linksList.append(suburl)
        else:
            self.log.debug('rejected href = ' + href)
    # Preserve original contract: empty result list maps to None.
    return linksList or None
def fetchLinks(self, url):
    """Download *url* and extract all valid anchor links from it.

    Non-HTML responses (checked via the ``Content-Type`` header) are
    rejected outright.  Every ``href`` found is run through
    ``URLValidator.formValidURL`` relative to *url*.

    :param url: absolute URL to crawl.
    :return: non-empty list of validated sub-URLs, otherwise ``None``
        (fetch failure, parse failure, non-HTML content, or no links).
    """
    dom = None
    try:
        # 'with' closes the underlying HTTP connection on every path;
        # the previous version never closed the response.
        with urlopen(url) as response:
            # Guard against a missing Content-Type header (None) before
            # calling str methods on it.
            contenttype = (response.info()['Content-Type'] or '').strip().lower()
            # Reject anything that is not text/html, as it could be an
            # image or other content that cannot be parsed.
            if not contenttype.startswith('text/html'):
                self.log.debug('rejected = ' + url + ' Content-Type = ' + contenttype)
                return None
            dom = parse(response).getroot()
    except urllib.error.HTTPError as e1:
        # logger.warn is deprecated; warning is the supported name.
        self.log.warning(''.join(['HTTP error for ', url]))
        self.log.debug(e1, exc_info=True)
        return None
    except Exception as e2:
        self.log.warning('cannot parse ' + url)
        self.log.debug(e2, exc_info=True)
        return None

    if dom is None:
        return None

    collected = []
    for anchor in dom.cssselect('a'):
        href = anchor.get('href')
        if href is None:
            continue
        suburl = URLValidator.formValidURL(url, href)
        if suburl is None:
            self.log.debug('rejected href = ' + href)
        else:
            collected.append(suburl)
    # Match original behavior: an empty list is reported as None.
    return collected if collected else None