コード例 #1
0
ファイル: scraping.py プロジェクト: I-TREND/SASF
 def __iter__(self):
     """Yield one dict per scraped result, following pagination links.

     Each yielded dict holds one entry per key of ``self.fields`` (the
     value is extracted from the result with the matching selector) plus
     two bookkeeping entries: ``'_seq'``, taken from a counter shared by
     the whole iteration, and ``'_url'``, the value of
     ``self._url(self.session)``.  A field named ``'url'`` is joined
     against the URL of the page being processed, so relative links come
     out absolute.

     Iteration ends when ``self.next`` is falsy or when the element it
     selects cannot be found on the current page.
     """
     from quirks.iterable import count
     from urlparse import urljoin

     seq = count()
     while True:
         # Snapshot the page URL once so every 'url' field on this page is
         # resolved against the same base.
         current_url = self.session.url()

         def absolutize(pair):
             # Portable replacement for a tuple-parameter lambda, which is
             # a syntax error on Python 3.
             key, value = pair
             if key == 'url':
                 return key, urljoin(current_url, value)
             return key, value

         process_values = combinator(absolutize)

         results = self._values(self.session, self.li) or []
         for result in results:
             fields = (
                 (key, self._value(result, selector))
                 for key, selector in self.fields.iteritems()
             )
             # Built before the field generator is consumed, so the counter
             # and _url() are advanced/called ahead of the per-field lookups.
             meta = (('_seq', seq.next()), ('_url', self._url(self.session)))
             yield dict(chain(imap(process_values, fields), meta))

         if not self.next:
             return
         # Named next_link so the builtin next() is not shadowed.
         next_link = self._value(self.session, self.next)
         if not next_link:
             return

         # NOTE(review): presumably invalidates a cached page URL before
         # navigating -- confirm against the rest of the class.
         self.__url = None
         next_link.click()
         # Fixed pause after the click, then block on the session's own wait.
         time.sleep(0.5)
         self.session.wait()
         logger.debug('visited "%s"', self.session.url())
コード例 #2
0
 def _resolve(self, domain):
     """Return an iterator over the string forms of *domain*'s DNS answers.

     Both the ``'A'`` and ``'AAAA'`` record types are queried, in that
     order; falsy entries are dropped and every remaining entry is passed
     through ``str``.

     The wrapped resolver is built on first use and stored on the
     instance as ``self._query``.
     """
     # Test for the attribute that is actually assigned below.  Checking
     # 'query' (no underscore) never matched, so the resolver was
     # re-imported and re-wrapped on every call.
     if not hasattr(self, '_query'):
         from dns.resolver import query
         # NOTE(review): maybe/ensure_iterable are defined elsewhere;
         # presumably they make a failed lookup yield nothing instead of
         # raising -- confirm.
         self._query = ensure_iterable(maybe(query))

     answers = chain(self._query(domain, 'A'), self._query(domain, 'AAAA'))
     return imap(str, ifilter(None, answers))
コード例 #3
0
 def __call__(self, url):
     """Return one dict merging the IP-address and whois data for *url*.

     The domain is derived from *url* via ``self._get_domain``.  Items
     from ``self._get_whois`` come after those from
     ``self._get_ip_addresses``, so on a key collision the whois value
     is the one kept.
     """
     domain = self._get_domain(url)
     ip_items = self._get_ip_addresses(domain).iteritems()
     whois_items = self._get_whois(domain).iteritems()
     return dict(chain(ip_items, whois_items))
コード例 #4
0
ファイル: addrutil.py プロジェクト: pborky/scrap
 def __call__(self, url):
     """Look up *url*'s domain and return its combined address/whois dict.

     Entries from ``self._get_ip_addresses`` are inserted first and
     entries from ``self._get_whois`` second, so a whois entry replaces
     an IP-address entry that has the same key.
     """
     domain = self._get_domain(url)
     # Both lookups run before anything is merged, IP addresses first.
     address_items = self._get_ip_addresses(domain).iteritems()
     whois_items = self._get_whois(domain).iteritems()
     merged = dict(address_items)
     merged.update(whois_items)
     return merged
コード例 #5
0
ファイル: addrutil.py プロジェクト: pborky/scrap
 def _resolve(self, domain):
     """Return an iterator of ``str``-converted A and AAAA answers for *domain*.

     ``'A'`` results come first, then ``'AAAA'``; falsy entries are
     filtered out before conversion.

     The resolver callable is created lazily and stored on the instance
     as ``self._query`` so later calls reuse it.
     """
     # The guard must look for '_query', the name assigned below; the
     # previous check for 'query' was always true when no such attribute
     # existed, which rebuilt the wrapped resolver on every call.
     if not hasattr(self, '_query'):
         from dns.resolver import query
         # NOTE(review): maybe/ensure_iterable come from elsewhere in the
         # project; presumably they turn lookup failures into an empty
         # result -- confirm.
         self._query = ensure_iterable(maybe(query))

     ipv4_answers = self._query(domain, 'A')
     ipv6_answers = self._query(domain, 'AAAA')
     return imap(str, ifilter(None, chain(ipv4_answers, ipv6_answers)))