Example 1
    def crawl(self, url, **kwargs):
        '''
        available params:
          url
          callback

          method
          params
          data
          files
          headers
          timeout
          allow_redirects
          cookies
          proxy
          etag
          last_modified
          auto_recrawl

          fetch_type
          js_run_at
          js_script
          js_viewport_width
          js_viewport_height
          load_images

          priority
          retries
          exetime
          age
          itag
          cancel

          save
          taskid

          full documentation: http://pyspider.readthedocs.org/en/latest/apis/self.crawl/
        '''

        # a pasted curl command string is also accepted; parse it into crawl arguments
        if isinstance(url, six.string_types) and url.startswith('curl '):
            curl_kwargs = curl_to_arguments(url)
            url = curl_kwargs.pop('urls')
            for k, v in iteritems(curl_kwargs):
                kwargs.setdefault(k, v)

        # a single URL string creates one task; an iterable of URLs creates one task per URL
        if isinstance(url, six.string_types):
            return self._crawl(url, **kwargs)
        elif hasattr(url, "__iter__"):
            result = []
            for each in url:
                result.append(self._crawl(each, **kwargs))
            return result
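
For context, here is a minimal sketch of how this crawl API is typically called from a pyspider handler, following the documented BaseHandler quickstart layout; the URL, handler name, and callbacks below are placeholders, not part of the method above:

from pyspider.libs.base_handler import *


class Handler(BaseHandler):
    crawl_config = {}

    @every(minutes=24 * 60)
    def on_start(self):
        # schedule the seed page; the callback receives the fetched response
        self.crawl('http://example.com/', callback=self.index_page)

    @config(age=10 * 24 * 60 * 60)
    def index_page(self, response):
        # follow every outgoing link found on the page
        for each in response.doc('a[href^="http"]').items():
            self.crawl(each.attr.href, callback=self.detail_page)

    def detail_page(self, response):
        return {
            "url": response.url,
            "title": response.doc('title').text(),
        }

Because crawl also accepts an iterable, self.crawl([url1, url2], callback=self.detail_page) schedules one task per URL, which is exactly the branch handled at the end of the method above.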