# Module-level imports in pyspider's base_handler, shown here for context.
import six
from six import iteritems


def crawl(self, url, **kwargs):
    '''
    available params:
      url
      callback

      method
      params
      data
      files
      headers
      timeout
      allow_redirects
      cookies
      proxy
      etag
      last_modified
      auto_recrawl

      fetch_type
      js_run_at
      js_script
      js_viewport_width
      js_viewport_height
      load_images

      priority
      retries
      exetime
      age
      itag
      cancel

      save
      taskid

    full documentation: http://pyspider.readthedocs.org/en/latest/apis/self.crawl/
    '''
    if isinstance(url, six.string_types) and url.startswith('curl '):
        # A raw curl command line: parse it into url/headers/data/... kwargs.
        # curl_to_arguments is a module-level helper in base_handler.
        curl_kwargs = curl_to_arguments(url)
        url = curl_kwargs.pop('urls')
        for k, v in iteritems(curl_kwargs):
            # Explicit keyword arguments take precedence over parsed ones.
            kwargs.setdefault(k, v)

    if isinstance(url, six.string_types):
        # Single URL: create one task.
        return self._crawl(url, **kwargs)
    elif hasattr(url, "__iter__"):
        # Iterable of URLs: create one task per URL with the same kwargs.
        result = []
        for each in url:
            result.append(self._crawl(each, **kwargs))
        return result
    # Any other type falls through and returns None.
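
# --- Usage sketch (not part of the library) --------------------------------
# A minimal example of calling self.crawl from a pyspider handler, covering
# the three branches above: a single URL, a list of URLs, and a curl command
# line. The URLs, the callback name, and the age value are illustrative
# assumptions, not values taken from the method above.

from pyspider.libs.base_handler import BaseHandler


class Handler(BaseHandler):
    def on_start(self):
        # Single URL: schedules one task.
        self.crawl('http://example.com/', callback=self.index_page,
                   age=10 * 24 * 60 * 60)  # treat the result as fresh for 10 days

        # List of URLs: one task per entry, all sharing the same kwargs.
        self.crawl(['http://example.com/a', 'http://example.com/b'],
                   callback=self.index_page)

        # curl command line: parsed into url plus header/data kwargs,
        # with explicitly passed kwargs taking precedence.
        self.crawl("curl 'http://example.com/api' -H 'Accept: application/json'",
                   callback=self.index_page)

    def index_page(self, response):
        # response.doc is a PyQuery object built from the fetched page.
        return {'url': response.url, 'title': response.doc('title').text()}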