Beispiel #1
0
    def __init__(self, name, spec, item_schemas, all_extractors,
                 settings=None, **kw):
        """Set up the spider from its ``spec``.

        The spec is deep-copied before use, so the helper calls below work
        on that copy rather than on the object supplied by the caller.

        :param name: spider name, forwarded to the parent constructor.
        :param spec: spider specification; it is read with ``.get`` here, so
            a mapping is expected.
        :param item_schemas: forwarded to ``_configure_plugins``.
        :param all_extractors: forwarded to ``_configure_plugins``.
        :param settings: forwarded to ``UrlGenerator``, ``_configure_js`` and
            ``_configure_plugins``; defaults to ``None``.
        :param kw: extra keyword arguments, forwarded to ``UrlGenerator``,
            ``GenericForm``, the parent constructor and
            ``_add_spider_args_to_spec``.
        """
        # Registry of start-URL generators, keyed by name. It is filled
        # before the parent constructor runs.
        generators = {}
        generators['start_urls'] = IdentityGenerator()
        generators['generated_urls'] = UrlGenerator(settings, kw)
        generators['url'] = IdentityGenerator()
        generators['feed'] = FeedGenerator(self.parse)
        generators['generated'] = FragmentGenerator()
        self.start_url_generators = generators

        self.generic_form = GenericForm(**kw)
        super(IblSpider, self).__init__(name, **kw)

        # Everything below operates on a private copy of the spec.
        spec_copy = deepcopy(spec)
        self._add_spider_args_to_spec(spec_copy, kw)
        self._configure_js(spec_copy, settings)
        self.plugins = self._configure_plugins(
            settings, spec_copy, item_schemas, all_extractors)

        # Both request lists start out empty.
        self.login_requests = []
        self.form_requests = []

        self._start_urls = self._create_start_urls(spec_copy)
        self._start_requests = self._create_start_requests(spec_copy)
        self._create_init_requests(spec_copy)
        self._add_allowed_domains(spec_copy)
        self.page_actions = spec_copy.get('page_actions', [])
Beispiel #2
0
 def setUp(self):
     """Create the name -> generator registry stored on ``self.generators``."""
     # Entries are added in the same order as the keys are listed, and the
     # two identity entries are separate instances.
     registry = {}
     registry['start_urls'] = IdentityGenerator()
     registry['generated_urls'] = UrlGenerator()
     registry['url'] = IdentityGenerator()
     registry['generated'] = FragmentGenerator()
     self.generators = registry
Beispiel #3
0
    def __init__(self, name, spec, item_schemas, all_extractors, settings=None,
                 **kw):
        """Set up the spider from its ``spec``.

        The spec is deep-copied before use, so the helper calls below work
        on that copy rather than on the object supplied by the caller.

        :param name: spider name, forwarded to the parent constructor.
        :param spec: spider specification; it is read with ``.get`` here, so
            a mapping is expected.
        :param item_schemas: forwarded to ``_configure_plugins``.
        :param all_extractors: forwarded to ``_configure_plugins``.
        :param settings: forwarded to ``UrlGenerator``, ``_configure_js`` and
            ``_configure_plugins``; defaults to ``None``.
        :param kw: extra keyword arguments, forwarded to ``UrlGenerator``,
            ``GenericForm``, the parent constructor and
            ``_add_spider_args_to_spec``.
        """
        # Registry of start-URL generators, keyed by name.
        self.start_url_generators = {
            'start_urls': IdentityGenerator(),
            'generated_urls': UrlGenerator(settings, kw),
            'url': IdentityGenerator(),
            'feed': FeedGenerator(self.parse),
            'generated': FragmentGenerator(),
        }
        self.generic_form = GenericForm(**kw)
        super(IblSpider, self).__init__(name, **kw)
        spec = deepcopy(spec)
        self._add_spider_args_to_spec(spec, kw)

        # Actions configured in the spec are only stored here. They do NOT
        # set ``js_enabled`` or add their URLs to ``js_enable_patterns``;
        # the spec reaches ``_configure_js`` without any such adjustment.
        self.actions = spec.get('actions', [])

        self._configure_js(spec, settings)
        self.plugins = self._configure_plugins(
            settings, spec, item_schemas, all_extractors)

        self.login_requests, self.form_requests = [], []
        self._start_urls = self._create_start_urls(spec)
        self._start_requests = self._create_start_requests(spec)
        self._create_init_requests(spec)
        self._add_allowed_domains(spec)
        self.page_actions = spec.get('page_actions', [])
 def test_start_urls(self):
     """The identity generator's output must equal ``github_start_urls``."""
     identity = IdentityGenerator()
     produced = identity(self.github_start_urls)
     self.assertEqual(self.github_start_urls, produced)