def __init__(self, name, *args, **kwargs):
    """Set up the spider from the configuration registered under ``name``.

    The configuration object returned by ``get_config(name)`` is stored on
    the instance and its ``'rules'``, ``'start_urls'`` and
    ``'allowed_domains'`` entries are copied onto the matching attributes.

    Args:
        name: Key passed to ``get_config`` to look up the configuration.
        *args: Positional arguments forwarded unchanged to the parent
            initializer.
        **kwargs: Keyword arguments forwarded unchanged to the parent
            initializer.
    """
    spider_config = get_config(name)
    # The config only names a rule set; the rule objects themselves are
    # looked up in the separate ``rules`` registry.
    rule_set_key = spider_config.get('rules')

    self.config = spider_config
    self.rules = rules.get(rule_set_key)
    self.start_urls = spider_config.get('start_urls')
    self.allowed_domains = spider_config.get('allowed_domains')

    # Attributes are assigned before delegating, so the parent initializer
    # runs with them already in place.
    super(UniversalSpider, self).__init__(*args, **kwargs)
def __init__(self, name, *args, **kwargs):
    """Set up the spider from the configuration registered under ``name``.

    Args:
        name: Key passed to ``get_config`` to look up the configuration.
        *args: Positional arguments forwarded unchanged to the parent
            initializer.
        **kwargs: Keyword arguments forwarded unchanged to the parent
            initializer.
    """
    config = get_config(name)
    self.config = config
    # The rules attribute is not stored in the config itself: the config
    # holds a key that selects an entry from the separate ``rules`` registry.
    self.rules = rules.get(config.get('rules'))
    # start_urls must come from the 'start_urls' entry; reading
    # 'allowed_domains' here would seed the crawl with domain names
    # instead of URLs.
    self.start_urls = config.get('start_urls')
    # allowed_domains is assigned from its own config entry.
    self.allowed_domains = config.get('allowed_domains')
    super(UniversalSpider, self).__init__(*args, **kwargs)
def __init__(self, name, *args, **kwargs):
    """Set up the spider from the configuration registered under ``name``.

    The ``'start_urls'`` config entry, when present and non-empty, is read
    as a mapping with a ``'type'`` key:

    * ``'static'``  -- ``self.start_urls`` is taken from its ``'value'`` key.
    * ``'dynamic'`` -- the attribute of ``urls`` named by its ``'method'``
      key is called with the optional ``'args'`` list, and the result is
      materialized into a list.

    For any other type, or when the entry is missing or empty,
    ``self.start_urls`` is not assigned by this method.

    Args:
        name: Key passed to ``get_config`` to look up the configuration.
        *args: Positional arguments forwarded unchanged to the parent
            initializer.
        **kwargs: Keyword arguments forwarded unchanged to the parent
            initializer.

    Raises:
        AttributeError: If a dynamic ``'method'`` is not an attribute of
            ``urls``.
        TypeError: If a dynamic ``'method'`` is missing or not a string.
    """
    config = get_config(name)
    self.config = config
    self.rules = rules.get(config.get('rules'))

    start_urls = config.get('start_urls')
    if start_urls:
        url_type = start_urls.get('type')
        if url_type == 'static':
            self.start_urls = start_urls.get('value')
        elif url_type == 'dynamic':
            # Resolve the generator with a plain attribute lookup rather
            # than eval(): a config value can then only select an existing
            # attribute of ``urls``, never execute an arbitrary expression.
            # ``method`` must therefore be a bare attribute name.
            url_factory = getattr(urls, start_urls.get('method'))
            self.start_urls = list(url_factory(*start_urls.get('args', [])))

    self.allowed_domains = config.get('allowed_domains')
    super(UniversalSpider, self).__init__(*args, **kwargs)
def __init__(self, name, keyword, *args, **kwargs):
    """Set up a keyword-driven spider from the configuration under ``name``.

    Stores the configuration, the rule set it selects, the spider name, the
    search keyword and the allowed domains on the instance. This version
    does not assign ``start_urls``.

    Args:
        name: Key passed to ``get_config``; also stored as ``self.name``.
        keyword: Value stored as ``self.keyword``.
        *args: Positional arguments forwarded unchanged to the parent
            initializer.
        **kwargs: Keyword arguments forwarded unchanged to the parent
            initializer.
    """
    spider_config = get_config(name)
    # The config only names a rule set; the rule objects themselves are
    # looked up in the separate ``rules`` registry.
    rule_set_key = spider_config.get('rules')

    self.config = spider_config
    self.rules = rules.get(rule_set_key)
    self.name = name
    self.keyword = keyword

    # start_urls is not assigned in this constructor. The previous logic is
    # kept here, disabled, for reference:
    # self.start_urls = ["http://106.38.57.66:8080/oasearch/front/search.do"]
    # if start_urls:
    #     if start_urls.get("type") == 'static':
    #         self.start_urls = start_urls.get("value")
    #     elif start_urls.get("type") == 'dynamic':
    #         self.start_urls = list(eval('urls.' + start_urls.get('method'))(*start_urls.get('args',[]),keyword))

    self.allowed_domains = spider_config.get('allowed_domains')
    super(UniversalSpider, self).__init__(*args, **kwargs)