def __init__(self) -> None:
    """Build the crawler's collaborators and queue the seed URL.

    Creates, in order, the manager, the downloader, the saver and the
    parser (the parser is handed the manager instance), then adds
    ``https://book.douban.com/`` to the manager and sets ``limits`` to 100.
    """
    url_store = Manager.Manager()
    self.manager = url_store
    self.downloader = html_downloader.Downloader()
    self.saver = html_saver.Saver()
    # The parser receives the very same manager object stored on ``self``.
    self.parser = html_parser.Parser(url_store)
    url_store.add_one_url("https://book.douban.com/")
    # Alternative seed left disabled by the original author:
    # self.manager.add_one_url("https://www.baidu.com/")
    # NOTE(review): presumably the maximum number of pages to crawl --
    # confirm against the code that reads ``self.limits``.
    self.limits = 100
def __init__(self) -> None:
    """Instantiate the four components the crawler works with.

    Construction order: URL manager, outputer, parser, downloader.
    """
    self.manager = url_manager.UrlManager()  # the crawler's URL manager
    self.outputer = html_outputer.Outputer()  # outputer
    self.parser = html_parser.Parser()  # parser
    self.downloader = html_downloader.Downloader()  # downloader
def __init__(self) -> None:
    """Create the URL manager, downloader, parser and generator.

    Each component is constructed with no arguments and stored on the
    instance as ``url``, ``downloader``, ``parser`` and ``generator``.
    """
    url_store = url_manager.UrlManager()
    self.url = url_store

    fetcher = html_downloader.Downloader()
    self.downloader = fetcher

    page_parser = html_parser.Parser()
    self.parser = page_parser

    page_generator = html_generator.Generator()
    self.generator = page_generator
def __init__(self) -> None:
    """Create the manager, downloader, parser and outputer for this instance.

    The components are stored as ``urls``, ``download``, ``parser`` and
    ``output``.
    """
    # Author's note (translated): "What is the relationship between
    # ``object`` and ``self``?"  ``self`` is the instance being initialised.
    # NOTE(review): the class header is not visible here, so whether it
    # derives from ``object`` cannot be confirmed from this method.
    #
    # Author's note (translated): "file name (module name).class?"  Yes --
    # each right-hand side below is ``module.ClassName()``.
    self.urls = url_manager.Manager()
    self.download = html_downloader.Downloader()
    self.parser = html_parser.Parser()
    self.output = html_outputer.Outputer()
def __init__(self) -> None:
    """Create the downloader, HTML parser and JSON outputer.

    Unlike a queue-driven crawler, no URL manager is created here; only
    the three components below are stored on the instance.
    """
    fetcher = html_downloader.Downloader()
    self.downloader = fetcher

    page_parser = html_parser.HtmlParser()
    self.parser = page_parser

    json_writer = json_outputer.JsonOutputer()
    self.outputer = json_writer
def __init__(self) -> None:
    """Initialise the URL manager, downloader, parser and outputer.

    Components are constructed in that order, each with no arguments.
    """
    self.urls = url_manager.UrlManager()  # URL manager
    self.downloader = html_downloader.Downloader()  # downloader
    self.parser = html_parser.Htmlparser()  # parser
    self.outputer = html_outputer.Htmloutputer()  # outputer