def post_request(url, seq, seq_no):
    """POST the body built from ``seq``/``seq_no`` to ``url`` and parse the reply.

    The payload is produced by ``get_body(seq, seq_no)``. The text of the
    response is handed to ``html_parser.Parser`` and whatever its
    ``pars_html()`` method returns is passed back to the caller unchanged.
    """
    response = requests.post(url, data=get_body(seq, seq_no))
    return html_parser.Parser(response.text).pars_html()
def __init__(self):
    """Create the manager, downloader, saver and parser, then seed one URL.

    The parser is constructed with the manager instance, and the manager is
    given ``https://book.douban.com/`` as its first URL. ``limits`` is set
    to 100.
    """
    manager = Manager.Manager()
    self.manager = manager
    self.downloader = html_downloader.Downloader()
    self.saver = html_saver.Saver()
    # The parser receives the same manager instance stored on ``self``.
    self.parser = html_parser.Parser(manager)
    manager.add_one_url("https://book.douban.com/")
    self.limits = 100
def __init__(self):
    """Instantiate the manager, outputer, parser and downloader components."""
    # Crawler's URL manager.
    self.manager = url_manager.UrlManager()

    # Outputer.
    self.outputer = html_outputer.Outputer()

    # Parser.
    self.parser = html_parser.Parser()

    # Downloader.
    self.downloader = html_downloader.Downloader()
def __init__(self):
    """Instantiate the five components held by this object.

    Attributes set: ``urls``, ``downloader``, ``parser``, ``outputer`` and
    ``saver``.
    """
    # URL scheduler.
    self.urls = url_manager.UrlManager()

    # Downloader.
    self.downloader = html_downloader.HtmlDownloader()

    # Parser.
    self.parser = html_parser.Parser()

    # Content outputer.
    self.outputer = html_outputer.HtmlOutputer()

    # Content saver (``db_saver.DBSaver``).
    self.saver = db_saver.DBSaver()
def __init__(self):
    """Instantiate the URL manager, downloader, parser and generator.

    Each component is created with its no-argument constructor and stored
    on ``self`` as ``url``, ``downloader``, ``parser`` and ``generator``.
    """
    self.url = url_manager.UrlManager()
    self.downloader = html_downloader.Downloader()
    self.parser = html_parser.Parser()
    self.generator = html_generator.Generator()
def __init__(self):
    """Instantiate the URL manager, downloader, parser and outputer.

    The components are stored on ``self`` as ``urls``, ``download``,
    ``parser`` and ``output``.
    """
    # Original author's open question: how do ``object`` and ``self`` relate?
    # Original author's note on the calls below: "file name (module name).Class?"
    self.urls = url_manager.Manager()
    self.download = html_downloader.Downloader()
    self.parser = html_parser.Parser()
    self.output = html_outputer.Outputer()
def __init__(self, url):
    """Store ``url`` as the root URL and instantiate the components.

    Args:
        url: Kept on ``self.root_url`` and also passed to
            ``html_parser.Parser``.
    """
    self.root_url = url

    self.urls = url_manager.UrlManager()
    self.downloader = html_downloader.HtmlDownloader()
    # The parser is the only component constructed with the URL.
    self.parser = html_parser.Parser(url)
    self.outputer = outputer.Outputer()
def __init__(self):
    """Instantiate the parser and the output component.

    Both are created with their no-argument constructors and stored on
    ``self`` as ``parser`` and ``output``.
    """
    self.parser = html_parser.Parser()
    self.output = html_output.Output()
def parser_factory(type, settings):
    """Return a parser for ``type``, or ``None`` when the type is not handled.

    Only ``html_parser.TYPE_HTML`` is recognised; for it the result is
    ``html_parser.Parser(settings)``. Any other value yields ``None``.

    Args:
        type: Parser type identifier, compared against
            ``html_parser.TYPE_HTML``. The name shadows the builtin but is
            part of the public signature.
        settings: Passed straight through to the parser constructor.
    """
    is_html = html_parser.TYPE_HTML == type
    if not is_html:
        return None
    return html_parser.Parser(settings)
def __init__(self):
    """Instantiate the URL manager, page downloader, parser and outputer.

    The components are stored on ``self`` as ``urls``, ``downloader``,
    ``parser`` and ``outputers``.
    """
    self.urls = url_manager.UrlManager()
    self.downloader = page_downloader.Downloader()
    self.parser = html_parser.Parser()
    # Attribute name is plural, although a single Outputer is stored.
    self.outputers = outputer.Outputer()
def parse_race_as_html(self):
    """Parse the race page with ``html_parser.Parser`` and store the result.

    A parser is created for ``self.racename`` with ``debug=True`` and kept
    on ``self.html_parser``. The value returned by its ``parse_page()`` is
    stored on ``self.race``.

    ``quit()`` is always called on the parser afterwards, including when
    ``parse_page()`` raises, so a failed parse does not leave the parser
    open. Any exception from ``parse_page()`` still propagates to the
    caller; in that case ``self.race`` is left untouched.
    """
    print('Parsing race as html')
    self.html_parser = html_parser.Parser(self.racename, debug=True)
    try:
        self.race = self.html_parser.parse_page()
    finally:
        # NOTE(review): presumably quit() releases a browser/session held by
        # the parser -- confirm against html_parser.Parser.
        self.html_parser.quit()