예제 #1
0
def post_request(url, seq, seq_no, timeout=30):
    """POST the body built from ``seq``/``seq_no`` to ``url`` and parse the reply.

    Args:
        url: Address the request is sent to.
        seq: Forwarded unchanged to ``get_body``.
        seq_no: Forwarded unchanged to ``get_body``.
        timeout: Seconds ``requests`` waits on the server before raising
            ``requests.exceptions.Timeout``. Pass ``None`` to wait
            indefinitely, which is what happened before this parameter
            existed.

    Returns:
        Whatever ``html_parser.Parser(...).pars_html()`` produces for the
        response text.

    Note:
        The HTTP status code is not inspected; the response text is parsed
        regardless of status.
    """
    body = get_body(seq, seq_no)
    # Without a timeout, requests blocks forever on an unresponsive server.
    response = requests.post(url, data=body, timeout=timeout)
    return html_parser.Parser(response.text).pars_html()
예제 #2
0
 def __init__(self):
     """Build the manager, downloader, saver and parser, then register
     the Douban Books URL with the manager."""
     url_queue = Manager.Manager()
     self.manager = url_queue
     self.downloader = html_downloader.Downloader()
     self.saver = html_saver.Saver()
     # The parser receives the same manager instance that is stored on self.
     self.parser = html_parser.Parser(url_queue)
     url_queue.add_one_url("https://book.douban.com/")
     # Alternative seed (disabled): https://www.baidu.com/
     self.limits = 100  # presumably a cap on processed pages -- TODO confirm
 def __init__(self):
     """Instantiate the URL manager, outputer, parser and downloader."""
     # Initialize the crawler's manager
     self.manager = url_manager.UrlManager()
     # Initialize the outputer
     self.outputer = html_outputer.Outputer()
     # Initialize the parser
     self.parser = html_parser.Parser()
     # Initialize the downloader
     self.downloader = html_downloader.Downloader()
예제 #4
0
 def __init__(self):
     """Instantiate the URL manager, downloader, parser, outputer and saver."""
     # Initialize the URL scheduler
     self.urls = url_manager.UrlManager()
     # Initialize the downloader
     self.downloader = html_downloader.HtmlDownloader()
     # Initialize the parser
     self.parser = html_parser.Parser()
     # Content outputer
     self.outputer = html_outputer.HtmlOutputer()
     # Content saver (a db_saver.DBSaver instance)
     self.saver = db_saver.DBSaver()
예제 #5
0
 def __init__(self):
     """Instantiate the URL manager, downloader, parser and generator."""
     # Note: the URL manager is stored under the singular name `url`.
     self.url = url_manager.UrlManager()
     self.downloader = html_downloader.Downloader()
     self.parser = html_parser.Parser()
     self.generator = html_generator.Generator()
예제 #6
0
 def __init__(self):  # (author's question) What is the relationship between object and self?
     """Instantiate the URL manager, downloader, parser and outputer."""
     self.urls = url_manager.Manager()  # (author's question) file name (library name).class?
     self.download = html_downloader.Downloader()
     self.parser = html_parser.Parser()
     self.output = html_outputer.Outputer()
예제 #7
0
 def __init__(self, url):
     """Remember the root URL and build the helper objects.

     Args:
         url: Stored as ``self.root_url`` and also handed to
             ``html_parser.Parser``.
     """
     self.root_url = url
     self.urls = url_manager.UrlManager()
     self.downloader = html_downloader.HtmlDownloader()
     self.parser = html_parser.Parser(url)
     self.outputer = outputer.Outputer()
예제 #8
0
 def __init__(self):
     """Instantiate the parser and the output helper."""
     self.parser = html_parser.Parser()
     self.output = html_output.Output()
예제 #9
0
def parser_factory(type, settings):
    """Return a parser for the requested ``type``.

    Args:
        type: Compared against ``html_parser.TYPE_HTML``.
        settings: Passed to ``html_parser.Parser`` when the type matches.

    Returns:
        An ``html_parser.Parser`` built from ``settings`` when ``type``
        equals ``html_parser.TYPE_HTML``; otherwise ``None``.
    """
    wants_html = html_parser.TYPE_HTML == type
    if not wants_html:
        # No other parser types are handled here.
        return None
    return html_parser.Parser(settings)
예제 #10
0
 def __init__(self):
     """Instantiate the URL manager, page downloader, parser and outputer."""
     self.urls = url_manager.UrlManager()
     self.downloader = page_downloader.Downloader()
     self.parser = html_parser.Parser()
     # Note: the attribute name is plural but holds a single Outputer instance.
     self.outputers = outputer.Outputer()
예제 #11
0
 def parse_race_as_html(self):
     """Parse the race page with ``html_parser.Parser`` and store the result.

     Creates a parser for ``self.racename`` (with ``debug=True``), keeps it
     on ``self.html_parser``, and assigns the value returned by
     ``parse_page()`` to ``self.race``. The parser's ``quit()`` is called
     whether or not parsing succeeds; any exception from ``parse_page()``
     still propagates to the caller.
     """
     print('Parsing race as html')
     self.html_parser = html_parser.Parser(self.racename, debug=True)
     try:
         self.race = self.html_parser.parse_page()
     finally:
         # quit() used to be skipped when parse_page() raised; always call it
         # so the parser is shut down on the failure path as well.
         self.html_parser.quit()