def __init__(self):
    """Set up the downloader, parser, default request headers and API base URL."""
    self.downloader = html_downloader.HtmlDownloader()
    self.parser = html_parser.HtmlParser()
    # JSON payloads; close the connection after every request.
    self.headers = {
        'content-type': 'application/json;charset=utf8',
        'Connection': 'close',
    }
    self.base_url = 'http://120.78.132.250:8084/admin_api'
def __init__(self):
    """Create the crawler components and empty accumulators for parsed results."""
    self.urls = url_manage.UrlManager()
    self.downloader = html_downloader.HtmlDownloader()
    self.parser = html_parser.HtmlParser()
    self.outputer = html_outputer.HtmlOutputer()
    # Results collected while crawling.
    self.titles = []
    self.pictures = []
    self.links = []
def Climb(self):
    """Fetch the page at ``self.url`` and hand the decoded HTML to the parser.

    Sends ``self.data`` as the request body.  NOTE(review): passing ``data=``
    with a GET is unusual — confirm the endpoint expects a body rather than
    query-string ``params=``.
    """
    # Removed dead commented-out urllib.request experiment and the stale
    # (misspelled) ``r.enconding`` line — requests is the live code path.
    r = requests.get(self.url, data=self.data)
    print("climb done start parse...")
    parser = html_parser.HtmlParser()
    parser.get_cp_data(r.content.decode("utf-8"))
def __init__(self, root_url, proxy_pool, threads):
    """Wire together every scan component, sharing a single proxy pool.

    :param root_url: entry-point URL; its hostname scopes the parser.
    :param proxy_pool: proxy pool shared by the downloader and scanners.
    :param threads: number of workers in the thread pool.
    """
    self.manager = url_manager.UrlManger()
    self.downloader = html_downloader.HtmlDownloader(proxy_pool)
    # Restrict parsing to links on the root host.
    self.parser = html_parser.HtmlParser(urlparse.urlparse(root_url).hostname)
    self.outputer = html_outputer.HtmlOutputer()
    self.dir = dir_scan.DirScan(proxy_pool, self.manager.set_protocol(root_url))
    self.cms = cms_scan.CMSScan(proxy_pool)
    self.sqli = sqli_scan.SqliScan(proxy_pool)
    self.xss = xss_scan.XSSScan(proxy_pool)
    self.pool = ThreadPool(threads)
def __init__(self):
    """Instantiate the four crawler components: manager, downloader, parser, outputer."""
    self.urls = url_manager.UrlManager()
    self.downloader = html_downloader.HtmlDownloader()
    self.parser = html_parser.HtmlParser()
    self.outputer = html_outputer.HtmlOutputer()
if __name__ == "__main__": # 获取游戏动漫中角色 PATH = '../data/urls.pkl' urls = FetchData().getUrls() lock = Lock() url_deq = Queue() print(url_deq.qsize()) for key in urls.new_urls.keys(): for url in urls.new_urls[key]: url_deq.put((key, url)) print(url_deq.qsize()) downloader = html_downloader.HtmlDownloader() parser = html_parser.HtmlParser() pool = [] count_process = cpu_count() length = len(urls.new_urls) print(f'build key urls, length={length}') freeze_support() for i in range(count_process): print(f'build process {i}...') p = MyProcess(str(i), url_deq, lock) pool.append(p) p.start() try: for p in pool: p.join() except:
def __init__(self):
    """Build the crawler: URL manager, downloader, parser and output writer."""
    self.urls = url_manager.UrlManger()                  # URL manager
    self.downloader = html_downloader.HtmlDownloader()   # page downloader
    self.parser = html_parser.HtmlParser()               # HTML parser
    self.outputer = html_output.HtmlOutputer()           # HTML output writer
def __init__(self):
    """Create one instance of each crawler component."""
    self.urls = url_manager.UrlManager()                 # URL manager instance
    self.downloader = html_downloader.HtmlDownloader()   # downloader instance
    self.parser = html_parser.HtmlParser()               # parser instance
    self.outputer = html_outputer.HtmlOutputer()         # data-output instance
def __init__(self):
    """Initialise the URL manager, parser, downloader and image outputer."""
    self.url = url_manager.UrlManager()
    self.parser = html_parser.HtmlParser()
    self.downloader = html_downloader.HtmlDownloader()
    self.outputer = img_outputer.ImgOutputer()
def __init__(self):
    """Set up the crawler components plus a MySQL connection helper."""
    self.urls = url_manager.UrlManager()
    self.downloader = html_downloader.HtmlDownloader()
    self.parser = html_parser.HtmlParser()
    self.outputer = html_outputer.HtmlOutputer()
    # NOTE(review): 'Conenct' looks like a typo for 'Connect', but it is the
    # class's actual name in connect_mysql — fix there first if renaming.
    self.connect = connect_mysql.Conenct()
def __init__(self):
    """Create the URL manager, downloader, parser and output writer."""
    # A method is itself an attribute: a function name is just a variable
    # bound to the function object, so assigning it passes the function around.
    self.urls = url_manager.UrlManager()
    self.downloader = html_downloader.HtmlDownloader()
    self.parser = html_parser.HtmlParser()
    self.outputer = html_outputer.HtmlOutputer()