def urlopen_get_url_image(self, url, path, view=False):
    """Download ``url`` to ``path`` through ``WebContent.urlopen_get_url_file``.

    The request headers carry a User-Agent taken from ``USER_AGENTS``, a
    ``GET`` entry holding the URL itself and a ``Referer`` read from
    ``self._com``.  The ``ssl`` flag is whatever
    ``WebContent.url_is_https(url)`` reports for the URL.

    Args:
        url: URL to fetch (an image URL, going by the method name).
        path: destination handed to the helper unchanged.
        view: forwarded to the helper unchanged.

    Returns:
        The result of ``WebContent.urlopen_get_url_file``.
    """
    request_headers = {}
    request_headers['User-Agent'] = '%s' % USER_AGENTS['AppleWebKit/537.36']
    request_headers['GET'] = url
    request_headers['Referer'] = self._com
    return WebContent.urlopen_get_url_file(
        url,
        path,
        ssl=WebContent.url_is_https(url),
        headers=request_headers,
        view=view,
    )
def get_url_of_pages(self, num):
    """Return the URLs of pages 1 through ``num`` as a list.

    The first entry is built from ``self._url_base`` and ``self._url``.
    Every later page ``k`` (2 <= k <= num) is built from ``self._url_base``
    and the string ``'<int(self._url)>_<k>'``.  All URLs are produced by
    ``WebContent.set_url_base_and_num``.

    Args:
        num: number of the last page; values below 2 yield only the
            first page.

    Returns:
        A new list of page URLs, first page at index 0.
    """
    def numbered_page(page_no):
        # int() is evaluated per page, so it is never reached when there
        # are no follow-up pages.
        return WebContent.set_url_base_and_num(
            self._url_base, '%d_%d' % (int(self._url), page_no))

    follow_up_pages = [numbered_page(page_no) for page_no in range(2, num + 1)]
    first_page = WebContent.set_url_base_and_num(self._url_base, self._url)
    return [first_page] + follow_up_pages
def get_title(self, html, pattern=None):
    """Extract a title from ``html`` and strip the configured noise strings.

    The raw title comes from ``WebContent.get_url_title(html, pattern)``.
    A non-``str`` title is converted with ``.decode()`` (presumably it is
    ``bytes`` in that case -- confirm against WebContent), then every entry
    of ``self._redundant_title`` is removed from it.

    Args:
        html: page content passed to ``WebContent.get_url_title``.
        pattern: optional pattern passed through to the same helper.

    Returns:
        The cleaned title, or the helper's falsy result unchanged when no
        title was found.
    """
    title = WebContent.get_url_title(html, pattern)
    if not title:
        return title
    # isinstance() rather than an exact type comparison: a str subclass
    # has no .decode() and must be left as text.
    if not isinstance(title, str):
        title = title.decode()
    for redundant in self._redundant_title or ():
        title = title.replace(redundant, '')
    return title
def urlopen_get_url_image(self, url, path, view=False):
    """Download ``url`` to ``path`` through ``WebContent.urlopen_get_url_file``.

    The request is always made with ``ssl=True`` and a fixed ``Referer``
    of ``https://m.mzitu.com/``; the headers also carry a User-Agent from
    ``USER_AGENTS`` and a ``GET`` entry holding the URL itself.

    Args:
        url: URL to fetch (an image URL, going by the method name).
        path: destination handed to the helper unchanged.
        view: forwarded to the helper unchanged.

    Returns:
        The result of ``WebContent.urlopen_get_url_file``.
    """
    agent = '%s' % USER_AGENTS['AppleWebKit/537.36']
    header_items = (
        ('User-Agent', agent),
        ('GET', url),
        ('Referer', 'https://m.mzitu.com/'),
    )
    return WebContent.urlopen_get_url_file(
        url, path, ssl=True, headers=dict(header_items), view=view)
def get_user_input(self, args=None):
    """Apply command-line style options to this instance and check the URL.

    When ``args`` is empty or ``None`` the options are obtained from
    ``Base.get_user_input`` with the option string
    ``'hu:n:p:x:m:i:R:t:vdD'``.  ``args`` is used as a mapping from option
    flag to value (it is tested with ``in`` and indexed with ``[]``).

    Options handled here:
        -h  print ``self.help_menu`` through ``Base.print_help``.
        -u  URL; one trailing ``/`` is removed.
        -n  stored as an int in ``self._num``.
        -p  stored as an absolute path in ``self._path``.
        -R  stored in ``self._ex_re_image_url`` after ``os.path.abspath``.
        -t  int stored in ``self._thread_max``; 0 leaves it unchanged.
        -v  sets ``self._view`` and enables the WARN print level.
        -x  stored verbatim in ``self._xval``.
        -m  picks the download method: wget, rtrv, rget or uget;
            unknown names are ignored.
        -d  debug level 1, enables all print levels.
        -D  debug level 2, enables the DBG print level here and on
            ``WebContent.pr``.
    The option string also lists ``-i``, which this method does not handle.

    After the options are applied, ``self._url`` is split with
    ``WebContent.get_url_base_and_num`` into ``self._url_base`` and
    ``self._url``, and the site root of ``self._url_base`` is stored in
    ``self._com``.  A missing URL is reported through ``Base.print_exit``.

    Args:
        args: optional pre-parsed option mapping.

    Returns:
        The option mapping that was used.
    """
    if not args:
        args = Base.get_user_input('hu:n:p:x:m:i:R:t:vdD')
    if '-h' in args:
        Base.print_help(self.help_menu)
    if '-u' in args:
        self._url = re.sub('/$', '', args['-u'])
    if '-n' in args:
        self._num = int(args['-n'])
    if '-p' in args:
        self._path = os.path.abspath(args['-p'])
    if '-R' in args:
        # NOTE(review): abspath() on a value stored as "_ex_re_image_url"
        # looks odd -- confirm whether -R is a path or a regex.
        self._ex_re_image_url = os.path.abspath(args['-R'])
    if '-t' in args:
        try:
            thread_max = int(args['-t'])
        except ValueError as e:
            Base.print_exit('%s, -h for help!' % str(e))
        else:
            # Only reached on a successful parse, so the value is never
            # read unbound even if print_exit() were to return.
            if thread_max:
                self._thread_max = thread_max
    if '-v' in args:
        self._view = True
        self._pr.set_pr_level(self._pr.get_pr_level() | Print.PR_LVL_WARN)
    if '-x' in args:
        self._xval = args['-x']
    if '-m' in args:
        dl_image_funcs = {
            'wget': self.wget_url_image,
            'rtrv': self.retrieve_url_image,
            'rget': self.requests_get_url_image,
            'uget': self.urlopen_get_url_image,
        }
        method = args['-m']
        if method in dl_image_funcs:
            self._dl_image = dl_image_funcs[method]
    if '-d' in args:
        self.__dbg = 1
        self._pr.set_pr_level(self._pr.get_pr_level() | Print.PR_LVL_ALL)
    if '-D' in args:
        self.__dbg = 2
        self._pr.set_pr_level(self._pr.get_pr_level() | Print.PR_LVL_DBG)
        WebContent.pr.set_pr_level(
            self._pr.get_pr_level() | Print.PR_LVL_DBG)
    # check url
    if self._url:
        base, num = WebContent.get_url_base_and_num(self._url)
        if base:
            self._url_base = base
        if num:
            self._url = num
        self._pr.pr_dbg('get base: %s, url: %s' % (base, self._url))
    else:
        Base.print_exit('[WebImage] Error, no set url, -h for help!')
    if self._url_base:
        # Raw string: '\.' in a plain literal is an invalid escape sequence.
        www_com = re.match(r'http[s]?://.+\.(com|cn|net)', self._url_base)
        if www_com:
            self._com = www_com.group()
    return args
def convert_url_to_title(self, url):
    """Turn ``url`` into a title via ``WebContent.convert_url_to_title``.

    Args:
        url: URL handed to the helper unchanged.

    Returns:
        The helper's result, unmodified.
    """
    derived_title = WebContent.convert_url_to_title(url)
    return derived_title
def get_pages(self, html, pattern=None):
    """Look up page information in ``html`` via ``WebContent.get_url_pages``.

    Args:
        html: page content handed to the helper unchanged.
        pattern: optional pattern handed to the helper unchanged.

    Returns:
        The helper's result, unmodified.
    """
    pages = WebContent.get_url_pages(html, pattern)
    return pages
def get_url_content(self, url, view=False):
    """Fetch the content of ``url`` via ``WebContent.get_url_content``.

    Args:
        url: URL passed to the helper as the ``url`` keyword.
        view: passed to the helper as the ``view`` keyword.

    Returns:
        The helper's result, unmodified.
    """
    content = WebContent.get_url_content(
        url=url,
        view=view,
    )
    return content
def requests_get_url_image(self, url, path, view=False):
    """Download ``url`` to ``path`` via ``WebContent.requests_get_url_file``.

    Args:
        url: URL handed to the helper unchanged.
        path: destination handed to the helper unchanged.
        view: passed to the helper as the ``view`` keyword.

    Returns:
        The helper's result, unmodified.
    """
    outcome = WebContent.requests_get_url_file(url, path, view=view)
    return outcome
def wget_url_image(self, url, path, view=False):
    """Download ``url`` to ``path`` via ``WebContent.wget_url_file``.

    The helper receives a ``config`` string of the form
    ``-c -t 3 -T 10 -U '<user agent>'`` with the user agent taken from
    ``USER_AGENTS``.  These are presumably wget command-line options
    (continue, 3 tries, 10 s timeout, user agent) -- confirm against
    ``WebContent.wget_url_file``.

    Args:
        url: URL handed to the helper unchanged.
        path: destination handed to the helper unchanged.
        view: passed to the helper as the ``view`` keyword.

    Returns:
        The helper's result, unmodified.
    """
    user_agent = USER_AGENTS['AppleWebKit/537.36']
    wget_options = "-c -t 3 -T 10 -U \'%s\'" % user_agent
    return WebContent.wget_url_file(
        url, path, view=view, config=wget_options)
def retrieve_url_image(self, url, path, view=False):
    """Download ``url`` to ``path`` via ``WebContent.retrieve_url_file``.

    Args:
        url: URL handed to the helper unchanged.
        path: destination handed to the helper unchanged.
        view: passed to the helper as the ``view`` keyword.

    Returns:
        The helper's result, unmodified.
    """
    outcome = WebContent.retrieve_url_file(url, path, view=view)
    return outcome
def get_url_address(self, url_base, url):
    """Combine ``url_base`` and ``url`` via ``WebContent.set_url_base_and_num``.

    Args:
        url_base: first argument handed to the helper.
        url: second argument handed to the helper.

    Returns:
        The helper's result, unmodified.
    """
    address = WebContent.set_url_base_and_num(url_base, url)
    return address
def get_url_content(self, url, view=False):
    """Fetch the content of ``url`` and remove every backslash from it.

    The content comes from ``WebContent.get_url_content``.  The
    substitution uses bytes patterns, so the content is expected to be
    bytes.

    Args:
        url: URL passed to the helper as the ``url`` keyword.
        view: passed to the helper as the ``view`` keyword.

    Returns:
        The content with all backslash characters removed, or ``None``
        when the helper returned nothing (or an empty result).
    """
    fetched = WebContent.get_url_content(url=url, view=view)
    if not fetched:
        return None
    # The bytes pattern below is the regex for one literal backslash.
    return re.sub(b'\\\\', b'', fetched)