Example #1
0
 def urlopen_get_url_image(self, url, path, view=False):
     """Fetch ``url`` into ``path`` via ``WebContent.urlopen_get_url_file``.

     The request headers carry a fixed User-Agent and use ``self._com`` as
     the ``Referer``. SSL is enabled according to
     ``WebContent.url_is_https(url)``.

     :param url: address of the image to fetch.
     :param path: passed through unchanged as the second positional argument.
     :param view: forwarded to the downloader as ``view``.
     :return: whatever ``WebContent.urlopen_get_url_file`` returns.
     """
     user_agent = '%s' % USER_AGENTS['AppleWebKit/537.36']
     # NOTE(review): 'GET' is sent as a header field whose value is the URL;
     # it is kept exactly as the request was originally built.
     request_headers = {}
     request_headers['User-Agent'] = user_agent
     request_headers['GET'] = url
     request_headers['Referer'] = self._com
     use_ssl = WebContent.url_is_https(url)
     return WebContent.urlopen_get_url_file(
         url, path, ssl=use_ssl, headers=request_headers, view=view)
Example #2
0
 def get_url_of_pages(self, num):
     """Build the list of page URLs for pages 1 through ``num``.

     The first entry is built from ``self._url`` as-is. Each later page
     ``n`` (2..num) is built from the string ``'<int(self._url)>_<n>'``.
     Every entry is produced by ``WebContent.set_url_base_and_num`` with
     ``self._url_base`` as the base.

     :param num: total number of pages; values below 2 yield only the
         first page.
     :return: list of URLs, first page first.
     """
     # Later pages are built before the first one, matching the order in
     # which the helper has always been invoked.
     following_pages = [
         WebContent.set_url_base_and_num(
             self._url_base, '%d_%d' % (int(self._url), page_no))
         for page_no in range(2, num + 1)
     ]
     first_page = WebContent.set_url_base_and_num(self._url_base, self._url)
     return [first_page] + following_pages
Example #3
0
 def get_title(self, html, pattern=None):
     """Extract the title from ``html`` and strip configured redundant parts.

     The raw title comes from ``WebContent.get_url_title(html, pattern)``.
     A truthy result that is not a ``str`` is decoded with ``.decode()``,
     then every entry of ``self._redundant_title`` (if any) is removed
     from it.

     :param html: page content handed to ``WebContent.get_url_title``.
     :param pattern: optional pattern forwarded to the same helper.
     :return: the cleaned title, or the helper's falsy result unchanged.
     """
     title = WebContent.get_url_title(html, pattern)
     if not title:
         return title
     # isinstance (not an exact type comparison) so that str subclasses are
     # treated as text instead of being sent to .decode().
     if not isinstance(title, str):
         title = title.decode()
     for redundant in self._redundant_title or ():
         title = title.replace(redundant, '')
     return title
Example #4
0
 def urlopen_get_url_image(self, url, path, view=False):
     """Fetch ``url`` into ``path`` via ``WebContent.urlopen_get_url_file``.

     The request always uses SSL and sends a fixed User-Agent together
     with a hard-coded ``Referer`` of ``https://m.mzitu.com/``.

     :param url: address of the image to fetch.
     :param path: passed through unchanged as the second positional argument.
     :param view: forwarded to the downloader as ``view``.
     :return: whatever ``WebContent.urlopen_get_url_file`` returns.
     """
     user_agent = '%s' % USER_AGENTS['AppleWebKit/537.36']
     # NOTE(review): 'GET' is sent as a header field whose value is the URL;
     # it is kept exactly as the request was originally built.
     request_headers = {}
     request_headers['User-Agent'] = user_agent
     request_headers['GET'] = url
     request_headers['Referer'] = 'https://m.mzitu.com/'
     return WebContent.urlopen_get_url_file(
         url, path, ssl=True, headers=request_headers, view=view)
Example #5
0
 def get_user_input(self, args=None):
     """Apply command-line options to this instance and resolve the URL.

     When ``args`` is falsy the options are obtained from
     ``Base.get_user_input('hu:n:p:x:m:i:R:t:vdD')``. Recognised keys:

     * ``-h``: print ``self.help_menu`` through ``Base.print_help``.
     * ``-u``: URL, stored in ``self._url`` without a trailing ``/``.
     * ``-n``: integer stored in ``self._num``.
     * ``-p``: path, stored absolute in ``self._path``.
     * ``-R``: value stored (after ``os.path.abspath``) in
       ``self._ex_re_image_url``.
     * ``-t``: integer thread limit; a non-zero value replaces
       ``self._thread_max``.
     * ``-v``: enable ``self._view`` and the warning print level.
     * ``-x``: value stored in ``self._xval``.
     * ``-m``: download method, one of ``wget``, ``rtrv``, ``rget``,
       ``uget``; unknown names are ignored.
     * ``-d`` / ``-D``: debug level 1 / 2 with the matching print levels.

     After option handling ``self._url`` is split with
     ``WebContent.get_url_base_and_num`` and ``self._com`` is derived
     from ``self._url_base`` when it matches a ``.com``/``.cn``/``.net``
     address.

     :param args: mapping of option flag to value, or ``None``.
     :return: the option mapping that was used.
     :raises ValueError: if the ``-n`` value is not an integer.
     """
     if not args:
         args = Base.get_user_input('hu:n:p:x:m:i:R:t:vdD')
     if '-h' in args:
         Base.print_help(self.help_menu)
     if '-u' in args:
         # Strip a single trailing slash from the URL.
         self._url = re.sub('/$', '', args['-u'])
     if '-n' in args:
         self._num = int(args['-n'])
     if '-p' in args:
         self._path = os.path.abspath(args['-p'])
     if '-R' in args:
         # NOTE(review): the attribute name suggests a regular expression,
         # yet the value is run through abspath -- confirm this is intended.
         self._ex_re_image_url = os.path.abspath(args['-R'])
     if '-t' in args:
         try:
             n = int(args['-t'])
         except ValueError as e:
             Base.print_exit('%s, -h for help!' % str(e))
         else:
             # Only reached on a successful parse, so ``n`` is always bound;
             # 0 leaves the current thread limit untouched.
             if n:
                 self._thread_max = n
     if '-v' in args:
         self._view = True
         self._pr.set_pr_level(self._pr.get_pr_level() | Print.PR_LVL_WARN)
     if '-x' in args:
         self._xval = args['-x']
     if '-m' in args:
         dl_image_funcs = {
             'wget': self.wget_url_image,
             'rtrv': self.retrieve_url_image,
             'rget': self.requests_get_url_image,
             'uget': self.urlopen_get_url_image,
         }
         if args['-m'] in dl_image_funcs:
             self._dl_image = dl_image_funcs[args['-m']]
     if '-d' in args:
         self.__dbg = 1
         self._pr.set_pr_level(self._pr.get_pr_level() | Print.PR_LVL_ALL)
     if '-D' in args:
         self.__dbg = 2
         self._pr.set_pr_level(self._pr.get_pr_level() | Print.PR_LVL_DBG)
         # The shared WebContent printer is set from this instance's level.
         WebContent.pr.set_pr_level(self._pr.get_pr_level() | Print.PR_LVL_DBG)
     # check url
     if self._url:
         base, num = WebContent.get_url_base_and_num(self._url)
         if base:
             self._url_base = base
         if num:
             self._url = num
         self._pr.pr_dbg('get base: %s, url: %s' % (base, self._url))
     else:
         Base.print_exit('[WebImage] Error, no set url, -h for help!')
     if self._url_base:
         # Raw string: the pattern contains ``\.``, which is an invalid
         # escape sequence in a normal string literal.
         www_com = re.match(r'http[s]?://.+\.(com|cn|net)', self._url_base)
         if www_com:
             self._com = www_com.group()
     return args
Example #6
0
 def convert_url_to_title(self, url):
     """Return the result of ``WebContent.convert_url_to_title(url)``.

     :param url: URL handed to the helper unchanged.
     """
     title = WebContent.convert_url_to_title(url)
     return title
Example #7
0
 def get_pages(self, html, pattern=None):
     """Return the result of ``WebContent.get_url_pages(html, pattern)``.

     :param html: page content handed to the helper unchanged.
     :param pattern: optional pattern forwarded to the helper.
     """
     pages = WebContent.get_url_pages(html, pattern)
     return pages
Example #8
0
 def get_url_content(self, url, view=False):
     """Return the result of ``WebContent.get_url_content`` for ``url``.

     :param url: address to fetch, passed as the ``url`` keyword.
     :param view: forwarded to the helper as the ``view`` keyword.
     """
     content = WebContent.get_url_content(url=url, view=view)
     return content
Example #9
0
 def requests_get_url_image(self, url, path, view=False):
     """Download ``url`` to ``path`` via ``WebContent.requests_get_url_file``.

     :param url: address of the image to fetch.
     :param path: passed through unchanged as the second positional argument.
     :param view: forwarded to the helper as ``view``.
     :return: whatever ``WebContent.requests_get_url_file`` returns.
     """
     outcome = WebContent.requests_get_url_file(url, path, view=view)
     return outcome
Example #10
0
 def wget_url_image(self, url, path, view=False):
     """Download ``url`` to ``path`` via ``WebContent.wget_url_file``.

     The helper receives a ``config`` string of fixed options
     (``-c -t 3 -T 10``) plus ``-U`` with a fixed, single-quoted User-Agent.

     :param url: address of the image to fetch.
     :param path: passed through unchanged as the second positional argument.
     :param view: forwarded to the helper as ``view``.
     :return: whatever ``WebContent.wget_url_file`` returns.
     """
     user_agent = USER_AGENTS['AppleWebKit/537.36']
     wget_options = "-c -t 3 -T 10 -U '%s'" % user_agent
     return WebContent.wget_url_file(url, path, view=view, config=wget_options)
Example #11
0
 def retrieve_url_image(self, url, path, view=False):
     """Download ``url`` to ``path`` via ``WebContent.retrieve_url_file``.

     :param url: address of the image to fetch.
     :param path: passed through unchanged as the second positional argument.
     :param view: forwarded to the helper as ``view``.
     :return: whatever ``WebContent.retrieve_url_file`` returns.
     """
     outcome = WebContent.retrieve_url_file(url, path, view=view)
     return outcome
Example #12
0
 def get_url_address(self, url_base, url):
     """Return ``WebContent.set_url_base_and_num(url_base, url)``.

     :param url_base: first argument handed to the helper.
     :param url: second argument handed to the helper.
     """
     address = WebContent.set_url_base_and_num(url_base, url)
     return address
Example #13
0
 def get_url_content(self, url, view=False):
     """Fetch ``url`` and return its content with every backslash removed.

     The content comes from ``WebContent.get_url_content``; the bytes
     pattern used for the substitution implies the content is bytes-like.

     :param url: address to fetch, passed as the ``url`` keyword.
     :param view: forwarded to the helper as the ``view`` keyword.
     :return: the content without backslashes, or ``None`` when the helper
         returns a falsy value.
     """
     raw = WebContent.get_url_content(url=url, view=view)
     if not raw:
         return None
     # Regex for a single literal backslash, as a bytes pattern.
     backslash = b'\\\\'
     return re.sub(backslash, b'', raw)