def get_url_content(cls, url, retry_times=3, view=True, path=None):
    """Fetch *url* through ``cls.get_html`` and optionally save the result.

    When ``cls.url_is_https(url)`` is true the request is made with
    ``context=cls.CONTEXT_UNVERIFIED``; otherwise no ``context`` argument
    is passed at all.

    Args:
        url: Address to fetch.
        retry_times: Forwarded unchanged to ``cls.get_html``.
        view: Forwarded unchanged to ``cls.get_html``.
        path: Directory to save the content in. Nothing is written when
            either this or the fetched content is falsy.

    Returns:
        Whatever ``cls.get_html`` returned, whether or not it was saved.
    """
    fetch_kwargs = {'url': url, 'retry_times': retry_times, 'view': view}
    if cls.url_is_https(url):
        fetch_kwargs['context'] = cls.CONTEXT_UNVERIFIED
    content = cls.get_html(**fetch_kwargs)

    # Saving needs both something to write and somewhere to write it.
    if not (content and path):
        return content

    Path.make_path(path)
    target = '%s/%s' % (path, cls.convert_url_to_title(url))
    # Make sure the saved file carries an .html extension.
    if File.get_exname(target) != '.html':
        target += '.html'
    # NOTE(review): the file is opened in text mode with the platform's
    # default encoding -- confirm that this is intended for HTML content.
    with open(target, 'w') as out:
        out.write(content)
    return content
def unzip_wiz(self):
    """Unpack every file in ``self._fs`` and tidy up the extracted tree.

    For each source file the output directory is the file's directory
    with ``self._src`` replaced by ``self._dst``, joined with
    ``File.get_fname(f)`` and stripped of its extension. After
    ``self.unzip_file`` has extracted into that directory:

    * ``Image.remove_small_image`` is run on the directory;
    * entries of ``index_files`` accepted by ``Image.image_file`` are
      copied up into the directory, then ``index_files`` is deleted;
    * ``index.html`` is deleted if present.
    """
    for archive in self._fs:
        mirrored_dir = os.path.dirname(archive).replace(self._src, self._dst)
        joined = os.path.join(mirrored_dir, File.get_fname(archive))
        path, _ext = os.path.splitext(joined)
        Path.make_path(path)
        self.unzip_file(archive, path)

        # remove small image.
        Image.remove_small_image(path)

        # move image.
        index_files = '%s/index_files' % path
        if os.path.exists(index_files):
            for name in os.listdir(index_files):
                candidate = '%s/index_files/%s' % (path, name)
                if Image.image_file(candidate):
                    shutil.copyfile(candidate, '%s/%s' % (path, name))
            # remove invalid files and dirs.
            shutil.rmtree(index_files)

        index_html = '%s/index.html' % path
        if os.path.exists(index_html):
            os.remove(index_html)
def download_image(self, url, path):
    """Pass *url* and *path* to the configured ``self._dl_image`` callable.

    Does nothing when ``self._dl_image`` is falsy. Otherwise *path* is
    first handed to ``Path.make_path`` and the callable is then invoked
    with the URL, the path and ``self.__dbg``.
    """
    downloader = self._dl_image
    if not downloader:
        return
    Path.make_path(path)
    downloader(url, path, self.__dbg)