def run(self):
        """Download every link on ``self.url`` that matches ``self.regex``.

        The page at ``self.url`` is fetched and its links are made absolute.
        Each link matched by ``self.regex`` is saved into ``self.out_dir``
        under the unquoted basename of the link. Links whose target file
        already exists are skipped. When ``self.dry_run`` is true the planned
        downloads are only logged.

        Raises:
            requests.HTTPError: if the page request returns a 4xx/5xx status.
        """
        _LOG.info("Fetch links from url: %s", self.url)
        header = {
            # most common user-agent
            'user-agent': 'Mozilla/5.0 ;Windows NT 6.1; WOW64; Trident/7.0; rv:11.0; like Gecko',
        }
        page = requests.get(self.url, headers=header)
        # Fail loudly on an HTTP error instead of scraping links out of an
        # error page.
        page.raise_for_status()
        tree = html.fromstring(page.text)
        tree.make_links_absolute(self.url, resolve_base_href=True)

        # Filter once up front so the progress counter below counts only the
        # links that will actually be processed.
        elements_links = []
        for element, _attribute, link, _pos in tree.iterlinks():
            if self.regex.search(link):
                elements_links.append((element, link))
            else:
                _LOG.debug("Skip [%s]", link)

        num_links = len(elements_links)
        for i, (element, link) in enumerate(elements_links, start=1):
            _LOG.info(
                "(%s/%s) %s (%s)",
                i,
                num_links,
                element.text,
                link,
            )
            new_filepath = os.path.join(
                self.out_dir,
                webutil.unquote_url(os.path.basename(link)),
            )

            if os.path.exists(new_filepath):
                _LOG.info(
                    "Skip [%s] since [%s] already exists",
                    link,
                    new_filepath,
                )
                continue

            _LOG.info("Download to [%s]", new_filepath)
            if self.dry_run:
                continue

            # Download under a temporary name and rename afterwards, so a
            # failed download does not leave a file at new_filepath that the
            # existence check above would skip on the next run.
            tmp_filepath = new_filepath + ".tmp"
            webutil.download_file(link, tmp_filepath)
            os.rename(tmp_filepath, new_filepath)
 def test_unquote_url_OneEncodedCharacter_ReturnUnquoted(self):
     # A single percent-encoded space (%20) must be decoded to a literal space.
     expected = 'http://abc.com/hello world'
     quoted = 'http://abc.com/hello%20world'
     self.assertEqual(expected, webutil.unquote_url(quoted))
 def test_unquote_url_NoSpecialCharacter_ReturnAsIs(self):
     # A URL with nothing percent-encoded must come back unchanged.
     plain_url = 'http://abc.com'
     self.assertEqual(plain_url, webutil.unquote_url(plain_url))
 def test_unquote_url_EmptyString(self):
     # Empty input must yield an empty string.
     unquoted = webutil.unquote_url('')
     self.assertEqual('', unquoted)