Ejemplo n.º 1
0
 def test_regular_url(self):
     """
         A plain ASCII url (http://www.google.com) is expected to come back
         from the UTF8 to ASCII converter unchanged.
     """
     plain_url = u'http://www.google.com'
     expected = 'http://www.google.com'
     converted = convert_utf8_url_to_ascii(plain_url)
     self.assertEqual(converted, expected)
Ejemplo n.º 2
0
 def test_unicode_with_params_url(self):
     """
         Test the UTF8 to ASCII converter on a url with unicode characters,
         credentials and a port (http://Åsa:abc123@➡.ws:81/admin).

         Expected result: the non-ASCII user name is percent-encoded as
         UTF-8 (%C3%85sa), the host is punycoded (xn--hgi.ws), and the
         password, port and path are left as they are.
     """
     url = u'http://Åsa:abc123@➡.ws:81/admin'
     # The password and host are spelled out in full: an e-mail obfuscation
     # filter had replaced "abc123@xn--hgi.ws" with a placeholder, which made
     # this expectation impossible to satisfy.
     ascii_url = 'http://%C3%85sa:abc123@xn--hgi.ws:81/admin'
     self.assertEqual(convert_utf8_url_to_ascii(url), ascii_url)
Ejemplo n.º 3
0
 def test_unicode_url(self):
     """
         A url whose host contains a unicode character (http://➡.ws/admin)
         is expected to come back with the host in its ASCII "xn--" form
         and the ASCII path untouched.
     """
     unicode_url = u'http://➡.ws/admin'
     expected = 'http://xn--hgi.ws/admin'
     converted = convert_utf8_url_to_ascii(unicode_url)
     self.assertEqual(converted, expected)
Ejemplo n.º 4
0
 def test_quoted_with_params_url(self):
     """
         Input is a plain (non-unicode) string literal holding UTF-8 bytes
         plus an already percent-encoded segment
         (http://\xe2\x9e\xa1.ws/\xe2\x99\xa5/%2F).

         The expected output has the host in "xn--" form, the path bytes
         percent-encoded, and the existing %2F left as it is.
     """
     raw_url = 'http://\xe2\x9e\xa1.ws/\xe2\x99\xa5/%2F'
     expected = 'http://xn--hgi.ws/%E2%99%A5/%2F'
     converted = convert_utf8_url_to_ascii(raw_url)
     self.assertEqual(converted, expected)
Ejemplo n.º 5
0
 def test_quoted_url(self):
     """
         Input is a plain (non-unicode) string literal holding UTF-8 bytes
         in both host and path (http://\xe2\x9e\xa1.ws/\xe2\x99\xa5).

         The expected output has the host in "xn--" form and the path
         bytes percent-encoded.
     """
     raw_url = 'http://\xe2\x9e\xa1.ws/\xe2\x99\xa5'
     expected = 'http://xn--hgi.ws/%E2%99%A5'
     converted = convert_utf8_url_to_ascii(raw_url)
     self.assertEqual(converted, expected)
Ejemplo n.º 6
0
    def save_errors_raw_data_to_db(self, errors, batch_outdir):
        """
        Fetch the raw html of every error'd url and save it under batch_outdir.

        Each url is converted with convert_utf8_url_to_ascii and fetched with
        fetch_html_content. Urls that fail either step are logged through
        self.log_error and skipped, so one bad url does not abort the batch.
        The collected (url, html_content) pairs are then handed to
        self.save_raw_data_to_path with the ERRORS_RAW_DATA_DIR sub-directory
        of batch_outdir as destination.

        NOTE(review): despite the "_to_db" suffix, the code visible here only
        writes through save_raw_data_to_path -- confirm whether that helper
        touches a database.

        :param errors: iterable of 3-item sequences; only the first item (the
            url) is used here, the other two are ignored.
        :param batch_outdir: base output directory of the current batch.
        """
        raw_data = []
        for error in errors:
            url, _, _ = error
            try:
                url = convert_utf8_url_to_ascii(url)
                html_content = fetch_html_content(url)
            except Exception:
                # Best effort: log and move on to the next url. Catching
                # Exception (not a bare except) lets KeyboardInterrupt and
                # SystemExit still stop the batch.
                # The url is wrapped in a list so its repr is what gets
                # formatted into the message.
                self.log_error(u"Could not fetch raw html data for error'd url: {0} (Reason: {1})".format([url], traceback.format_exc()))
                continue
            raw_data.append((url, html_content))

        raw_data_dir = os.path.join(batch_outdir, ERRORS_RAW_DATA_DIR)
        self.log_info(u"Writing raw html data to {0}".format(raw_data_dir))
        self.save_raw_data_to_path(raw_data, raw_data_dir)