def chrome_versions():
    """Scrape Chrome release data from Wikipedia's version history page.

    Returns an empty list when ``is_ci`` is truthy, without touching the
    network.  Otherwise the last ``<tbody>`` under ``#mw-content-text`` is
    walked row by row and a list of dicts with the keys ``date``
    (``YYYY-MM-DD``), ``chrome_version`` and ``webkit_version`` is returned,
    in the reverse of the table's row order.
    """
    if is_ci:
        return []
    print('Getting chrome versions...')
    import html5lib
    raw = download_securely(
        'https://en.wikipedia.org/wiki/Google_Chrome_version_history').decode('utf-8')
    root = html5lib.parse(raw, treebuilder='lxml', namespaceHTMLElements=False)
    table = root.xpath('//*[@id="mw-content-text"]//tbody')[-1]
    ans = []
    for tr in table.iterchildren('tr'):
        cells = tuple(tr.iterchildren('td'))
        # Rows without <td> children (e.g. header rows) and rows too short to
        # carry the three columns indexed below are skipped instead of
        # raising IndexError.
        if len(cells) < 3:
            continue
        if not cells[2].text or not cells[2].text.strip():
            continue
        # get() returns None when the attribute is missing; fall back to an
        # empty string so the membership tests below cannot raise TypeError.
        s = cells[0].get('style') or ''
        # Stop at the first row whose first cell carries neither colour marker.
        if '#a0e75a' not in s and 'salmon' not in s:
            break
        chrome_version = cells[0].text.strip()
        # Round-trip through strptime so a malformed date fails loudly.
        ts = datetime.strptime(
            cells[1].text.strip().split()[0], '%Y-%m-%d').date().strftime('%Y-%m-%d')
        try:
            # The version is the second whitespace-separated token of the cell.
            webkit_version = cells[2].text.strip().split()[1]
        except IndexError:
            continue
        ans.append({'date': ts, 'chrome_version': chrome_version,
                    'webkit_version': webkit_version})
    return list(reversed(ans))
def download_vendor_release(self, tdir, url):
    """Download the tarball at *url* and unpack it into *tdir*.

    When ``is_ci`` is truthy a failed download is retried once after a two
    second pause; otherwise the first failure propagates to the caller.

    Returns the path of the sole entry in *tdir* (built with ``self.j``) when
    extraction leaves exactly one entry there, else *tdir* itself.
    """
    self.info('Downloading %s:' % self.TAR_NAME, url)
    try:
        payload = download_securely(url)
    except Exception:
        if not is_ci:
            raise
        self.info('Download failed, sleeping and retrying...')
        time.sleep(2)
        # Second and final attempt: any error raised here is not caught.
        payload = download_securely(url)
    with tarfile.open(fileobj=BytesIO(payload)) as archive:
        archive.extractall(tdir)
        entries = os.listdir(tdir)
        if len(entries) != 1:
            return tdir
        return self.j(tdir, entries[0])
def common_user_agents():
    """Return a list of commonly seen browser user agent strings.

    When ``is_ci`` is truthy a fixed list of IE 11 user agents is returned
    and no download happens.  Otherwise the list is read from the
    ``get-the-list`` textarea of the techblog.willshouse.com page and passed
    through ``filter_ans``.

    Raises:
        ValueError: if the textarea cannot be located in the downloaded page
            or the filtered list is empty.
    """
    if is_ci:
        return [
            # IE 11 - windows 10
            'Mozilla/5.0 (Windows NT 10.0; Trident/7.0; rv:11.0) like Gecko',
            # IE 11 - windows 8.1
            'Mozilla/5.0 (Windows NT 6.3; Trident/7.0; rv:11.0) like Gecko',
            # IE 11 - windows 8
            'Mozilla/5.0 (Windows NT 6.2; Trident/7.0; rv:11.0) like Gecko',
            # IE 11 - windows 7
            'Mozilla/5.0 (Windows NT 6.1; Trident/7.0; rv:11.0) like Gecko',
            # 32bit IE 11 on 64 bit win 10
            'Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; rv:11.0) like Gecko',
            # 32bit IE 11 on 64 bit win 8.1
            'Mozilla/5.0 (Windows NT 6.3; WOW64; Trident/7.0; rv:11.0) like Gecko',
            # 32bit IE 11 on 64 bit win 7
            'Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; rv:11.0) like Gecko',
        ]
    print('Getting recent UAs...')
    raw = download_securely(
        'https://techblog.willshouse.com/2012/01/03/most-common-user-agents/').decode('utf-8')
    match = re.search(
        r'<textarea.+"get-the-list".+>([^<]+)</textarea>', raw)
    # re.search returns None when the page layout no longer matches; report
    # that with the same error as an empty result instead of AttributeError.
    if match is None:
        raise ValueError('Failed to download list of common UAs')
    ans = filter_ans(match.group(1).splitlines())
    if not ans:
        raise ValueError('Failed to download list of common UAs')
    return ans
def common_user_agents():
    """Return up to 256 commonly seen browser user agent strings.

    When ``is_ci`` is truthy a fixed list of IE 11 user agents is returned
    and no download happens.  Otherwise the gzip-compressed JSON list from
    the intoli/user-agents repository is fetched, entries whose user agent
    starts with ``Opera`` are dropped, and the remainder is passed through
    ``filter_ans`` and truncated to 256 items.

    Raises:
        ValueError: if the resulting list is empty.
    """
    if is_ci:
        return [
            # IE 11 - windows 10
            'Mozilla/5.0 (Windows NT 10.0; Trident/7.0; rv:11.0) like Gecko',
            # IE 11 - windows 8.1
            'Mozilla/5.0 (Windows NT 6.3; Trident/7.0; rv:11.0) like Gecko',
            # IE 11 - windows 8
            'Mozilla/5.0 (Windows NT 6.2; Trident/7.0; rv:11.0) like Gecko',
            # IE 11 - windows 7
            'Mozilla/5.0 (Windows NT 6.1; Trident/7.0; rv:11.0) like Gecko',
            # 32bit IE 11 on 64 bit win 10
            'Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; rv:11.0) like Gecko',
            # 32bit IE 11 on 64 bit win 8.1
            'Mozilla/5.0 (Windows NT 6.3; WOW64; Trident/7.0; rv:11.0) like Gecko',
            # 32bit IE 11 on 64 bit win 7
            'Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; rv:11.0) like Gecko',
        ]
    print('Getting recent UAs...')
    compressed = io.BytesIO(download_securely(
        'https://raw.githubusercontent.com/intoli/user-agents/master/src/user-agents.json.gz'
    ))
    entries = json.loads(gzip.GzipFile(fileobj=compressed).read())
    every_ua = (entry['userAgent'] for entry in entries)
    non_opera = [ua for ua in every_ua if not ua.startswith('Opera')]
    ans = filter_ans(non_opera)[:256]
    if ans:
        return ans
    raise ValueError('Failed to download list of common UAs')
def chrome_versions():
    """Collect Chrome release information from Wikipedia.

    Returns ``[]`` immediately when ``is_ci`` is truthy.  Otherwise the
    Google Chrome version history page is downloaded, the last ``<tbody>``
    inside ``#mw-content-text`` is scanned, and one dict per accepted row is
    produced with the keys ``date`` (``YYYY-MM-DD``), ``chrome_version`` and
    ``webkit_version``.  The result is in the reverse of the table's row
    order.
    """
    if is_ci:
        return []
    print('Getting chrome versions...')
    import html5lib
    raw = download_securely(
        'https://en.wikipedia.org/wiki/Google_Chrome_version_history').decode(
            'utf-8')
    root = html5lib.parse(raw, treebuilder='lxml', namespaceHTMLElements=False)
    table = root.xpath('//*[@id="mw-content-text"]//tbody')[-1]
    ans = []
    for tr in table.iterchildren('tr'):
        cells = tuple(tr.iterchildren('td'))
        # Skip rows that have no <td> cells or too few to hold the three
        # columns read below; indexing cells[2] would otherwise raise.
        if len(cells) < 3:
            continue
        if not cells[2].text or not cells[2].text.strip():
            continue
        # A missing style attribute comes back as None, which cannot be used
        # with ``in``; an empty string simply fails both marker checks.
        s = cells[0].get('style') or ''
        # Scanning ends at the first row lacking both colour markers.
        if '#a0e75a' not in s and 'salmon' not in s:
            break
        chrome_version = cells[0].text.strip()
        # Parsing and re-formatting validates the date text.
        ts = datetime.strptime(cells[1].text.strip().split()[0],
                               '%Y-%m-%d').date().strftime('%Y-%m-%d')
        try:
            # Second whitespace-separated token of the third cell.
            webkit_version = cells[2].text.strip().split()[1]
        except IndexError:
            continue
        ans.append({
            'date': ts,
            'chrome_version': chrome_version,
            'webkit_version': webkit_version
        })
    return list(reversed(ans))
def run(self, opts):
    """Download the hyphenation dictionaries and repackage them.

    After ``self.clean()`` the directory ``self.hyphenation_dir`` is created
    and populated with three files: ``dictionaries.tar.xz`` (the processed
    ``.dic`` files, written via ``compress_tar``), ``sha1sum`` (hex SHA-1 of
    the concatenated ``.dic`` contents in sorted name order) and a copy of
    ``locales.json``.  *opts* is not used.
    """
    self.clean()
    os.makedirs(self.hyphenation_dir)
    self.info('Downloading hyphenation dictionaries...')
    with self.temp_dir() as src, ZipFile(
            BytesIO(download_securely(URL))) as zf, self.temp_dir() as output_dir:
        zf.extractall(src)
        # If the zip unpacked into a single entry, use that entry as the
        # source directory.
        unpacked = os.listdir(src)
        if len(unpacked) == 1:
            src = os.path.join(src, unpacked[0])
        process_dictionaries(src, output_dir)

        dic_names = sorted(
            name for name in os.listdir(output_dir) if name.endswith('.dic'))

        digest = hashlib.sha1()
        for name in dic_names:
            with open(os.path.join(output_dir, name), 'rb') as f:
                digest.update(f.read())
        checksum = type('')(digest.hexdigest())

        tar_buf = BytesIO()
        with tarfile.TarFile(fileobj=tar_buf, mode='w') as tf:
            for name in dic_names:
                with open(os.path.join(output_dir, name), 'rb') as df:
                    tinfo = tf.gettarinfo(arcname=name, fileobj=df)
                    # Pin timestamp and ownership fields to constants rather
                    # than the values read from disk.
                    tinfo.mtime = 0
                    tinfo.uid = tinfo.gid = 1000
                    tinfo.uname = tinfo.gname = 'kovid'
                    tf.addfile(tinfo, df)

        tarball_path = os.path.join(self.hyphenation_dir, 'dictionaries.tar.xz')
        with open(tarball_path, 'wb') as f:
            compress_tar(tar_buf, f)
        checksum_path = os.path.join(self.hyphenation_dir, 'sha1sum')
        with open(checksum_path, 'w') as f:
            f.write(checksum)
        shutil.copy(
            os.path.join(output_dir, 'locales.json'), self.hyphenation_dir)
def common_user_agents():
    """Return a list of commonly seen browser user agent strings.

    A fixed list of IE 11 user agents is returned when ``is_ci`` is truthy,
    with no network access.  Otherwise the techblog.willshouse.com page is
    downloaded, the contents of its ``get-the-list`` textarea are split into
    lines, and those lines are passed through ``filter_ans``.

    Raises:
        ValueError: if the textarea is not found in the page or the filtered
            list is empty.
    """
    if is_ci:
        return [
            # IE 11 - windows 10
            'Mozilla/5.0 (Windows NT 10.0; Trident/7.0; rv:11.0) like Gecko',
            # IE 11 - windows 8.1
            'Mozilla/5.0 (Windows NT 6.3; Trident/7.0; rv:11.0) like Gecko',
            # IE 11 - windows 8
            'Mozilla/5.0 (Windows NT 6.2; Trident/7.0; rv:11.0) like Gecko',
            # IE 11 - windows 7
            'Mozilla/5.0 (Windows NT 6.1; Trident/7.0; rv:11.0) like Gecko',
            # 32bit IE 11 on 64 bit win 10
            'Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; rv:11.0) like Gecko',
            # 32bit IE 11 on 64 bit win 8.1
            'Mozilla/5.0 (Windows NT 6.3; WOW64; Trident/7.0; rv:11.0) like Gecko',
            # 32bit IE 11 on 64 bit win 7
            'Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; rv:11.0) like Gecko',
        ]
    print('Getting recent UAs...')
    raw = download_securely(
        'https://techblog.willshouse.com/2012/01/03/most-common-user-agents/'
    ).decode('utf-8')
    found = re.search(r'<textarea.+"get-the-list".+>([^<]+)</textarea>', raw)
    # A changed page layout makes re.search return None; surface that as the
    # function's usual failure rather than an AttributeError on ``.group``.
    if found is None:
        raise ValueError('Failed to download list of common UAs')
    lines = found.group(1).splitlines()
    ans = filter_ans(lines)
    if not ans:
        raise ValueError('Failed to download list of common UAs')
    return ans
def download_vendor_release(self, tdir, url):
    """Download the tarball at *url* and extract it into *tdir*.

    Returns the path of the only entry in *tdir* (built with ``self.j``) when
    extraction leaves exactly one entry there; otherwise returns *tdir*.
    """
    self.info('Downloading %s:' % self.TAR_NAME, url)
    tarball = BytesIO(download_securely(url))
    with tarfile.open(fileobj=tarball) as archive:
        archive.extractall(tdir)
        contents = os.listdir(tdir)
        if len(contents) != 1:
            return tdir
        return self.j(tdir, contents[0])
def firefox_versions():
    """Return Firefox version strings scraped from mozilla.org.

    The releases page is downloaded and parsed; the text of every
    ``li/strong/a[@href]`` under the first ``<ol>`` inside
    ``<main id="main-content">`` is passed through ``filter_ans``.

    Raises:
        ValueError: if the expected ``<ol>`` is missing from the page or the
            filtered list is empty.
    """
    print('Getting firefox versions...')
    import html5lib
    raw = download_securely(
        'https://www.mozilla.org/en-US/firefox/releases/').decode('utf-8')
    root = html5lib.parse(raw, treebuilder='lxml', namespaceHTMLElements=False)
    lists = root.xpath('//main[@id="main-content"]/ol')
    # An empty xpath result means the page layout changed; report it as the
    # function's usual failure rather than an IndexError.
    if not lists:
        raise ValueError('Failed to download list of firefox versions')
    ans = filter_ans(lists[0].xpath('descendant::li/strong/a[@href]/text()'))
    if not ans:
        raise ValueError('Failed to download list of firefox versions')
    return ans
def firefox_versions():
    """Return Firefox version strings scraped from mozilla.org.

    When ``is_ci`` is truthy the fixed list ``['51.0', '50.0']`` is returned
    and nothing is downloaded.  Otherwise the releases page is parsed and the
    text of every ``li/strong/a[@href]`` under the first ``<ol>`` inside
    ``<div id="main-content">`` is passed through ``filter_ans``.

    Raises:
        ValueError: if the expected ``<ol>`` is missing from the page or the
            filtered list is empty.
    """
    if is_ci:
        return '51.0 50.0'.split()
    print('Getting firefox versions...')
    import html5lib
    raw = download_securely(
        'https://www.mozilla.org/en-US/firefox/releases/').decode('utf-8')
    root = html5lib.parse(raw, treebuilder='lxml', namespaceHTMLElements=False)
    lists = root.xpath('//div[@id="main-content"]/ol')
    # No matching <ol> means the page layout changed; raise the function's
    # usual error rather than letting ``[0]`` fail with IndexError.
    if not lists:
        raise ValueError('Failed to download list of firefox versions')
    ans = filter_ans(lists[0].xpath('descendant::li/strong/a[@href]/text()'))
    if not ans:
        raise ValueError('Failed to download list of firefox versions')
    return ans
def run(self, opts):
    """Refresh the CA certificate bundle stored at ``self.CA_PATH``.

    The bundle is downloaded from curl.haxx.se and compared with the bytes
    currently on disk (a missing file counts as empty).  When they differ the
    file is overwritten and ``self.verify_ca_certs()`` is called.  *opts* is
    not used.

    Raises:
        RuntimeError: if the download returns no data.
        OSError: if reading the existing file fails for any reason other
            than the file not existing.
    """
    current = b''
    try:
        with open(self.CA_PATH, 'rb') as f:
            current = f.read()
    except OSError as err:
        # Only a missing file is tolerated; ``current`` then stays empty.
        if err.errno != errno.ENOENT:
            raise
    latest = download_securely('https://curl.haxx.se/ca/cacert.pem')
    if not latest:
        raise RuntimeError('Failed to download CA cert bundle')
    if latest == current:
        return
    self.info('Updating Mozilla CA certificates')
    with open(self.CA_PATH, 'wb') as f:
        f.write(latest)
    self.verify_ca_certs()
def run(self, opts):
    """Update the CA certificate bundle at ``self.CA_PATH`` if it changed.

    The existing file is read (a missing file is treated as empty bytes) and
    compared with a fresh download from curl.haxx.se.  When the contents
    differ the file is rewritten and ``self.verify_ca_certs()`` is called.
    *opts* is not used.

    Raises:
        RuntimeError: if the download returns no data.
        EnvironmentError: if reading the existing file fails for any reason
            other than the file not existing.
    """
    existing = b''
    try:
        with open(self.CA_PATH, 'rb') as f:
            existing = f.read()
    except EnvironmentError as err:
        # A missing file is fine (``existing`` stays empty); anything else
        # is re-raised.
        if err.errno != errno.ENOENT:
            raise
    downloaded = download_securely('https://curl.haxx.se/ca/cacert.pem')
    if not downloaded:
        raise RuntimeError('Failed to download CA cert bundle')
    if downloaded == existing:
        return
    self.info('Updating Mozilla CA certificates')
    with open(self.CA_PATH, 'wb') as f:
        f.write(downloaded)
    self.verify_ca_certs()
def download_vendor_release(self, tdir, url):
    """Download the tarball at *url* and extract it into *tdir*.

    The download is attempted up to five times when ``is_ci`` is truthy and
    once otherwise, pausing two seconds after each failed attempt.  The
    error from the final attempt propagates to the caller.

    Returns the path of the only entry in *tdir* (built with ``self.j``) when
    extraction leaves exactly one entry there; otherwise returns *tdir*.
    """
    self.info('Downloading %s:' % self.TAR_NAME, url)
    num = 5 if is_ci else 1
    for i in range(num):
        try:
            raw = download_securely(url)
        except Exception as err:
            if i == num - 1:
                raise
            self.info(
                f'Download failed with error "{err}" sleeping and retrying...'
            )
            time.sleep(2)
        else:
            # Stop at the first success. Without this the loop would fetch
            # the file again on every remaining iteration, and a failure on
            # the last one would discard an already good download.
            break
    with tarfile.open(fileobj=BytesIO(raw)) as tf:
        tf.extractall(tdir)
        entries = os.listdir(tdir)
        if len(entries) == 1:
            return self.j(tdir, entries[0])
        return tdir
def get_list(self):
    """Return a list of common browser user agent strings.

    When ``is_ci`` is truthy a fixed list of IE 11 user agents is returned.
    Otherwise the ``get-the-list`` textarea of the techblog.willshouse.com
    page is read and its non-blank lines are returned with surrounding
    whitespace stripped.
    """
    if not is_ci:
        page = download_securely(
            'https://techblog.willshouse.com/2012/01/03/most-common-user-agents/'
        ).decode('utf-8')
        listing = re.search(
            r'<textarea.+"get-the-list".+>([^<]+)</textarea>', page).group(1)
        stripped = (line.strip() for line in listing.splitlines())
        return [ua for ua in stripped if ua]
    # Don't hammer the server from CI
    return [
        # IE 11 - windows 10
        'Mozilla/5.0 (Windows NT 10.0; Trident/7.0; rv:11.0) like Gecko',
        # IE 11 - windows 8.1
        'Mozilla/5.0 (Windows NT 6.3; Trident/7.0; rv:11.0) like Gecko',
        # IE 11 - windows 8
        'Mozilla/5.0 (Windows NT 6.2; Trident/7.0; rv:11.0) like Gecko',
        # IE 11 - windows 7
        'Mozilla/5.0 (Windows NT 6.1; Trident/7.0; rv:11.0) like Gecko',
        # 32bit IE 11 on 64 bit win 10
        'Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; rv:11.0) like Gecko',
        # 32bit IE 11 on 64 bit win 8.1
        'Mozilla/5.0 (Windows NT 6.3; WOW64; Trident/7.0; rv:11.0) like Gecko',
        # 32bit IE 11 on 64 bit win 7
        'Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; rv:11.0) like Gecko',
    ]
def download_mathjax_release(self, tdir, url):
    """Download the MathJax zip at *url* and extract it into *tdir*.

    Returns *tdir* joined with ``'MathJax-'`` followed by
    ``self.MATH_JAX_VERSION``.
    """
    self.info('Downloading MathJax:', url)
    zipped = BytesIO(download_securely(url))
    with ZipFile(zipped) as archive:
        archive.extractall(tdir)
    release_dir = 'MathJax-' + self.MATH_JAX_VERSION
    return os.path.join(tdir, release_dir)