def decompress(body_bytes, content_encoding, url=None):
    """Decode an HTTP body according to its Content-Encoding value.

    Handles 'deflate' (zlib-wrapped first, then raw deflate), 'gzip' /
    'x-gzip' and 'br'. Decoding is best effort: a failure is logged and
    counted, and the body is returned unchanged.

    Args:
        body_bytes: raw response body.
        content_encoding: Content-Encoding value, compared
            case-insensitively. ``None`` or an empty value means "no
            encoding".
        url: optional URL, used only in log messages.

    Returns:
        The decoded bytes, or ``body_bytes`` untouched when the encoding is
        not recognised or decoding fails.
    """
    # A missing header must not crash on .lower(); treat it as identity.
    if not content_encoding:
        return body_bytes
    content_encoding = content_encoding.strip().lower()

    if content_encoding == 'deflate':
        try:
            return zlib.decompress(body_bytes, zlib.MAX_WBITS)  # expects header/checksum
        except Exception:
            try:
                # http://www.gzip.org/zlib/zlib_faq.html#faq38
                stats.stats_sum('content-encoding deflate fallback try', 1)
                return zlib.decompress(body_bytes, -zlib.MAX_WBITS)  # no header/checksum
            except Exception as e:
                LOGGER.debug('deflate fail for url %s: %s', url, str(e))
                stats.stats_sum('content-encoding deflate fail', 1)
                return body_bytes
    elif content_encoding == 'gzip' or content_encoding == 'x-gzip':
        try:
            # 16 + MAX_WBITS tells zlib to expect a gzip header and trailer.
            return zlib.decompress(body_bytes, 16 + zlib.MAX_WBITS)
        except Exception as e:
            LOGGER.debug('gzip fail for url %s: %s', url, str(e))
            stats.stats_sum('content-encoding gzip fail', 1)
            return body_bytes
    elif content_encoding == 'br':
        try:
            return brotli.decompress(body_bytes)
        except Exception as e:
            # Label matches the 'brotli fail' counter below (was 'bz fail').
            LOGGER.debug('brotli fail for url %s: %s', url, str(e))
            stats.stats_sum('content-encoding brotli fail', 1)
            return body_bytes
    else:
        # 'identity' is in the standard
        # also fairly common to have 'raw', 'none', or a charset
        return body_bytes
def test_streaming_compression_flush(one_compressed_file, chunk_size, mode, quality, lgwin, lgblock):
    """
    Feed a file through the streaming compressor one chunk at a time,
    flushing after every chunk, and check that the joined output
    decompresses back to the file's contents.
    """
    compressor = brotli.Compressor(
        mode=mode, quality=quality, lgwin=lgwin, lgblock=lgblock
    )
    pieces = []
    with open(one_compressed_file, 'rb') as source:
        chunk = source.read(chunk_size)
        while chunk:
            pieces.append(compressor.compress(chunk))
            pieces.append(compressor.flush())
            chunk = source.read(chunk_size)
    pieces.append(compressor.finish())

    restored = brotli.decompress(b''.join(pieces))
    with open(one_compressed_file, 'rb') as source:
        assert restored == source.read()
def request(self, method, url, *args, **kwargs):
    """Send a request, decode Brotli bodies and handle challenge responses.

    Delegates to the parent class's ``request``. If the response is
    Brotli-encoded and ``self.allow_brotli`` is false, a warning is logged
    and the response is returned as-is. Otherwise a non-empty body is
    decompressed in place, the response is optionally passed to
    ``self.debugRequest``, and a challenge response (as judged by
    ``self.isChallengeRequest``) is handled before returning.

    Returns:
        The final response object.
    """
    ourSuper = super(CloudScraper, self)
    resp = ourSuper.request(method, url, *args, **kwargs)

    if resp.headers.get('Content-Encoding') == 'br':
        if not self.allow_brotli:
            logging.warning('Brotli content detected, But option is disabled, we will not continue.')
            return resp
        # An empty body has nothing to decode. Previously this case fell
        # into the "option is disabled" branch and returned early even
        # though Brotli was enabled, skipping the challenge check below.
        if resp._content:
            resp._content = brotli.decompress(resp.content)

    # Debug request
    if self.debug:
        self.debugRequest(resp)

    # Check if Cloudflare anti-bot is on
    if self.isChallengeRequest(resp):
        if resp.request.method != 'GET':
            # Work around if the initial request is not a GET,
            # Supersede with a GET then re-request the original METHOD.
            self.request('GET', resp.url)
            resp = ourSuper.request(method, url, *args, **kwargs)
        else:
            # Solve Challenge
            resp = self.sendChallengeResponse(resp, **kwargs)

    return resp
def _decompress(input_file_name, use_gzip=True): if use_gzip: with gzip.open(input_file_name, 'rb') as in_f: return in_f.read().decode() else: with open(input_file_name, 'rb') as in_f: return brotli.decompress(in_f.read()).decode()
def download_uncompress(url, dest):
    """Download a Brotli-compressed resource and write it decompressed.

    Args:
        url: location of the Brotli-compressed payload.
        dest: path of the output file; missing parent directories are
            created.
    """
    print('Downloading {}'.format(url))
    with urllib.request.urlopen(url) as response:
        data = brotli.decompress(response.read())
    print('Extracted to {} bytes'.format(len(data)))

    # dirname() is '' for a bare filename, and os.makedirs('') raises
    # FileNotFoundError, so only create a directory when there is one.
    parent = os.path.dirname(dest)
    if parent:
        os.makedirs(parent, exist_ok=True)

    with open(dest, 'wb') as out_file:
        out_file.write(data)
    print('Decompressed to {}'.format(dest))
def _get_total_datapoints(self):
    """Collect per-region datapoints from every archived ``index.html``.

    For each non-empty dated directory the "States Affected" table is
    parsed: one datapoint is appended per mapped column for each row, plus
    a TOTAL datapoint equal to confirmed + probable. Pages that are not
    valid UTF-8 text are read as Brotli-compressed data instead.

    Returns:
        The collection created by ``self.sdpf()`` with all datapoints
        appended.
    """
    datapoints = self.sdpf()
    base_dir = self.get_path_in_dir('')

    # Column heading -> datatype; None marks a column that is skipped.
    header_to_datatype = {
        'No. of Cases (Lab Confirmed)': DataTypes.CONFIRMED,
        'No. of Cases (on admission)': DataTypes.PROBABLE,
        'No. Discharged': None,
        'No. of Deaths': DataTypes.STATUS_DEATHS
    }

    for date in self.iter_nonempty_dirs(base_dir):
        path = base_dir / date / 'index.html'
        try:
            with open(path, 'r', encoding='utf-8') as f:
                html = f.read()
        except UnicodeDecodeError:
            import brotli
            with open(path, 'rb') as f:
                html = brotli.decompress(f.read()).decode('utf-8')

        table = pq(html)('table:contains("States Affected")')

        # The first header cell is skipped; the rest are mapped by heading.
        datatypes = []
        for header_cell in pq(table)('thead tr th')[1:]:
            datatypes.append(header_to_datatype[pq(header_cell).text().strip()])

        for region_tr in pq(table)('tbody tr'):
            region = pq(region_tr[0]).text().strip()
            vals = {}

            for datatype, cell in zip(datatypes, region_tr[1:]):
                if not datatype:
                    continue
                value = int(pq(cell).text().replace(',', ''))
                vals[datatype] = value
                datapoints.append(region_schema=Schemas.ADMIN_1,
                                  region_parent='NG',
                                  region_child=region,
                                  datatype=datatype,
                                  value=value,
                                  date_updated=date,
                                  source_url=self.SOURCE_URL)

            total = vals[DataTypes.CONFIRMED] + vals[DataTypes.PROBABLE]
            datapoints.append(region_schema=Schemas.ADMIN_1,
                              region_parent='NG',
                              region_child=region,
                              datatype=DataTypes.TOTAL,
                              value=total,
                              date_updated=date,
                              source_url=self.SOURCE_URL)

    return datapoints
def proc_search(domain, url, headers, key):
    """Run a search for ``key`` and crawl the links it returns.

    If ``url`` starts with a key of ``prefix_getter``, that getter supplies
    the links and titles. Otherwise ``url + key`` is fetched (with retries
    for https), Brotli-decoded if needed, and every ``<a href>`` is
    collected. Title words not yet in ``searched`` are added to
    ``new_key``, and each resolvable link is passed to ``get_webpage``.

    Args:
        domain: site domain, forwarded to ``parse_url`` / ``get_webpage``.
        url: search URL prefix; ``key`` is appended to it.
        headers: HTTP headers for the requests.
        key: search keyword.
    """
    print("searching", url + key)
    href_list = []
    tit_list = []

    special_getter = False
    for k in prefix_getter:
        if url.startswith(k):
            href_list, tit_list = prefix_getter[k](key)
            special_getter = True
            break

    if not special_getter:
        try:
            # The context manager closes the session on failure as well.
            with requests.Session() as s:
                s.mount('https://', HTTPAdapter(max_retries=3))
                res = s.get(url + key, headers=headers)
        except Exception:
            # Best effort: log and give up on this search. (A bare except
            # would also swallow KeyboardInterrupt / SystemExit.)
            log('Failed fetching:\n\t' + url + key)
            return

        content = res.content
        if res.headers.get('Content-Encoding') == 'br':
            content = brotli.decompress(content)
        html = content.decode("utf-8")

        soup = BeautifulSoup(html, "lxml")
        for a in soup.find_all('a', href=True):
            tit_list.append(a['title'] if a.has_attr('title') else a.text)
            href_list.append(a['href'])

    # The original test (`tit_list is None and href is None`) could never
    # be true and named an undefined variable; "nothing found" is what it
    # was evidently meant to detect.
    if not tit_list and not href_list:
        log('Failed fetching:\n\t' + url + key)
        return

    for a in tit_list or ():
        for w in jieba.cut_for_search(a):
            if w not in searched:
                new_key.add(w)

    for href in href_list or ():
        _url = parse_url(href, domain)
        if _url is not None:
            get_webpage(domain, _url, headers)

    print(url, "Fin")
def test_roundtrip_compression_with_files(simple_compressed_file):
    """
    Compressing a file's contents and decompressing the result yields the
    original bytes.
    """
    source_path = simple_compressed_file[0]
    with open(source_path, 'rb') as source:
        original = source.read()

    packed = brotli.compress(original)
    assert brotli.decompress(packed) == original
def test_normal_page(self):
    """A decorated view returns a Brotli-encoded response whose body
    decompresses back to the original content."""
    @compress_page
    def a_view(request):
        return self.resp

    r = a_view(self.req)
    self.assertEqual(r.get('Content-Encoding'), 'br')
    self.assertEqual(r.get('Content-Length'), str(len(r.content)))
    # assertTrue(x, msg) only checked that the decompressed body was
    # non-empty; the second argument was treated as the failure message.
    # NOTE(review): assumes self.compressible_string is bytes - confirm.
    self.assertEqual(brotli.decompress(r.content), self.compressible_string)
def _decode(self, response):
    """Return the response body as text.

    A body whose ``content-encoding`` header is ``br`` is decompressed
    first. The charset is guessed with ``chardet``; undecodable bytes are
    dropped.

    Args:
        response: object exposing ``content`` (bytes) and ``headers``.

    Returns:
        The decoded text.
    """
    content = response.content
    if response.headers.get('content-encoding') == 'br':
        content = brotli.decompress(content)

    charset_encoding = chardet.detect(content).get('encoding')
    if charset_encoding == 'ascii':
        # Pure-ASCII bodies are decoded with unicode_escape so that
        # backslash escape sequences in them are expanded.
        charset_encoding = 'unicode_escape'
    elif not charset_encoding:
        # No guess available (e.g. an empty body): decode(None, ...) would
        # raise TypeError, so fall back to UTF-8.
        charset_encoding = 'utf-8'
    return content.decode(charset_encoding, 'ignore')
def read_file(name, useBrotli=False):
    """Read a file as bytes, optionally Brotli-decompressing it.

    Args:
        name: path of the file to read.
        useBrotli: when true, the contents are decompressed and the
            compressed/decompressed sizes are logged at debug level.

    Returns:
        The file contents, decompressed if requested.
    """
    with open(name, "rb") as f:
        s = f.read()

    if useBrotli:
        sl = len(s)
        s = brotli.decompress(s)
        # An empty payload decompresses to b''; avoid dividing by zero.
        ratio = (sl / len(s)) * 100.0 if s else 0.0
        logging.debug(f"w {name}: brotli {sl} -> {len(s)}, {ratio:.1f}%")
    return s
async def worker(url_iterator: Iterator, token_receiver: trio.abc.ReceiveChannel):
    """Fetch each URL from ``url_iterator``, one per received token.

    Before every request a token is awaited from ``token_receiver``. The
    outcome is stored in the module-level ``responses`` mapping under the
    URL: either the response object, with its text attached as
    ``response.decoded``, or a string describing the failure.
    """
    async with token_receiver:
        for url in url_iterator:
            await token_receiver.receive()
            started_at = round(trio.current_time(), 2)
            print(f"[{started_at}] Start loading link: {url}")
            try:
                async with httpx.AsyncClient() as client:
                    stamp = datetime.datetime.now().strftime("%a+%b+%d+%Y+%H:%M:%S")
                    optanon_timestamp = quote_plus(stamp)

                    # Computed but not used by the request below.
                    past = (
                        datetime.datetime.now()
                        + datetime.timedelta(hours=7)
                        - datetime.timedelta(
                            minutes=random.randrange(0, 8),
                            seconds=random.randrange(0, 60),
                        )
                    )
                    optanon_past_timestamp = past.strftime(
                        f"%Y-%m-%dT%H:%M:%S.{random.randrange(0, 999)}Z")

                    default_cookies = OrderedDict({
                        "ccpa-state": "No",
                        "OptanonConsent": f"isIABGlobal=true&datestamp={optanon_timestamp}+GMT-0700+(Pacific+Daylight+Time)&version=5.9.0&landingPath=NotLandingPage&groups=C0001%3A1%2CC0002%3A1%2CC0004%3A1&hosts=&geolocation=US%3BCA&AwaitingReconsent=false",
                        "OptanonGlobal": f"isIABGlobal=false&datestamp={optanon_timestamp}+GMT-0700+(Pacific+Daylight+Time)&version=5.15.0&landingPath=NotLandingPage&groups=C0003%3A1%2CC0004%3A1%2CC0005%3A1%2CBG50%3A1%2CC0002%3A1%2CC0001%3A1&hosts=xvr%3A1%2CH35%3A1%2Cxik%3A1%2Cudm%3A1%2Cots%3A1%2CH99%3A1%2Cyla%3A1%2Cixz%3A1%2Cziw%3A1%2CH253%3A1%2Cmwk%3A1%2Czci%3A1%2Cjjk%3A1%2Ceuw%3A1%2Cdwu%3A1%2Ceyl%3A1%2CH28%3A1%2Cbup%3A1%2Cdce%3A1%2CH30%3A1%2Coom%3A1%2Copx%3A1%2CH151%3A1%2Cpjw%3A1%2Cgzg%3A1%2Cywk%3A1%2Cdnm%3A1%2Cwjk%3A1%2Cuuk%3A1%2Cudt%3A1%2Czgf%3A1%2Cayv%3A1%2Crai%3A1%2Cktz%3A1%2Cdfh%3A1%2Clck%3A1%2CH117%3A1%2Chty%3A1%2Cszd%3A1%2Cbax%3A1%2Cymj%3A1%2Cjjg%3A1%2Chbz%3A1%2Cdui%3A1%2Cstj%3A1%2Cyqw%3A1%2Cddu%3A1%2Ccnt%3A1%2CH59%3A1%2Cyze%3A1%2CH80%3A1%2Ctif%3A1%2Cdvt%3A1%2Csjs%3A1%2Cviv%3A1%2Catx%3A1%2CH212%3A1%2Caiy%3A1%2Cqsc%3A1%2Cbro%3A1%2Capv%3A1%2Cvhh%3A1%2Cslt%3A1%2Cmlc%3A1%2Czsx%3A1%2CH155%3A1%2Cqih%3A1%2CH122%3A1%2CH32%3A1%2Cwjk%3A1%2Caso%3A1%2Cvpf%3A1%2Cbhq%3A1%2Cvrh%3A1%2CH37%3A1%2Cuuk%3A1%2Cwtu%3A1%2Chiz%3A1%2CH65%3A1%2CH68%3A1%2Czsx%3A1&legInt=&AwaitingReconsent=false",
                    })

                    try:
                        response = await client.get(
                            url,
                            headers=default_headers,
                            cookies=default_cookies,
                            timeout=5)
                        is_brotli = ("content-encoding" in response.headers
                                     and response.headers["content-encoding"] == "br")
                        if is_brotli:
                            try:
                                unpacked = brotli.decompress(response.content)
                                response.decoded = unpacked.decode(response.encoding)
                            except Exception:
                                # Body was not valid Brotli (or could not be
                                # decoded): use the content as received.
                                response.decoded = response.content.decode(
                                    response.encoding)
                        else:
                            response.decoded = response.content.decode(
                                response.encoding)
                    except Exception as e:
                        response = f"{e.__class__.__name__} :: {e}"
                    responses[url] = response
            except Exception as e:
                response = f"[ fetch_urls ] No response from url {url}: {e.__class__.__name__} :: {e}"
                responses[url] = response
def _check_decompression(self, test_data):
    """Decompress the temp compressed file for ``test_data`` into the temp
    uncompressed file and assert that the result matches ``test_data``."""
    uncompressed_path = _test_utils.get_temp_uncompressed_name(test_data)
    compressed_path = _test_utils.get_temp_compressed_name(test_data)

    with open(uncompressed_path, 'wb') as sink, open(compressed_path, 'rb') as source:
        sink.write(brotli.decompress(source.read()))

    self.assertFilesMatch(uncompressed_path, test_data)
def decompress(self, encoding, data):
    """Decompress ``data`` according to ``encoding``.

    ``encoding`` is normalised with ``ensure_text`` and may be 'gzip',
    'br' or 'deflate'; any other value returns ``data`` unchanged.
    """
    codec = ensure_text(encoding)
    if codec == 'gzip':
        return gzip.decompress(data)
    if codec == 'br':
        return brotli.decompress(data)
    if codec == 'deflate':
        return zlib.decompress(data)
    return data
async def songdb(timeout: int = config.TIMEOUT) -> Any:
    """
    Return the dictionary mapping ``song_id`` to song titles (en|jp).
    """
    async with websockets.connect(config.ESTERTION_URI, timeout=timeout) as ws:
        await ws.send('constants')
        payload = await ws.recv()
        await ws.close()
        # The reply is Brotli-compressed JSON.
        decoded = brotli.decompress(payload)
        return json.loads(decoded)
def _check_decompression_matches(self, test_data):
    """Decompress the temp compressed file into the temp uncompressed file
    and assert that it is byte-identical to ``test_data``."""
    uncompressed_path = get_temp_uncompressed_name(test_data)
    with open(uncompressed_path, 'wb') as sink:
        with open(get_temp_compressed_name(test_data), 'rb') as source:
            packed = source.read()
            sink.write(brotli.decompress(packed))

    # shallow=False forces a content comparison, not just an os.stat() one.
    files_equal = filecmp.cmp(
        get_temp_uncompressed_name(test_data), test_data, shallow=False)
    self.assertTrue(files_equal)
def decompress(data: bytes, encoding: str) -> bytes:
    """Decompress ``data`` for the given encoding name.

    Recognised encodings are 'gzip', 'br' and 'deflate'; for any other
    value ``data`` is returned unchanged.
    """
    if encoding == 'gzip':
        return gzip.decompress(data)
    if encoding == 'br':
        return brotli.decompress(data)
    if encoding == 'deflate':
        return zlib.decompress(data)
    return data
def post_dissect(self, s):
    """Undo chunked transfer-coding and content compression on a payload.

    Does nothing unless ``conf.contribs["http"]["auto_compression"]`` is
    set. Chunked bodies are reassembled first (only if every chunk parses),
    then the first matching encoding among deflate, gzip, compress, br and
    zstd is decoded. A decompression failure is ignored and the data is
    returned as it was.

    Args:
        s: raw payload bytes.

    Returns:
        The processed payload bytes.
    """
    if not conf.contribs["http"]["auto_compression"]:
        return s

    encodings = self._get_encodings()

    # Un-chunkify
    if "chunked" in encodings:
        assembled = b""
        while s:
            size_field, _, rest = s.partition(b"\r\n")
            try:
                size = int(size_field, 16)
            except ValueError:
                # Not a valid chunk. Ignore
                break
            if rest[size:size + 2] != b"\r\n":
                # Invalid chunk. Ignore
                break
            assembled += rest[:size]
            s = rest[size + 2:]
        # Use the reassembled body only when all input was consumed.
        if not s:
            s = assembled

    # Decompress
    try:
        if "deflate" in encodings:
            import zlib
            s = zlib.decompress(s)
        elif "gzip" in encodings:
            s = gzip_decompress(s)
        elif "compress" in encodings:
            import lzw
            s = lzw.decompress(s)
        elif "br" in encodings:
            if not _is_brotli_available:
                log_loading.info(
                    "Can't import brotli. brotli decompression "
                    "will be ignored !")
            else:
                s = brotli.decompress(s)
        elif "zstd" in encodings:
            if not _is_zstd_available:
                log_loading.info(
                    "Can't import zstandard. zstd decompression "
                    "will be ignored !")
            else:
                # Using its streaming API since its simple API could handle
                # only cases where there is content size data embedded in
                # the frame
                stream = io.BytesIO(s)
                s = zstandard.ZstdDecompressor().stream_reader(stream).read()
    except Exception:
        # Cannot decompress - probably incomplete data
        pass
    return s
def response_hook(resp, *args, **kwargs):
    """Response hook: decode the Brotli-compressed JSON body.

    On success the ``results`` entry is stored on the response as
    ``resp.dec_result`` and the module-level ``curr_req_max`` is set to the
    ``total`` entry. On any decoding or parsing failure the response is
    left as it is.
    """
    global curr_req_max
    try:
        # parse the json storing the result on the response object
        dec_res = brotli.decompress(resp.content)
        content = json.loads(dec_res)
        resp.dec_result = content['results']
        curr_req_max = content['total']
    except Exception:
        # Deliberately best effort. Exception rather than a bare except so
        # that KeyboardInterrupt / SystemExit still propagate.
        pass
def test_roundtrip_compression_with_files(simple_compressed_file):
    """
    Data survives a compress-then-decompress round trip unchanged.
    """
    uncompressed_path = simple_compressed_file[0]
    with open(uncompressed_path, 'rb') as handle:
        expected = handle.read()

    round_tripped = brotli.decompress(brotli.compress(expected))
    assert round_tripped == expected
def hit(self, url):
    """Fetch ``url`` and store the result on the instance.

    Sets ``self.cookies`` to the response's ``Set-Cookie`` header and
    ``self.content`` to the decompressed body decoded as UTF-8.
    """
    request_obj = Request(url)
    request_obj = self.add_headers(request_obj)
    # Close the connection once the body is read; the response was
    # previously left open.
    # NOTE(review): assumes `request` is urllib.request, whose responses
    # are context managers - confirm.
    with request.urlopen(request_obj) as response_obj:
        self.cookies = response_obj.getheader('Set-Cookie')
        body = response_obj.read()
    self.content = decompress(body).decode('utf-8')
    return
def get(path):
    """Return the page for ``path``, using the Brotli-compressed cache.

    On a cache hit the cached entry is decompressed and returned. On a
    miss the page is downloaded from ``base + path`` and its compressed
    form is queued on ``redis_queue`` under ``path``.

    NOTE(review): a cache hit returns bytes while a miss returns str, as
    before - callers should confirm which they expect.
    """
    url = base + path
    # Look up and read the same key. The original checked `path` but then
    # indexed the cache with `url`, although entries are queued under
    # `path`.
    cached = cache.get(path)
    if cached is not None:
        html = brotli.decompress(cached)
    else:
        html = requests.get(url).text
        redis_queue.put((path, brotli.compress(html.encode(), brotli.MODE_TEXT)))
    return html
def decode_brotli(self, data):
    '''
    Restore HTTP response data that was compressed with brotli.
    Marker: Content-Encoding: br

    Returns the decompressed data, or ``data`` unchanged if it cannot be
    decompressed.
    '''
    try:
        return brotli.decompress(data)
    except Exception:
        # Best effort: hand back the input. Exception rather than a bare
        # except so KeyboardInterrupt / SystemExit are not swallowed.
        return data
def decompress_brotli(data: bytes) -> bytes:
    """Return the Brotli-decompressed form of ``data``.

    :param data: Brotli-compressed payload
    :type data: bytes
    :raises brotli.error: if the payload cannot be decompressed
    :return: the decompressed payload
    :rtype: bytes
    """
    decompressed = brotli.decompress(data)
    return decompressed
def decode_content(content, encoding_header):
    """Undo the content codings listed in a Content-Encoding header.

    Codings are undone in reverse of the listed order. 'br' and
    'gzip' / 'x-gzip' are decoded; 'identity', empty tokens and unknown
    codings leave the content untouched. Coding names are matched
    case-insensitively, since HTTP content-coding names are
    case-insensitive.

    Args:
        content: encoded body bytes.
        encoding_header: comma-separated Content-Encoding value.

    Returns:
        The decoded bytes.
    """
    encodings = [token.strip().lower() for token in encoding_header.split(',')]
    for encoding in reversed(encodings):
        if encoding in ('', 'identity'):
            continue
        if encoding == 'br':
            content = brotli.decompress(content)
        elif encoding in ('gzip', 'x-gzip'):
            content = gzip.decompress(content)
    return content
def test_compress(self):
    """The compressor's output is non-empty, no larger than the input, and
    decompresses back to the input."""
    source = BytesIO(content)
    out = BrotliCompressor().compress("", source)

    self.assertGreater(out.size, 0)
    self.assertLessEqual(out.size, len(content))

    self.assertEqual(brotli.decompress(out.read()), content)
def brdecompress(payload, log):
    """Brotli-decompress ``payload``, falling back to the payload itself.

    If ``brotli`` cannot be imported or decompression fails, a message is
    logged through ``Logger.log`` and ``payload`` is returned unchanged.
    """
    try:
        import brotli
        decoded = brotli.decompress(payload)
    except Exception:
        Logger.log(
            "Either brotli decompress failed or discord returned incorrect content encodings.",
            None, log)  # yea, it happens :/
        decoded = payload
    return decoded
def brotli_compress(self, filepath):
    """
    Compress a file with Brotli.
    Save it to the same directory as the input filepath.

    Returns the input filepath + '.br', or None when the file is skipped
    (extension not included, already Brotli-compressed, or smaller than
    MINIMUM_SIZE_FOR_COMPRESSION).
    """
    ext = os.path.splitext(filepath)[1]

    # Check 1: Make sure the extension is okay
    if ext not in self.included_filetypes:
        return None

    new_filepath = filepath + '.br'

    with self.open(filepath, 'rb') as in_file:
        in_file_content = in_file.read()

    # Check 2: Check that the file isn't already brotli-compressed.
    # Brotli has no magic number like gzip, but when we try to
    # decompress a non-compressed file Brotli throws an error.
    try:
        brotli.decompress(in_file_content)
    except brotli.error:
        # BrotliDecompress failed: the file is not compressed, carry on.
        # (No seek is needed - the content is already in memory and the
        # file is not read again; seeking a closed handle would raise.)
        pass
    else:
        # file is compressed: don't re-compress it
        return None

    # Check 3: If the file is smaller than MINIMUM_SIZE_FOR_COMPRESSION
    # compression can be ineffective resulting in a larger file
    # instead of a smaller one.
    if len(in_file_content) < self.MINIMUM_SIZE_FOR_COMPRESSION:
        return None

    compressed_contents = brotli.compress(in_file_content, quality=11)

    # Replace any stale compressed copy.
    if self.exists(new_filepath):
        self.delete(new_filepath)
    self.save(new_filepath, ContentFile(compressed_contents))
    return new_filepath
def test_streaming_page(self):
    """A decorated streaming view is Brotli-encoded, carries no
    Content-Length, and decompresses to the UTF-8 encoded sequence."""
    @compress_page
    def a_streaming_view(request):
        return self.stream_resp_unicode

    r = a_streaming_view(self.req)
    self.assertEqual(r.get('Content-Encoding'), 'br')
    self.assertFalse(r.has_header('Content-Length'))

    actual = brotli.decompress(b''.join(r))
    expected = b''.join(x.encode('utf-8') for x in self.sequence_unicode)
    self.assertEqual(actual, expected)
def test_compress_streaming_response(self):
    """
    Responses with streaming content are compressed by the middleware.
    """
    middleware = CompressionMiddleware()
    r = middleware.process_response(self.req, self.stream_resp)

    body = brotli.decompress(b''.join(r))
    self.assertEqual(body, b''.join(self.sequence))
    self.assertEqual(r.get('Content-Encoding'), 'br')
    self.assertFalse(r.has_header('Content-Length'))
    self.assertEqual(r.get('Vary'), 'Accept-Encoding')
def descargar_dataset_url(self, apiurl, verbose=True, dataframe=True):
    """Download a dataset directly from a provided API URL.

    Useful for downloading datasets from APIs that are not yet listed in
    the 'api_urls' attribute. It can also return the raw data by passing
    False for the 'dataframe' parameter.

    Parameters:
        apiurl (str): Full URL (e.g. 'https://[...]') of a public
            BioPortal API.
        verbose (bool; optional): Option to print details during the
            download. (Not consulted by this method.)
        dataframe (bool; optional): Option to process the downloaded data
            into a DataFrame. Pass False to return the raw data.

    Returns:
        apidata: The downloaded data: the raw body if 'dataframe' is
            False, otherwise a pandas.DataFrame. None on failure.
        exitoso (bool): Flag stating whether the API was downloaded
            successfully.
    """
    self.apidata = None
    try:
        r = requests.get(apiurl,
                         headers={'Accept-Encoding': 'br'},
                         timeout=(15, None))
    except requests.exceptions.Timeout:
        # The server hung.
        print("Servidor no responde.")
        return (None, False)

    # Status code
    if r.status_code != 200:
        print("Respuesta inesperada del servidor (#{})".format(
            r.status_code))
        return (None, False)

    apidata_raw = r.content

    # r.encoding is None when the response declares no charset, so guard
    # before calling startswith(). The body is decoded leniently because it
    # is only sniffed for an HTML doctype and may not be valid UTF-8.
    if r.encoding and r.encoding.startswith('ISO'):
        if r.content.decode(errors='replace').startswith('<!DOCTYPE html>'):
            print(
                "Servidor no esperaba que bajaras este API (devolvio un documento HTML)"
            )
            return (None, False)

    try:
        apidata_raw = brotli.decompress(apidata_raw)
    except Exception:
        # The body was not Brotli data: use it as received.
        pass

    if dataframe:
        apidata = pd.json_normalize(json.loads(apidata_raw), sep='_')
    else:
        apidata = apidata_raw

    return (apidata, True)
def test_decompression(simple_compressed_file):
    """
    ``decompress`` turns a compressed file back into its original form.
    """
    original_path, compressed_path = simple_compressed_file[0], simple_compressed_file[1]

    with open(original_path, 'rb') as handle:
        expected = handle.read()
    with open(compressed_path, 'rb') as handle:
        packed = handle.read()

    assert brotli.decompress(packed) == expected
def decompress(self, widget):
    """Decompress ``self.file`` with Brotli into the output location.

    The output name is ``self.filename`` with its last dot-suffix removed,
    appended to ``self.out`` (which is updated in place).

    Args:
        widget: the widget that triggered the call; not used.
    """
    dest_filename = self.filename.split('.')[:-1]
    dest_filename = '.'.join(dest_filename)
    self.out = self.out + dest_filename

    with open(self.file, 'rb') as infile:
        data = brotli.decompress(infile.read())

    # Context manager so the output is closed even if the write fails.
    with open(self.out, 'wb') as outfile:
        outfile.write(data)
def download_index_report(dest_folder: str = './indices/reports/', date=None):
    """Download the NSE indices feed and save it in ``dest_folder``.

    The response body is decoded according to its Content-Encoding
    ('br', 'gzip' or none) and written as UTF-8 to
    ``ind_close_all_<ddmmYYYY>.csv``. Errors are printed as a traceback
    and not raised.

    Args:
        dest_folder: directory to save into; created if missing.
        date: date used for the file name. Defaults to today, evaluated at
            call time (a ``datetime.today()`` default would be frozen at
            import time).
    """
    if date is None:
        date = datetime.today()

    os.makedirs(dest_folder, exist_ok=True)  # create folder if it does not exist

    url = 'https://www1.nseindia.com/homepage/Indices1.json'
    filename = 'ind_close_all_{}.csv'.format(date.strftime("%d%m%Y"))
    file_path = os.path.join(dest_folder, filename)
    print('URL: ' + url)

    header = {
        'method': 'GET',
        'scheme': 'https',
        'accept': 'application/json, text/plain, */*',
        'accept-encoding': 'gzip, deflate, br',
        'accept-language': 'en-US,en;q=0.9',
        'dnt': '1',
        'host': 'www1.nseindia.com',
        'referer': 'https://www1.nseindia.com/products/content/equities/indices/homepage_indices.htm',
        'user-agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.135 Safari/537.36',
    }

    try:
        req = urllib.request.Request(url, headers=header)
        with urllib.request.urlopen(req) as response:
            coder = response.headers.get('Content-Encoding', 'utf-8')
            body = response.read()

        if coder == 'br':
            html_page = brotli.decompress(body).decode('utf-8')
        elif coder == 'gzip':
            html_page = gzip.GzipFile(fileobj=BytesIO(body)).read().decode('utf-8')
            print('Got gzip response')
        else:
            html_page = body.decode('utf-8')

        # html_page is text, not a file object, so it is written directly.
        # shutil.copyfileobj() on a str always raised and nothing was saved.
        with open(file_path, 'wb') as f:
            f.write(html_page.encode('utf-8'))
        print('Downloaded')
    except Exception:
        # Best effort: report the failure without raising.
        traceback.print_exc()
def load_courses(self):
    """Load every Brotli-compressed JSON file under ``<workdir>/courses``.

    The ``data.courses`` lists of all files are concatenated and stored in
    ``self.courses``.
    """
    courses_dir = os.path.join(self.workdir, 'courses')
    collected = []
    for path in glob.glob(courses_dir + '/*'):
        with open(path, 'rb') as fh:
            packed = fh.read()
        parsed = json.loads(brotli.decompress(packed))
        collected.extend(parsed['data']['courses'])
    self.courses = collected
def _decode(self, body, encoding): if encoding == b'gzip' or encoding == b'x-gzip': body = gunzip(body) if encoding == b'deflate': try: body = zlib.decompress(body) except zlib.error: body = zlib.decompress(body, -15) if encoding == b'br' and b'br' in ACCEPTED_ENCODINGS: body = brotli.decompress(body) return body
def content(self) -> Union[str, bytes]:
    """Return the Brotli-decompressed body.

    The result is text when the decompressed bytes are valid UTF-8 and raw
    bytes otherwise. An empty or missing body yields an empty string.
    """
    raw = self._content
    if not raw:
        return ""

    data = brotli.decompress(raw)
    try:
        return data.decode("utf8")
    except UnicodeDecodeError:
        return data
def _check_decompression(self, test_data, **kwargs):
    """Decompress the temp compressed file for ``test_data`` and assert
    that the output matches the original.

    Of ``kwargs`` only ``dictionary`` is forwarded to
    ``brotli.decompress``.
    """
    # Only dictionary is supported as a kwarg to brotli.decompress.
    decompress_kwargs = {}
    if 'dictionary' in kwargs:
        decompress_kwargs['dictionary'] = kwargs['dictionary']

    # Write decompression to temp file and verify it matches the original.
    uncompressed_path = _test_utils.get_temp_uncompressed_name(test_data)
    compressed_path = _test_utils.get_temp_compressed_name(test_data)
    with open(uncompressed_path, 'wb') as sink, open(compressed_path, 'rb') as source:
        sink.write(brotli.decompress(source.read(), **decompress_kwargs))

    self.assertFilesMatch(uncompressed_path, test_data)
def test_brotli_accepted_no_change(self):
    """A Brotli-encoded record passes through unchanged when the client
    accepts 'br'."""
    import brotli

    content = brotli.compress('ABCDEFG'.encode('utf-8'))
    request_headers = {
        'Content-Type': 'application/octet-stream',
        'Content-Encoding': 'br',
        'Content-Length': str(len(content)),
    }
    environ = {'HTTP_ACCEPT_ENCODING': 'gzip, deflate, br'}

    headers, gen, is_rw = self.rewrite_record(
        request_headers, content, ts='201701mp_', environ=environ)

    assert headers['Content-Encoding'] == 'br'
    assert headers['Content-Length'] == str(len(content))

    body = b''.join(gen)
    assert brotli.decompress(body).decode('utf-8') == 'ABCDEFG'
def __init__(self, file, checkChecksums=1, fontNumber=-1):
    """Read a WOFF2 font's header and table directory from ``file`` and
    decompress its table data.

    Raises:
        ImportError: if the Brotli module is not available.
        TTLibError: if the signature, header size, decompressed size or
            reported length does not check out.
    """
    if not haveBrotli:
        log.error(
            'The WOFF2 decoder requires the Brotli Python extension, available at: '
            'https://github.com/google/brotli')
        raise ImportError("No module named brotli")

    self.file = file

    if Tag(self.file.read(4)) != b"wOF2":
        raise TTLibError("Not a WOFF2 font (bad signature)")

    self.file.seek(0)
    self.DirectoryEntry = WOFF2DirectoryEntry
    header = self.file.read(woff2DirectorySize)
    if len(header) != woff2DirectorySize:
        raise TTLibError('Not a WOFF2 font (not enough data)')
    sstruct.unpack(woff2DirectoryFormat, header, self)

    # Entries are read in file order; each table's offset is the running
    # sum of the lengths of the entries before it.
    self.tables = OrderedDict()
    running_offset = 0
    for _ in range(self.numTables):
        entry = self.DirectoryEntry()
        entry.fromFile(self.file)
        self.tables[Tag(entry.tag)] = entry
        entry.offset = running_offset
        running_offset += entry.length

    totalUncompressedSize = running_offset
    decompressedData = brotli.decompress(self.file.read(self.totalCompressedSize))
    if len(decompressedData) != totalUncompressedSize:
        raise TTLibError(
            'unexpected size for decompressed font data: expected %d, found %d'
            % (totalUncompressedSize, len(decompressedData)))
    self.transformBuffer = BytesIO(decompressedData)

    # Seek to the end to compare the reported length with the real size.
    self.file.seek(0, 2)
    if self.length != self.file.tell():
        raise TTLibError("reported 'length' doesn't match the actual file size")

    self.flavorData = WOFF2FlavorData(self)

    # make empty TTFont to store data while reconstructing tables
    self.ttFont = TTFont(recalcBBoxes=False, recalcTimestamp=False)
def _decode(self, body, encoding): if encoding == b'gzip' or encoding == b'x-gzip': body = gunzip(body) if encoding == b'deflate': try: body = zlib.decompress(body) except zlib.error: # ugly hack to work with raw deflate content that may # be sent by microsoft servers. For more information, see: # http://carsten.codimi.de/gzip.yaws/ # http://www.port80software.com/200ok/archive/2005/10/31/868.aspx # http://www.gzip.org/zlib/zlib_faq.html#faq38 body = zlib.decompress(body, -15) if encoding == b'br' and b'br' in ACCEPTED_ENCODINGS: body = brotli.decompress(body) return body
def decompress(data, method='gz'):
    """
    Decompress *data* previously compressed with the specified
    compression *method*.

    Supported methods: 'gz' (gzip), 'zip' (zlib), 'bz' (bzip2) and 'brt'
    (Brotli).

    Raises:
        ImportError: for 'brt' when the brotli module is unavailable.
        ValueError: for an unknown *method*.
    """
    if method == 'gz':
        return gzip.decompress(data)
    elif method == 'zip':
        return zlib.decompress(data)
    elif method == 'bz':
        # The 'bz' branch used zlib, which cannot read bzip2 data. Try
        # bzip2 first and keep zlib as a fallback so data written by a
        # zlib-based 'bz' compressor still loads.
        # NOTE(review): the matching compressor is not visible here -
        # confirm which format 'bz' is meant to be.
        import bz2
        try:
            return bz2.decompress(data)
        except (OSError, ValueError):
            return zlib.decompress(data)
    elif method == 'brt':
        if not brotli:
            raise ImportError('brotli')
        return brotli.decompress(bytes(data))
    else:
        raise ValueError('invalid method: {0!r}'.format(method))
def __init__(self, reader=None):
    """Hold the WOFF2 version, metadata and private data.

    All fields start as None. When ``reader`` is given, the version
    numbers are copied from it, the metadata block is read and
    Brotli-decompressed, and the private data block is read as-is.

    Raises:
        ImportError: if the Brotli module is not available.
    """
    if not haveBrotli:
        raise ImportError("No module named brotli")

    self.majorVersion = None
    self.minorVersion = None
    self.metaData = None
    self.privData = None

    if not reader:
        return

    self.majorVersion = reader.majorVersion
    self.minorVersion = reader.minorVersion

    if reader.metaLength:
        reader.file.seek(reader.metaOffset)
        packed_meta = reader.file.read(reader.metaLength)
        assert len(packed_meta) == reader.metaLength
        meta = brotli.decompress(packed_meta)
        assert len(meta) == reader.metaOrigLength
        self.metaData = meta

    if reader.privLength:
        reader.file.seek(reader.privOffset)
        private = reader.file.read(reader.privLength)
        assert len(private) == reader.privLength
        self.privData = private
def main(args):
    """Compress or decompress a file (or stdin) with Brotli.

    Options come from ``parse_options(args)``. Input is read from
    ``options.infile`` or stdin; output goes to ``options.outfile`` or
    stdout. Exits with status 1 on a missing input file, an existing
    output file without ``force``, or a Brotli error.
    """
    options = parse_options(args)

    input_path = options.infile
    if input_path:
        if not os.path.isfile(input_path):
            print('file "%s" not found' % input_path, file=sys.stderr)
            sys.exit(1)
        with open(input_path, "rb") as infile:
            data = infile.read()
    else:
        if sys.stdin.isatty():
            # interactive console, just quit
            usage()
        infile = get_binary_stdio('stdin')
        data = infile.read()

    output_path = options.outfile
    if output_path:
        if os.path.isfile(output_path) and not options.force:
            print('output file exists')
            sys.exit(1)
        outfile = open(output_path, "wb")
    else:
        outfile = get_binary_stdio('stdout')

    try:
        if options.decompress:
            data = brotli.decompress(data)
        else:
            data = brotli.compress(data, options.mode, options.transform)
    except brotli.error as e:
        print('[ERROR] %s: %s' % (e, options.infile or 'sys.stdin'),
              file=sys.stderr)
        sys.exit(1)

    outfile.write(data)
    outfile.close()
def _decompress(self, test_data):
    """Decompress the file ``test_data`` into its temp uncompressed file."""
    target_path = _test_utils.get_temp_uncompressed_name(test_data)
    with open(target_path, 'wb') as sink, open(test_data, 'rb') as source:
        packed = source.read()
        sink.write(brotli.decompress(packed))
# coding:utf-8
# Copyright (C) dirlt

import brotli
from req_pb2 import SearchResponse


def ensure_string(s, encoding='utf8'):
    """Return ``s`` as text, decoding it first if it is bytes."""
    return s.decode(encoding) if isinstance(s, bytes) else s


def ensure_bytes(s, encoding='utf8'):
    """Return ``s`` as bytes, encoding it first if it is text."""
    return s.encode(encoding) if isinstance(s, str) else s


# Load the saved response body and undo its Brotli compression.
with open('response.content', 'rb') as fh:
    data = fh.read()
data = brotli.decompress(data)

# The first two bytes are skipped before parsing the protobuf message.
# NOTE(review): presumably a 2-byte prefix ahead of the payload - confirm.
resp = SearchResponse()
resp.ParseFromString(data[2:])
print(resp)
def main(args=None):
    """Command-line entry point: compress or decompress with Brotli.

    Reads from ``--input`` or stdin and writes to ``--output`` or stdout.
    Defaults for the encoder parameters come from the module-level
    DEFAULT_PARAMS dictionary.

    Args:
        args: argument list to parse; ``None`` means ``sys.argv[1:]``.
    """
    parser = argparse.ArgumentParser(
        prog='bro.py',
        description="Compression/decompression utility using the Brotli algorithm.")
    parser.add_argument('--version', action='version', version=brotli.__version__)
    parser.add_argument('-i', '--input', metavar='FILE', type=str, dest='infile',
                        help='Input file', default=None)
    parser.add_argument('-o', '--output', metavar='FILE', type=str, dest='outfile',
                        help='Output file', default=None)
    parser.add_argument('-f', '--force', action='store_true',
                        help='Overwrite existing output file', default=False)
    parser.add_argument('-d', '--decompress', action='store_true',
                        help='Decompress input file', default=False)
    params = parser.add_argument_group('optional encoder parameters')
    # choices now match the documented values: mode 2 (font) and lgwin
    # 10..15 were advertised in the help text but rejected by the parser.
    params.add_argument('-m', '--mode', metavar="MODE", type=int, choices=[0, 1, 2],
                        help='The compression mode can be 0 for generic input, '
                        '1 for UTF-8 encoded text, or 2 for WOFF 2.0 font data. '
                        'Defaults to 0.')
    params.add_argument('-q', '--quality', metavar="QUALITY", type=int,
                        choices=list(range(0, 12)),
                        help='Controls the compression-speed vs compression-density '
                        'tradeoff. The higher the quality, the slower the '
                        'compression. Range is 0 to 11. Defaults to 11.')
    params.add_argument('--lgwin', metavar="LGWIN", type=int,
                        choices=list(range(10, 25)),
                        help='Base 2 logarithm of the sliding window size. Range is '
                        '10 to 24. Defaults to 22.')
    params.add_argument('--lgblock', metavar="LGBLOCK", type=int,
                        choices=[0] + list(range(16, 25)),
                        help='Base 2 logarithm of the maximum input block size. '
                        'Range is 16 to 24. If set to 0, the value will be set based '
                        'on the quality. Defaults to 0.')
    # set default values using global DEFAULT_PARAMS dictionary
    parser.set_defaults(**DEFAULT_PARAMS)

    options = parser.parse_args(args=args)

    if options.infile:
        if not os.path.isfile(options.infile):
            parser.error('file "%s" not found' % options.infile)
        with open(options.infile, "rb") as infile:
            data = infile.read()
    else:
        if sys.stdin.isatty():
            # interactive console, just quit
            parser.error('no input')
        infile = get_binary_stdio('stdin')
        data = infile.read()

    if options.outfile:
        if os.path.isfile(options.outfile) and not options.force:
            parser.error('output file exists')

    try:
        if options.decompress:
            data = brotli.decompress(data)
        else:
            data = brotli.compress(
                data, mode=options.mode, quality=options.quality,
                lgwin=options.lgwin, lgblock=options.lgblock)
    except brotli.error as e:
        parser.exit(1, 'bro: error: %s: %s' % (e, options.infile or 'sys.stdin'))

    # Open the output only after the data is ready, so a failed run does
    # not leave a truncated output file behind.
    if options.outfile:
        outfile = open(options.outfile, "wb")
    else:
        outfile = get_binary_stdio('stdout')
    outfile.write(data)
    outfile.close()
def test_get_brotli(self):
    """A client accepting 'br' gets the Brotli variant of a static file."""
    url = storage.staticfiles_storage.url(self.static_files.js_path)
    request_headers = {'Accept-Encoding': 'gzip, br'}
    response = self.server.get(url, headers=request_headers)

    body = brotli.decompress(response.content)
    self.assertEqual(body, self.static_files.js_content)
    self.assertEqual(response.headers['Content-Encoding'], 'br')
    self.assertEqual(response.headers['Vary'], 'Accept-Encoding')
import sys

import brotli
from kyotocabinet import *
import http_pb2

# Dump every record of the Kyoto Cabinet database named on the command
# line: each value is a Brotli-compressed, serialized HttpWeb message.
db = DB()
if not db.open(sys.argv[1], DB.OREADER):
    # open() reports failure through its return value.
    sys.stderr.write("open error: " + str(db.error()) + "\n")
    sys.exit(1)

try:
    cur = db.cursor()
    cur.jump()
    # get(True) returns the current record and steps to the next one; a
    # falsy result ends the loop.
    ret = cur.get(True)
    while ret:
        web = http_pb2.HttpWeb()
        proto = brotli.decompress(ret[1])
        web.ParseFromString(proto)
        # print() with a single argument behaves the same on Python 2 and 3.
        print(ret[0])
        print(web)
        ret = cur.get(True)
finally:
    db.close()
def decode_br(content):
    """Brotli-decompress ``content``.

    Returns:
        The decompressed bytes, or ``None`` if decompression fails.
    """
    # The decompress call has to sit inside the try block: previously only
    # the `return` was guarded, so the handler could never run. Brotli
    # reports failures as brotli.error, which is caught here too.
    try:
        return brotli.decompress(content)
    except (brotli.error, IOError, EOFError):
        return None
def decode_brotli(content: bytes) -> bytes:
    """Brotli-decompress ``content``; empty input yields empty bytes."""
    if content:
        return brotli.decompress(content)
    return b""
def brotli_decompress(data, uncompressed_size):
    """Return ``data`` decompressed with Brotli.

    ``uncompressed_size`` is accepted but not used.
    """
    decompressed = brotli.decompress(data)
    return decompressed
def decode_brotli(content):
    """Return ``content`` decompressed with Brotli."""
    decoded = brotli.decompress(content)
    return decoded
def decode_brotli(data):
    """Decompress Brotli-compressed ``data`` and return the result."""
    result = brotli.decompress(data)
    return result
def test_garbage_appended(self):
    """Extra bytes after a complete Brotli stream make decompression fail."""
    with self.assertRaises(brotli.error):
        stream = brotli.compress(b'a')
        brotli.decompress(stream + b'a')
def test_compressed_data_roundtrips(s):
    """Compressing and then decompressing ``s`` gives back ``s``."""
    compressed = brotli.compress(s)
    restored = brotli.decompress(compressed)
    assert restored == s
def main():
    """Command-line entry point: compress or decompress with Brotli.

    Reads from ``--input`` or stdin and writes to ``--output`` or stdout.
    Defaults for the encoder parameters come from the module-level
    DEFAULT_PARAMS dictionary.
    """
    parser = argparse.ArgumentParser(
        prog="bro.py", description="Compression/decompression utility using the Brotli algorithm."
    )
    parser.add_argument("--version", action="version", version=brotli.__version__)
    parser.add_argument("-i", "--input", metavar="FILE", type=str, dest="infile", help="Input file", default=None)
    parser.add_argument("-o", "--output", metavar="FILE", type=str, dest="outfile", help="Output file", default=None)
    parser.add_argument("-f", "--force", action="store_true", help="Overwrite existing output file", default=False)
    parser.add_argument("-d", "--decompress", action="store_true", help="Decompress input file", default=False)
    params = parser.add_argument_group("optional encoder parameters")
    # Mode 2 is documented in the help text, so it is accepted here too;
    # the help text also gained the space that was missing before
    # "Defaults to 0.".
    params.add_argument(
        "-m",
        "--mode",
        metavar="MODE",
        type=int,
        choices=[0, 1, 2],
        help="The compression mode can be 0 for generic input, "
        "1 for UTF-8 encoded text, or 2 for WOFF 2.0 font data. "
        "Defaults to 0.",
    )
    params.add_argument(
        "-q",
        "--quality",
        metavar="QUALITY",
        type=int,
        choices=list(range(0, 12)),
        help="Controls the compression-speed vs compression-density "
        "tradeoff. The higher the quality, the slower the "
        "compression. Range is 0 to 11. Defaults to 11.",
    )
    params.add_argument(
        "--lgwin",
        metavar="LGWIN",
        type=int,
        choices=list(range(16, 25)),
        help="Base 2 logarithm of the sliding window size. Range is " "16 to 24. Defaults to 22.",
    )
    params.add_argument(
        "--lgblock",
        metavar="LGBLOCK",
        type=int,
        choices=[0] + list(range(16, 25)),
        help="Base 2 logarithm of the maximum input block size. "
        "Range is 16 to 24. If set to 0, the value will be set based "
        "on the quality. Defaults to 0.",
    )
    # set default values using global DEFAULT_PARAMS dictionary
    parser.set_defaults(**DEFAULT_PARAMS)

    options = parser.parse_args()

    if options.infile:
        if not os.path.isfile(options.infile):
            parser.error('file "%s" not found' % options.infile)
        with open(options.infile, "rb") as infile:
            data = infile.read()
    else:
        if sys.stdin.isatty():
            # interactive console, just quit
            parser.error("no input")
        infile = get_binary_stdio("stdin")
        data = infile.read()

    if options.outfile:
        if os.path.isfile(options.outfile) and not options.force:
            parser.error("output file exists")

    try:
        if options.decompress:
            data = brotli.decompress(data)
        else:
            data = brotli.compress(
                data, mode=options.mode, quality=options.quality, lgwin=options.lgwin, lgblock=options.lgblock
            )
    except brotli.error as e:
        parser.exit(1, "bro: error: %s: %s" % (e, options.infile or "sys.stdin"))

    # Open the output only after the data is ready, so a failed run does
    # not leave a truncated output file behind.
    if options.outfile:
        outfile = open(options.outfile, "wb")
    else:
        outfile = get_binary_stdio("stdout")
    outfile.write(data)
    outfile.close()
def test_decompression_fails_properly_on_garbage(bogus, exception_cls):
    """
    Decompressing garbage input raises the expected exception class.
    """
    expect_failure = pytest.raises(exception_cls)
    with expect_failure:
        brotli.decompress(bogus)