Ejemplo n.º 1
0
def decompress(body_bytes, content_encoding, url=None):
    """Best-effort decoding of an HTTP body according to its Content-Encoding.

    Args:
        body_bytes: the raw (possibly compressed) body.
        content_encoding: value of the Content-Encoding header; may be None
            or empty when the header is absent.
        url: only used in debug log messages.

    Returns:
        The decompressed bytes, or ``body_bytes`` unchanged when the encoding
        is unknown or decompression fails (failures are logged and counted).
    """
    # A missing header (None) or a padded value must not crash the caller.
    content_encoding = (content_encoding or '').strip().lower()
    if content_encoding == 'deflate':
        try:
            return zlib.decompress(body_bytes, zlib.MAX_WBITS)  # expects header/checksum
        except Exception:
            try:
                # http://www.gzip.org/zlib/zlib_faq.html#faq38
                stats.stats_sum('content-encoding deflate fallback try', 1)
                return zlib.decompress(body_bytes, -zlib.MAX_WBITS)  # no header/checksum
            except Exception as e:
                LOGGER.debug('deflate fail for url %s: %s', url, str(e))
                stats.stats_sum('content-encoding deflate fail', 1)
                return body_bytes
    elif content_encoding in ('gzip', 'x-gzip'):
        try:
            # 16 + MAX_WBITS tells zlib to expect a gzip header and trailer
            return zlib.decompress(body_bytes, 16 + zlib.MAX_WBITS)
        except Exception as e:
            LOGGER.debug('gzip fail for url %s: %s', url, str(e))
            stats.stats_sum('content-encoding gzip fail', 1)
            return body_bytes
    elif content_encoding == 'br':
        try:
            return brotli.decompress(body_bytes)
        except Exception as e:
            LOGGER.debug('brotli fail for url %s: %s', url, str(e))
            stats.stats_sum('content-encoding brotli fail', 1)
            return body_bytes
    else:
        # 'identity' is in the standard
        # also fairly common to have 'raw', 'none', or a charset
        return body_bytes
Ejemplo n.º 2
0
def test_streaming_compression_flush(one_compressed_file,
                                     chunk_size,
                                     mode,
                                     quality,
                                     lgwin,
                                     lgblock):
    """
    The streaming compressor, flushed after every chunk, produces output that
    decompresses back to the original file contents.
    """
    pieces = []
    compressor = brotli.Compressor(
        mode=mode, quality=quality, lgwin=lgwin, lgblock=lgblock
    )
    with open(one_compressed_file, 'rb') as source:
        # read() returns b'' at end of file, which ends the iteration
        for block in iter(lambda: source.read(chunk_size), b''):
            pieces.append(compressor.compress(block))
            pieces.append(compressor.flush())

    pieces.append(compressor.finish())
    roundtripped = brotli.decompress(b''.join(pieces))
    with open(one_compressed_file, 'rb') as source:
        expected = source.read()
    assert roundtripped == expected
Ejemplo n.º 3
0
    def request(self, method, url, *args, **kwargs):
        """Send a request, decode Brotli bodies and handle Cloudflare challenges.

        Arguments are passed through unchanged to the parent ``request``.
        Returns the final response object. When the body is Brotli-encoded
        and ``self.allow_brotli`` is false, the undecoded response is
        returned immediately after logging a warning.
        """
        ourSuper = super(CloudScraper, self)
        resp = ourSuper.request(method, url, *args, **kwargs)

        if resp.headers.get('Content-Encoding') == 'br':
            if not self.allow_brotli:
                logging.warning('Brotli content detected, But option is disabled, we will not continue.')
                return resp
            # An empty body has nothing to decode; it must not be reported
            # as "option is disabled" nor skip the challenge handling below.
            if resp._content:
                resp._content = brotli.decompress(resp.content)

        # Debug request
        if self.debug:
            self.debugRequest(resp)

        # Check if Cloudflare anti-bot is on
        if self.isChallengeRequest(resp):
            if resp.request.method != 'GET':
                # Work around if the initial request is not a GET,
                # Supersede with a GET then re-request the original METHOD.
                self.request('GET', resp.url)
                resp = ourSuper.request(method, url, *args, **kwargs)
            else:
                # Solve Challenge
                resp = self.sendChallengeResponse(resp, **kwargs)

        return resp
Ejemplo n.º 4
0
def _decompress(input_file_name, use_gzip=True):
    """Read a compressed file and return its contents as decoded text.

    The file is treated as gzip when ``use_gzip`` is truthy and as Brotli
    otherwise; the decompressed bytes are decoded with the default codec.
    """
    if not use_gzip:
        with open(input_file_name, 'rb') as raw:
            payload = brotli.decompress(raw.read())
        return payload.decode()

    with gzip.open(input_file_name, 'rb') as zipped:
        payload = zipped.read()
    return payload.decode()
Ejemplo n.º 5
0
def download_uncompress(url, dest):
    """Download a Brotli-compressed resource and write it decompressed to ``dest``.

    Args:
        url: location of the compressed resource.
        dest: output file path; its parent directory is created when missing.
    """
    print('Downloading {}'.format(url))
    with urllib.request.urlopen(url) as response:
        data = brotli.decompress(response.read())
    print('Extracted to {} bytes'.format(len(data)))

    # dirname() is '' for a bare file name and os.makedirs('') raises, so
    # only create the directory when there actually is one.
    dest_dir = os.path.dirname(dest)
    if dest_dir:
        os.makedirs(dest_dir, exist_ok=True)
    with open(dest, 'wb+') as out_file:
        out_file.write(data)
    print('Decompressed to {}'.format(dest))
    def _get_total_datapoints(self):
        """Collect per-region datapoints from every dated ``index.html``.

        Each page is read as UTF-8 text, falling back to Brotli decompression
        when that fails. For every row of the "States Affected" table one
        datapoint is appended per mapped column, plus a TOTAL datapoint equal
        to CONFIRMED + PROBABLE. Returns the populated collection.
        """
        r = self.sdpf()
        base_dir = self.get_path_in_dir('')

        # column heading -> datatype; None marks a column that is skipped
        header_to_datatype = {
            'No. of Cases (Lab Confirmed)': DataTypes.CONFIRMED,
            'No. of Cases (on admission)': DataTypes.PROBABLE,
            'No. Discharged': None,
            'No. of Deaths': DataTypes.STATUS_DEATHS
        }

        for date in self.iter_nonempty_dirs(base_dir):
            page_path = base_dir / date / 'index.html'
            try:
                with open(page_path, 'r', encoding='utf-8') as f:
                    html = f.read()
            except UnicodeDecodeError:
                # not valid UTF-8 text: treat the file as Brotli-compressed
                import brotli
                with open(page_path, 'rb') as f:
                    html = brotli.decompress(f.read()).decode('utf-8')

            table = pq(html)('table:contains("States Affected")')
            # the first header cell is skipped, like the first cell of each row
            datatypes = []
            for header_cell in pq(table)('thead tr th')[1:]:
                datatypes.append(
                    header_to_datatype[pq(header_cell).text().strip()])

            for region_tr in pq(table)('tbody tr'):
                region = pq(region_tr[0]).text().strip()
                vals = {}

                for datatype, cell in zip(datatypes, region_tr[1:]):
                    if datatype:
                        count = int(pq(cell).text().replace(',', ''))
                        vals[datatype] = count

                        r.append(region_schema=Schemas.ADMIN_1,
                                 region_parent='NG',
                                 region_child=region,
                                 datatype=datatype,
                                 value=count,
                                 date_updated=date,
                                 source_url=self.SOURCE_URL)

                total = vals[DataTypes.CONFIRMED] + vals[DataTypes.PROBABLE]
                r.append(region_schema=Schemas.ADMIN_1,
                         region_parent='NG',
                         region_child=region,
                         datatype=DataTypes.TOTAL,
                         value=total,
                         date_updated=date,
                         source_url=self.SOURCE_URL)

        return r
Ejemplo n.º 7
0
def proc_search(domain, url, headers, key):
    """Run one search query and crawl the links found on the result page.

    Args:
        domain: passed to ``parse_url`` and ``get_webpage`` for each link.
        url: search URL prefix; ``key`` is appended to it.
        headers: HTTP headers for the request.
        key: search keyword.

    Link titles are segmented with jieba and unseen words are added to
    ``new_key``; every parseable link is handed to ``get_webpage``.
    Returns None (early, after logging, when fetching fails).
    """
    print("searching", url + key)

    href_list = []
    tit_list = []

    special_getter = False

    # Some sites need a dedicated getter, selected by URL prefix.
    for k in prefix_getter:
        if url.startswith(k):
            href_list, tit_list = prefix_getter[k](key)
            special_getter = True
            break

    if not special_getter:
        try:
            # The context manager closes the session even if the request fails.
            with requests.Session() as s:
                s.mount('https://', HTTPAdapter(max_retries=3))
                res = s.get(url + key, headers=headers)
        except Exception:
            log('Failed fetching:\n\t' + url + key)
            return

        content = res.content
        if res.headers.get('Content-Encoding') == 'br':
            content = brotli.decompress(content)
        html = content.decode("utf-8")

        soup = BeautifulSoup(html, "lxml")

        for a in soup.find_all('a', href=True):
            if a.has_attr('title'):
                tit_list.append(a['title'])
            else:
                tit_list.append(a.text)
            href_list.append(a['href'])

    # The original tested an undefined name ``href`` here; both lists are
    # iterated below, so bail out if a getter returned None for either.
    if tit_list is None or href_list is None:
        log('Failed fetching:\n\t' + url + key)
        return

    for a in tit_list:
        seg_list = jieba.cut_for_search(a)
        for w in seg_list:
            if w not in searched:
                new_key.add(w)

    for href in href_list:
        _url = parse_url(href, domain)
        if _url is not None:
            get_webpage(domain, _url, headers)

    print(url, "Fin")
def test_roundtrip_compression_with_files(simple_compressed_file):
    """
    Compressing and then decompressing file contents yields the same bytes.
    """
    source_path = simple_compressed_file[0]
    with open(source_path, 'rb') as source:
        original = source.read()

    roundtripped = brotli.decompress(brotli.compress(original))
    assert roundtripped == original
    def test_normal_page(self):
        """A view wrapped in ``compress_page`` returns a Brotli-encoded body."""
        @compress_page
        def a_view(request):
            return self.resp

        r = a_view(self.req)
        self.assertEqual(r.get('Content-Encoding'), 'br')
        self.assertEqual(r.get('Content-Length'), str(len(r.content)))
        # assertTrue(x, msg) only checked truthiness and used the expected
        # value as the failure message; compare the content for real.
        self.assertEqual(brotli.decompress(r.content), self.compressible_string)
Ejemplo n.º 10
0
 def _decode(self, response):
     """Return the response body as text.

     The body is Brotli-decompressed when the ``content-encoding`` header
     is ``br``; the charset is then guessed with chardet and undecodable
     bytes are ignored.
     """
     content = response.content
     content_encoding = response.headers.get('content-encoding')
     if content_encoding == 'br':
         content = brotli.decompress(content)
     charset_encoding = chardet.detect(content).get('encoding')
     if not charset_encoding:
         # chardet reports None when it cannot guess (e.g. empty body);
         # decode(None, ...) would raise TypeError.
         charset_encoding = 'utf-8'
     elif charset_encoding == 'ascii':
         charset_encoding = 'unicode_escape'
     return content.decode(charset_encoding, 'ignore')
Ejemplo n.º 11
0
def read_file(name, useBrotli=False):
    """Read a file as bytes, optionally Brotli-decompressing the contents.

    Args:
        name: path of the file to read.
        useBrotli: when true, decompress the contents and log the ratio.

    Returns:
        The (possibly decompressed) file contents as bytes.
    """
    with open(name, "rb") as f:
        s = f.read()
    if useBrotli:
        sl = len(s)
        s = brotli.decompress(s)
        # An empty payload would otherwise divide by zero.
        ratio = (sl / len(s)) * 100.0 if s else 0.0
        logging.debug(f"w {name}: brotli {sl} -> {len(s)}, {ratio:.1f}%")
    return s
Ejemplo n.º 12
0
    async def worker(url_iterator: Iterator,
                     token_receiver: trio.abc.ReceiveChannel):
        """Fetch every URL from ``url_iterator`` and record the outcome.

        Before each request one item is received from ``token_receiver``
        (presumably a rate-limiting token; confirm against the sender).
        The result stored in ``responses[url]`` is either the httpx response,
        with an extra ``decoded`` attribute holding the body text, or an
        error string when the request or the client setup failed.
        """
        async with token_receiver:
            for url in url_iterator:
                # wait for a token before starting the next request
                await token_receiver.receive()

                print(
                    f"[{round(trio.current_time(), 2)}] Start loading link: {url}"
                )
                try:
                    async with httpx.AsyncClient() as client:
                        # timestamps embedded in the consent cookies below
                        optanon_timestamp = quote_plus(
                            datetime.datetime.now().strftime(
                                "%a+%b+%d+%Y+%H:%M:%S"))
                        past = (datetime.datetime.now() +
                                datetime.timedelta(hours=7) -
                                datetime.timedelta(
                                    minutes=random.randrange(0, 8),
                                    seconds=random.randrange(0, 60),
                                ))
                        # NOTE(review): computed but not referenced again in this block
                        optanon_past_timestamp = past.strftime(
                            f"%Y-%m-%dT%H:%M:%S.{random.randrange(0, 999)}Z")

                        default_cookies = OrderedDict({
                            "ccpa-state":
                            "No",
                            "OptanonConsent":
                            f"isIABGlobal=true&datestamp={optanon_timestamp}+GMT-0700+(Pacific+Daylight+Time)&version=5.9.0&landingPath=NotLandingPage&groups=C0001%3A1%2CC0002%3A1%2CC0004%3A1&hosts=&geolocation=US%3BCA&AwaitingReconsent=false",
                            "OptanonGlobal":
                            f"isIABGlobal=false&datestamp={optanon_timestamp}+GMT-0700+(Pacific+Daylight+Time)&version=5.15.0&landingPath=NotLandingPage&groups=C0003%3A1%2CC0004%3A1%2CC0005%3A1%2CBG50%3A1%2CC0002%3A1%2CC0001%3A1&hosts=xvr%3A1%2CH35%3A1%2Cxik%3A1%2Cudm%3A1%2Cots%3A1%2CH99%3A1%2Cyla%3A1%2Cixz%3A1%2Cziw%3A1%2CH253%3A1%2Cmwk%3A1%2Czci%3A1%2Cjjk%3A1%2Ceuw%3A1%2Cdwu%3A1%2Ceyl%3A1%2CH28%3A1%2Cbup%3A1%2Cdce%3A1%2CH30%3A1%2Coom%3A1%2Copx%3A1%2CH151%3A1%2Cpjw%3A1%2Cgzg%3A1%2Cywk%3A1%2Cdnm%3A1%2Cwjk%3A1%2Cuuk%3A1%2Cudt%3A1%2Czgf%3A1%2Cayv%3A1%2Crai%3A1%2Cktz%3A1%2Cdfh%3A1%2Clck%3A1%2CH117%3A1%2Chty%3A1%2Cszd%3A1%2Cbax%3A1%2Cymj%3A1%2Cjjg%3A1%2Chbz%3A1%2Cdui%3A1%2Cstj%3A1%2Cyqw%3A1%2Cddu%3A1%2Ccnt%3A1%2CH59%3A1%2Cyze%3A1%2CH80%3A1%2Ctif%3A1%2Cdvt%3A1%2Csjs%3A1%2Cviv%3A1%2Catx%3A1%2CH212%3A1%2Caiy%3A1%2Cqsc%3A1%2Cbro%3A1%2Capv%3A1%2Cvhh%3A1%2Cslt%3A1%2Cmlc%3A1%2Czsx%3A1%2CH155%3A1%2Cqih%3A1%2CH122%3A1%2CH32%3A1%2Cwjk%3A1%2Caso%3A1%2Cvpf%3A1%2Cbhq%3A1%2Cvrh%3A1%2CH37%3A1%2Cuuk%3A1%2Cwtu%3A1%2Chiz%3A1%2CH65%3A1%2CH68%3A1%2Czsx%3A1&legInt=&AwaitingReconsent=false",
                        })
                        try:
                            response = await client.get(
                                url,
                                headers=default_headers,
                                cookies=default_cookies,
                                timeout=5)
                            if ("content-encoding" in response.headers
                                    and response.headers["content-encoding"]
                                    == "br"):
                                try:
                                    response.decoded = brotli.decompress(
                                        response.content).decode(
                                            response.encoding)
                                except Exception as e:
                                    # Brotli decoding failed: fall back to
                                    # decoding the content as it is
                                    response.decoded = response.content.decode(
                                        response.encoding)
                            else:
                                response.decoded = response.content.decode(
                                    response.encoding)
                        except Exception as e:
                            # request or decoding failed: keep a description
                            # of the error in place of the response
                            response = f"{e.__class__.__name__} :: {e}"

                    responses[url] = response
                except Exception as e:
                    response = f"[ fetch_urls ] No response from url {url}: {e.__class__.__name__} :: {e}"
                    responses[url] = response
Ejemplo n.º 13
0
 def _check_decompression(self, test_data):
     """Decompress the temp compressed file and compare it to ``test_data``."""
     uncompressed_path = _test_utils.get_temp_uncompressed_name(test_data)
     compressed_path = _test_utils.get_temp_compressed_name(test_data)
     # Write the decompressed bytes to the temp file, then compare files.
     with open(uncompressed_path, 'wb') as sink, \
             open(compressed_path, 'rb') as source:
         sink.write(brotli.decompress(source.read()))
     self.assertFilesMatch(uncompressed_path, test_data)
Ejemplo n.º 14
0
 def decompress(self, encoding, data):
     """Decompress ``data`` according to ``encoding``.

     Supports ``gzip``, ``br`` and ``deflate``; any other encoding returns
     ``data`` unchanged.
     """
     encoding = ensure_text(encoding)
     if encoding == 'gzip':
         return gzip.decompress(data)
     elif encoding == 'br':
         return brotli.decompress(data)
     elif encoding == 'deflate':
         try:
             return zlib.decompress(data)
         except zlib.error:
             # Some servers send raw deflate without the zlib header;
             # negative wbits tells zlib not to expect one.
             return zlib.decompress(data, -zlib.MAX_WBITS)
     return data
Ejemplo n.º 15
0
 def _check_decompression(self, test_data):
     """Verify that the decompressed temp file matches the original data file."""
     target_name = _test_utils.get_temp_uncompressed_name(test_data)
     source_name = _test_utils.get_temp_compressed_name(test_data)
     with open(target_name, 'wb') as target:
         with open(source_name, 'rb') as source:
             compressed = source.read()
             target.write(brotli.decompress(compressed))
     # test_data is the path of the original, uncompressed file
     self.assertFilesMatch(target_name, test_data)
Ejemplo n.º 16
0
async def songdb(timeout: int = config.TIMEOUT) -> Any:
    """
    Return a dictionary mapping ``song_id`` to song name (en|jp).
    """
    connection = websockets.connect(config.ESTERTION_URI, timeout=timeout)
    async with connection as ws:
        await ws.send('constants')
        payload = await ws.recv()
        await ws.close()
        # the reply is Brotli-compressed JSON
        decoded = brotli.decompress(payload)
        return json.loads(decoded)
Ejemplo n.º 17
0
 def _check_decompression_matches(self, test_data):
     """Decompress the temp compressed file and compare it with ``test_data``."""
     with open(get_temp_uncompressed_name(test_data), 'wb') as sink, \
             open(get_temp_compressed_name(test_data), 'rb') as source:
         sink.write(brotli.decompress(source.read()))
     # shallow=False forces a byte-by-byte comparison
     identical = filecmp.cmp(get_temp_uncompressed_name(test_data),
                             test_data,
                             shallow=False)
     self.assertTrue(identical)
Ejemplo n.º 18
0
def decompress(data: bytes, encoding: str) -> bytes:
    """Decompress ``data`` according to a Content-Encoding style name.

    Args:
        data: the compressed payload.
        encoding: ``'gzip'``, ``'br'`` or ``'deflate'``; anything else
            leaves ``data`` untouched.

    Returns:
        The decompressed bytes.
    """
    if encoding == 'gzip':
        data = gzip.decompress(data)
    elif encoding == 'br':
        data = brotli.decompress(data)
    elif encoding == 'deflate':
        try:
            data = zlib.decompress(data)
        except zlib.error:
            # Raw deflate stream without zlib header/checksum; negative
            # wbits tells zlib not to expect them.
            data = zlib.decompress(data, -zlib.MAX_WBITS)

    return data
Ejemplo n.º 19
0
 def post_dissect(self, s):
     """Undo chunked transfer coding and content compression of payload ``s``.

     Does nothing when the ``auto_compression`` option of the http contrib
     is off. Returns the decoded payload, or the payload as far as it could
     be decoded: decompression errors are swallowed.
     """
     if not conf.contribs["http"]["auto_compression"]:
         return s
     encodings = self._get_encodings()
     # Un-chunkify
     if "chunked" in encodings:
         data = b""
         while s:
             # each chunk is "<hex length>\r\n<payload>\r\n"
             length, _, body = s.partition(b"\r\n")
             try:
                 length = int(length, 16)
             except ValueError:
                 # Not a valid chunk. Ignore
                 break
             else:
                 load = body[:length]
                 if body[length:length + 2] != b"\r\n":
                     # Invalid chunk. Ignore
                     break
                 s = body[length + 2:]
                 data += load
         # Only use the reassembled data when every byte of the input was
         # consumed as valid chunks; otherwise keep what is left of ``s``.
         if not s:
             s = data
     # Decompress
     try:
         # only the first matching encoding is applied
         if "deflate" in encodings:
             import zlib
             s = zlib.decompress(s)
         elif "gzip" in encodings:
             s = gzip_decompress(s)
         elif "compress" in encodings:
             import lzw
             s = lzw.decompress(s)
         elif "br" in encodings:
             if _is_brotli_available:
                 s = brotli.decompress(s)
             else:
                 log_loading.info(
                     "Can't import brotli. brotli decompression "
                     "will be ignored !")
         elif "zstd" in encodings:
             if _is_zstd_available:
                 # Using its streaming API since its simple API could handle
                 # only cases where there is content size data embedded in
                 # the frame
                 bio = io.BytesIO(s)
                 reader = zstandard.ZstdDecompressor().stream_reader(bio)
                 s = reader.read()
             else:
                 log_loading.info(
                     "Can't import zstandard. zstd decompression "
                     "will be ignored !")
     except Exception:
         # Cannot decompress - probably incomplete data
         pass
     return s
Ejemplo n.º 20
0
def response_hook(resp, *args, **kwargs):
    """Attach the decoded JSON results of a Brotli-compressed response.

    On success ``resp.dec_result`` is set to the ``results`` entry of the
    payload and the module-level ``curr_req_max`` to its ``total`` entry.
    Any decoding problem is ignored and leaves the response untouched.
    """
    global curr_req_max
    # parse the json storing the result on the response object
    try:
        dec_res = brotli.decompress(resp.content)
        content = json.loads(dec_res)
        resp.dec_result = content['results']
        curr_req_max = content['total']
    except Exception:
        # Deliberately best-effort, but unlike a bare ``except:`` this no
        # longer swallows KeyboardInterrupt / SystemExit.
        pass
Ejemplo n.º 21
0
def test_roundtrip_compression_with_files(simple_compressed_file):
    """
    Data survives a compress/decompress round trip unchanged.
    """
    with open(simple_compressed_file[0], 'rb') as handle:
        payload = handle.read()

    compressed = brotli.compress(payload)
    restored = brotli.decompress(compressed)
    assert restored == payload
Ejemplo n.º 22
0
    def hit(self, url):
        """Fetch ``url`` and store its cookies and decoded body on the instance.

        Sets ``self.cookies`` to the ``Set-Cookie`` header value and
        ``self.content`` to the decompressed body decoded as UTF-8.
        """
        request_obj = Request(url)
        request_obj = self.add_headers(request_obj)

        # The context manager closes the connection even if reading or
        # decoding the body raises.
        with request.urlopen(request_obj) as response_obj:
            self.cookies = response_obj.getheader('Set-Cookie')
            self.content = decompress(response_obj.read()).decode('utf-8')
Ejemplo n.º 23
0
def get(path):
    """Return the HTML text for ``path``, using the Brotli-compressed cache.

    On a cache miss the page is downloaded and a compressed copy is queued
    on ``redis_queue`` under ``path``.
    """
    url = base + path
    # Entries are queued under ``path``, so look up with the same key that
    # was just tested (the original tested ``path`` but read ``cache[url]``).
    cached = cache.get(path)
    if cached is not None:
        # decode so both branches return text, mirroring html.encode() below
        html = brotli.decompress(cached).decode()
    else:
        html = requests.get(url).text
        redis_queue.put((path, brotli.compress(html.encode(),
                                               brotli.MODE_TEXT)))

    return html
Ejemplo n.º 24
0
    def decode_brotli(self, data):
        '''
        Restore HTTP response data that was compressed with brotli.

        Marker header:
        Content-Encoding: br

        Returns the decompressed bytes, or ``data`` unchanged when it
        cannot be decompressed.
        '''
        try:
            return brotli.decompress(data)
        except Exception:
            # best-effort: hand back the input, but do not swallow
            # KeyboardInterrupt / SystemExit like a bare ``except:`` would
            return data
Ejemplo n.º 25
0
def decompress_brotli(data: bytes) -> bytes:
    """Return the Brotli-decompressed form of ``data``.

    :param data: compressed data
    :type data: bytes
    :raises brotli.error: BrotliDecompress failed
    :return: uncompressed data
    :rtype: bytes
    """
    uncompressed = brotli.decompress(data)
    return uncompressed
Ejemplo n.º 26
0
def decode_content(content, encoding_header):
    """Undo the content codings listed in a Content-Encoding header.

    Args:
        content: the encoded body bytes.
        encoding_header: comma-separated list of codings in the order they
            were applied; they are undone in reverse order.

    Returns:
        The decoded bytes. Unknown codings are left untouched.
    """
    import zlib  # local import: only needed for 'deflate'

    # Coding names are case-insensitive and may be surrounded by whitespace.
    encodings = [token.strip().lower() for token in encoding_header.split(',')]
    for encoding in reversed(encodings):
        if encoding in ('', 'identity'):
            continue
        if encoding == 'br':
            content = brotli.decompress(content)
        elif encoding in ('gzip', 'x-gzip'):
            content = gzip.decompress(content)
        elif encoding == 'deflate':
            try:
                content = zlib.decompress(content)
            except zlib.error:
                # raw deflate stream without the zlib header
                content = zlib.decompress(content, -zlib.MAX_WBITS)
    return content
Ejemplo n.º 27
0
    def test_compress(self):
        """The compressor output is non-empty, no larger than the input, and
        decompresses back to the original content."""
        source = BytesIO(content)
        compressed = BrotliCompressor().compress("", source)

        self.assertGreater(compressed.size, 0)
        self.assertLessEqual(compressed.size, len(content))

        self.assertEqual(brotli.decompress(compressed.read()), content)
Ejemplo n.º 28
0
 def brdecompress(payload, log):
     """Brotli-decompress ``payload``; on any failure log it and return the
     payload unchanged."""
     try:
         import brotli
         return brotli.decompress(payload)
     except Exception:
         message = "Either brotli decompress failed or discord returned incorrect content encodings."
         Logger.log(message, None, log)  #yea, it happens :/
     return payload
Ejemplo n.º 29
0
    def brotli_compress(self, filepath):
        """
        Write a Brotli-compressed copy of ``filepath`` next to the original.

        Returns the new path (input filepath + '.br'), or None when the file
        is skipped: extension not included, already Brotli-compressed, or
        smaller than ``MINIMUM_SIZE_FOR_COMPRESSION``.
        """
        _, extension = os.path.splitext(filepath)

        # Check 1: Make sure the extension is okay
        if extension not in self.included_filetypes:
            return None

        target_path = filepath + '.br'

        with self.open(filepath, 'rb') as in_file:
            raw_bytes = in_file.read()

            # Check 2: Check that the file isn't already brotli-compressed.
            # Brotli has no magic number like gzip, but decompressing data
            # that is not compressed raises an error.
            try:
                brotli.decompress(raw_bytes)
                already_compressed = True
            except brotli.error:  # BrotliDecompress failed
                already_compressed = False
                in_file.seek(0)
            if already_compressed:
                # don't re-compress it
                return None

            # Check 3: Files below MINIMUM_SIZE_FOR_COMPRESSION can end up
            # larger after compression, so they are skipped.
            if len(raw_bytes) < self.MINIMUM_SIZE_FOR_COMPRESSION:
                return None

            packed = brotli.compress(raw_bytes, quality=11)
            # replace any stale compressed copy
            if self.exists(target_path):
                self.delete(target_path)
            self.save(target_path, ContentFile(packed))
        return target_path
    def test_streaming_page(self):
        """A streaming view wrapped in ``compress_page`` is Brotli-encoded and
        carries no Content-Length header."""
        @compress_page
        def a_streaming_view(request):
            return self.stream_resp_unicode

        r = a_streaming_view(self.req)
        self.assertEqual(r.get('Content-Encoding'), 'br')
        self.assertFalse(r.has_header('Content-Length'))
        streamed = brotli.decompress(b''.join(r))
        expected = b''.join(x.encode('utf-8') for x in self.sequence_unicode)
        self.assertEqual(streamed, expected)
 def test_compress_streaming_response(self):
     """
     Streaming responses are compressed by the middleware.
     """
     middleware = CompressionMiddleware()
     r = middleware.process_response(self.req, self.stream_resp)
     body = brotli.decompress(b''.join(r))
     self.assertEqual(body, b''.join(self.sequence))
     self.assertEqual(r.get('Content-Encoding'), 'br')
     self.assertFalse(r.has_header('Content-Length'))
     self.assertEqual(r.get('Vary'), 'Accept-Encoding')
Ejemplo n.º 32
0
    def descargar_dataset_url(self, apiurl, verbose=True, dataframe=True):
        """Download a dataset directly from a given API URL.

        Useful for downloading datasets from APIs that are not yet listed in
        the 'api_urls' attribute. It can also fetch the raw data by passing
        False for the 'dataframe' parameter.

        Parameters:
            apiurl (str): Full URL (e.g. 'https://[...]') of a public BioPortal API.
            verbose (bool; optional): Option to print details during the download.
            dataframe (bool; optional): Option to process the downloaded data
                                            into a DataFrame. Pass False to get raw data.

        Returns:
            apidata (bytes or pandas.DataFrame): Downloaded data. Raw if
                                                'dataframe' is False; otherwise
                                                processed into a pandas.DataFrame.
            exitoso (bool): Flag telling whether the API could be downloaded.
        """
        self.apidata = None

        try:
            r = requests.get(apiurl,
                             headers={'Accept-Encoding': 'br'},
                             timeout=(15, None))
        except requests.exceptions.Timeout:
            # The server hung
            print("Servidor no responde.")
            return (None, False)

        # Status code
        if r.status_code != 200:
            print("Respuesta inesperada del servidor (#{})".format(
                r.status_code))
            return (None, False)

        apidata_raw = r.content

        # r.encoding is None when the response declares no charset, and the
        # body may be compressed binary: compare bytes instead of decoding so
        # neither case can raise here.
        if (r.encoding or '').startswith('ISO'):
            if apidata_raw.startswith(b'<!DOCTYPE html>'):
                print(
                    "Servidor no esperaba que bajaras este API (devolvio un documento HTML)"
                )
                return (None, False)

        try:
            apidata_raw = (brotli.decompress(apidata_raw))
        except Exception:
            # Best-effort: the body may not be Brotli-compressed (or was
            # already decoded by the HTTP layer); keep it unchanged.
            pass

        if dataframe:
            apidata = pd.json_normalize(json.loads(apidata_raw), sep='_')
        else:
            apidata = apidata_raw
        return (apidata, True)
def test_decompression(simple_compressed_file):
    """
    ``brotli.decompress`` restores the original contents of a compressed file.
    """
    plain_path, packed_path = simple_compressed_file[0], simple_compressed_file[1]

    with open(plain_path, 'rb') as plain:
        expected = plain.read()

    with open(packed_path, 'rb') as packed:
        restored = brotli.decompress(packed.read())

    assert restored == expected
Ejemplo n.º 34
0
 def decompress(self, widget):
     """Brotli-decompress ``self.file`` into the output location.

     The output path is ``self.out`` extended with ``self.filename`` minus
     its last extension; ``self.out`` is updated to that path. ``widget``
     is not used.
     """
     dest_filename = '.'.join(self.filename.split('.')[:-1])
     self.out = self.out + dest_filename
     with open(self.file, 'rb') as infile:
         filedata = infile.read()
     data = brotli.decompress(filedata)
     # ``with`` guarantees the output file is closed even if the write fails
     with open(self.out, 'wb') as outfile:
         outfile.write(data)
Ejemplo n.º 35
0
def download_index_report(dest_folder: str = './indices/reports/',
                          date=None):
    """Download the NSE indices report and save it under ``dest_folder``.

    Args:
        dest_folder: directory for the output file; created when missing.
        date: date used in the output file name; defaults to today, resolved
            at call time (a ``datetime.today()`` default would be frozen at
            import time).

    Errors while downloading or writing are printed, not raised.
    """
    if date is None:
        date = datetime.today()
    if not os.path.exists(dest_folder):
        os.makedirs(dest_folder)  # create folder if it does not exist

    #url = 'https://www1.nseindia.com/content/indices/ind_close_all_{}.csv'.format(date.strftime("%d%m%Y"))
    url = 'https://www1.nseindia.com/homepage/Indices1.json'
    filename = 'ind_close_all_{}.csv'.format(date.strftime("%d%m%Y"))
    file_path = os.path.join(dest_folder, filename)

    print('URL: ' + url)

    header = {
        'method':
        'GET',
        'scheme':
        'https',
        #'authority': 'nseindia.com',
        'accept':
        'application/json, text/plain, */*',
        'accept-encoding':
        'gzip, deflate, br',
        'accept-language':
        'en-US,en;q=0.9',
        'dnt':
        '1',
        'host':
        'www1.nseindia.com',
        'referer':
        'https://www1.nseindia.com/products/content/equities/indices/homepage_indices.htm',
        'user-agent':
        'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.135 Safari/537.36',
    }
    try:
        req = urllib.request.Request(url, headers=header)
        # the context manager closes the connection once the body is read
        with urllib.request.urlopen(req) as response:
            coder = response.headers.get('Content-Encoding', 'utf-8')
            body = response.read()
        if coder == 'br':
            html_page = brotli.decompress(body).decode('utf-8')
        elif coder == 'gzip':
            html_page = gzip.GzipFile(
                fileobj=BytesIO(body)).read().decode('utf-8')
            print('Got gzip response')
        else:
            html_page = body.decode('utf-8')
        # html_page is a str, not a file object, so shutil.copyfileobj always
        # failed here and left an empty file; write the encoded text instead.
        with open(file_path, 'wb') as f:
            f.write(html_page.encode('utf-8'))
            print('Downloaded')
    except Exception:
        traceback.print_exc()
Ejemplo n.º 36
0
 def load_courses(self):
     """Load every Brotli-compressed JSON file under ``<workdir>/courses``
     and store the combined course list in ``self.courses``."""
     courses_dir = os.path.join(self.workdir, 'courses')
     collected = []
     for path in glob.glob(courses_dir + '/*'):
         with open(path, 'rb') as fh:
             payload = json.loads(brotli.decompress(fh.read()))
             collected.extend(payload['data']['courses'])
     self.courses = collected
Ejemplo n.º 37
0
 def _decode(self, body, encoding):
     """Decode ``body`` according to the bytes ``encoding`` token.

     Handles gzip/x-gzip, deflate (zlib-wrapped or raw) and, when it is
     listed in ACCEPTED_ENCODINGS, br. Other encodings return the body
     unchanged.
     """
     if encoding in (b'gzip', b'x-gzip'):
         return gunzip(body)
     if encoding == b'deflate':
         try:
             return zlib.decompress(body)
         except zlib.error:
             # retry as a raw deflate stream (no zlib header)
             return zlib.decompress(body, -15)
     if encoding == b'br' and b'br' in ACCEPTED_ENCODINGS:
         return brotli.decompress(body)
     return body
def test_decompression(simple_compressed_file):
    """
    Decompressing the compressed file gives back the uncompressed original.
    """
    def _slurp(path):
        # read a whole file as bytes
        with open(path, 'rb') as handle:
            return handle.read()

    uncompressed_data = _slurp(simple_compressed_file[0])
    compressed_data = _slurp(simple_compressed_file[1])

    assert brotli.decompress(compressed_data) == uncompressed_data
Ejemplo n.º 39
0
    def content(self) -> Union[str, bytes]:
        """Return the Brotli-decompressed content.

        Gives "" when there is no content, the UTF-8 decoded text when the
        decompressed bytes are valid UTF-8, and the raw bytes otherwise.
        """
        if self._content:
            raw = brotli.decompress(self._content)
            try:
                return raw.decode("utf8")
            except UnicodeDecodeError:
                return raw
        return ""
Ejemplo n.º 40
0
 def _check_decompression(self, test_data, **kwargs):
     """Decompress the temp compressed file and compare it to ``test_data``."""
     # Only dictionary is supported as a kwarg to brotli.decompress.
     kwargs = {key: value for key, value in kwargs.items()
               if key == 'dictionary'}
     # Write decompression to temp file and verify it matches the original.
     uncompressed_path = _test_utils.get_temp_uncompressed_name(test_data)
     compressed_path = _test_utils.get_temp_compressed_name(test_data)
     with open(uncompressed_path, 'wb') as sink, \
             open(compressed_path, 'rb') as source:
         sink.write(brotli.decompress(source.read(), **kwargs))
     self.assertFilesMatch(uncompressed_path, test_data)
Ejemplo n.º 41
0
    def test_brotli_accepted_no_change(self):
        """A Brotli record is passed through unchanged when the client
        accepts ``br``."""
        import brotli
        payload = brotli.compress('ABCDEFG'.encode('utf-8'))

        record_headers = {
            'Content-Type': 'application/octet-stream',
            'Content-Encoding': 'br',
            'Content-Length': str(len(payload)),
        }
        environ = {'HTTP_ACCEPT_ENCODING': 'gzip, deflate, br'}

        headers, gen, is_rw = self.rewrite_record(
            record_headers, payload, ts='201701mp_', environ=environ)

        assert headers['Content-Encoding'] == 'br'
        assert headers['Content-Length'] == str(len(payload))

        body = b''.join(gen)
        assert brotli.decompress(body).decode('utf-8') == 'ABCDEFG'
Ejemplo n.º 42
0
	def __init__(self, file, checkChecksums=1, fontNumber=-1):
		"""Parse the WOFF2 header and table directory from ``file`` and
		decompress the font data.

		Raises ImportError when Brotli is unavailable, and TTLibError when
		the signature, the header size, the decompressed size or the reported
		file length is wrong. ``checkChecksums`` and ``fontNumber`` are not
		used in this method.
		"""
		if not haveBrotli:
			log.error(
				'The WOFF2 decoder requires the Brotli Python extension, available at: '
				'https://github.com/google/brotli')
			raise ImportError("No module named brotli")

		self.file = file

		signature = Tag(self.file.read(4))
		if signature != b"wOF2":
			raise TTLibError("Not a WOFF2 font (bad signature)")

		# rewind and read the whole fixed-size header; its fields are
		# unpacked onto self
		self.file.seek(0)
		self.DirectoryEntry = WOFF2DirectoryEntry
		data = self.file.read(woff2DirectorySize)
		if len(data) != woff2DirectorySize:
			raise TTLibError('Not a WOFF2 font (not enough data)')
		sstruct.unpack(woff2DirectoryFormat, data, self)

		# read the table directory; each entry's offset is the running sum
		# of the lengths of the entries before it
		self.tables = OrderedDict()
		offset = 0
		for i in range(self.numTables):
			entry = self.DirectoryEntry()
			entry.fromFile(self.file)
			tag = Tag(entry.tag)
			self.tables[tag] = entry
			entry.offset = offset
			offset += entry.length

		# the decompressed stream must be exactly as long as the sum of the
		# table lengths
		totalUncompressedSize = offset
		compressedData = self.file.read(self.totalCompressedSize)
		decompressedData = brotli.decompress(compressedData)
		if len(decompressedData) != totalUncompressedSize:
			raise TTLibError(
				'unexpected size for decompressed font data: expected %d, found %d'
				% (totalUncompressedSize, len(decompressedData)))
		self.transformBuffer = BytesIO(decompressedData)

		# seek to the end of the file to compare its size with the header
		self.file.seek(0, 2)
		if self.length != self.file.tell():
			raise TTLibError("reported 'length' doesn't match the actual file size")

		self.flavorData = WOFF2FlavorData(self)

		# make empty TTFont to store data while reconstructing tables
		self.ttFont = TTFont(recalcBBoxes=False, recalcTimestamp=False)
Ejemplo n.º 43
0
    def _decode(self, body, encoding):
        """Return ``body`` decoded according to the bytes ``encoding`` token.

        Supports gzip/x-gzip, deflate and (when listed in
        ACCEPTED_ENCODINGS) br; anything else is returned unchanged.
        """
        if encoding in (b'gzip', b'x-gzip'):
            return gunzip(body)

        if encoding == b'deflate':
            try:
                return zlib.decompress(body)
            except zlib.error:
                # ugly hack to work with raw deflate content that may
                # be sent by microsoft servers. For more information, see:
                # http://carsten.codimi.de/gzip.yaws/
                # http://www.port80software.com/200ok/archive/2005/10/31/868.aspx
                # http://www.gzip.org/zlib/zlib_faq.html#faq38
                return zlib.decompress(body, -15)

        if encoding == b'br' and b'br' in ACCEPTED_ENCODINGS:
            return brotli.decompress(body)
        return body
Ejemplo n.º 44
0
def decompress(data, method='gz'):
  """
  Decompress *data* previously compressed with the specified
  compression *method*.

  Supported methods are ``'gz'`` (gzip), ``'zip'`` (zlib), ``'bz'``
  (bzip2) and ``'brt'`` (brotli).

  Raises ImportError when ``'brt'`` is requested but the module-level
  ``brotli`` name is falsy, and ValueError for any other *method*.
  """

  if method == 'gz':
    return gzip.decompress(data)
  elif method == 'zip':
    return zlib.decompress(data)
  elif method == 'bz':
    # Previously routed through zlib, which cannot read bzip2 streams.
    # Imported locally so the fix does not depend on the file's import block.
    import bz2
    return bz2.decompress(data)
  elif method == 'brt':
    if not brotli:
      raise ImportError('brotli')
    return brotli.decompress(bytes(data))
  else:
    raise ValueError('invalid method: {0!r}'.format(method))
Ejemplo n.º 45
0
	def __init__(self, reader=None):
		"""Set up version, metadata and private-data attributes.

		All four attributes start as None. When *reader* is truthy, the
		version numbers are copied from it, the metadata block (if any) is
		read from ``reader.file`` and brotli-decompressed, and the private
		data block (if any) is read verbatim.

		Raises ImportError when ``haveBrotli`` is falsy. Length mismatches
		trip the ``assert`` statements below.
		"""
		if not haveBrotli:
			raise ImportError("No module named brotli")

		self.majorVersion = None
		self.minorVersion = None
		self.metaData = None
		self.privData = None

		# nothing to copy without a reader
		if not reader:
			return

		self.majorVersion = reader.majorVersion
		self.minorVersion = reader.minorVersion

		if reader.metaLength:
			reader.file.seek(reader.metaOffset)
			compressedMeta = reader.file.read(reader.metaLength)
			assert len(compressedMeta) == reader.metaLength
			metaData = brotli.decompress(compressedMeta)
			assert len(metaData) == reader.metaOrigLength
			self.metaData = metaData

		if reader.privLength:
			reader.file.seek(reader.privOffset)
			privData = reader.file.read(reader.privLength)
			assert len(privData) == reader.privLength
			self.privData = privData
Ejemplo n.º 46
0
def main(args):
    """Compress or decompress a file (or stdin) with Brotli.

    *args* is forwarded to ``parse_options``. Input comes from
    ``options.infile`` or binary stdin; output goes to ``options.outfile``
    or binary stdout. Every error is reported on stderr and exits with
    status 1.

    The output file is opened only after (de)compression has succeeded,
    so a Brotli failure no longer leaves a truncated file behind, and the
    handle is closed even if the write fails.
    """

    options = parse_options(args)

    if options.infile:
        if not os.path.isfile(options.infile):
            print('file "%s" not found' % options.infile, file=sys.stderr)
            sys.exit(1)
        with open(options.infile, "rb") as infile:
            data = infile.read()
    else:
        if sys.stdin.isatty():
            # interactive console, just quit
            usage()
        infile = get_binary_stdio('stdin')
        data = infile.read()

    # Refuse to overwrite before doing any work; like the other
    # diagnostics this now goes to stderr instead of stdout.
    if (options.outfile and os.path.isfile(options.outfile)
            and not options.force):
        print('output file exists', file=sys.stderr)
        sys.exit(1)

    try:
        if options.decompress:
            data = brotli.decompress(data)
        else:
            data = brotli.compress(data, options.mode, options.transform)
    except brotli.error as e:
        print('[ERROR] %s: %s' % (e, options.infile or 'sys.stdin'),
              file=sys.stderr)
        sys.exit(1)

    if options.outfile:
        outfile = open(options.outfile, "wb")
    else:
        outfile = get_binary_stdio('stdout')
    try:
        outfile.write(data)
    finally:
        outfile.close()
Ejemplo n.º 47
0
 def _decompress(self, test_data):
     """Write the Brotli-decompressed contents of *test_data* to the path
     returned by ``_test_utils.get_temp_uncompressed_name``."""
     target_path = _test_utils.get_temp_uncompressed_name(test_data)
     # The destination is opened first, then the source.
     with open(target_path, 'wb') as sink, open(test_data, 'rb') as source:
         compressed = source.read()
         sink.write(brotli.decompress(compressed))
Ejemplo n.º 48
0
# coding:utf-8
# Copyright (C) dirlt

import brotli

from req_pb2 import SearchResponse

# Load the whole captured payload as raw bytes; it is decompressed and
# parsed further down in this script.
with open('response.content', 'rb') as fh:
    data = fh.read()


def ensure_string(s, encoding='utf8'):
    """Return *s* as text: ``bytes`` are decoded with *encoding*, anything
    else is returned unchanged."""
    return s.decode(encoding) if isinstance(s, bytes) else s


def ensure_bytes(s, encoding='utf8'):
    """Return *s* as bytes: ``str`` is encoded with *encoding*, anything
    else is returned unchanged."""
    return s.encode(encoding) if isinstance(s, str) else s


# Inflate the Brotli-compressed payload read from disk.
data = brotli.decompress(data)
# print(data)


resp = SearchResponse()
# The first two bytes are dropped before protobuf parsing.
# NOTE(review): presumably a 2-byte framing prefix added by the producer --
# confirm against the code that wrote 'response.content'.
resp.ParseFromString(data[2:])
print(resp)
Ejemplo n.º 49
0
def main(args=None):
    """Run the ``bro.py`` command-line tool.

    *args* is the argument list handed to ``argparse`` (None means
    ``sys.argv[1:]``). Input is read from ``--input`` or binary stdin,
    compressed (or decompressed with ``--decompress``) with Brotli, and
    written to ``--output`` or binary stdout.

    Usage problems are reported through ``parser.error`` (exit status 2).
    A Brotli failure exits with status 1.

    Fixes relative to the previous version:
    * ``--mode`` accepts 2, which the help text already advertised.
    * ``--lgwin`` accepts 10..24, matching its help text.
    * the Brotli error message ends with a newline.
    * the output file is opened only after (de)compression succeeded, so a
      failure no longer truncates it, and it is closed even if the write
      fails.
    """

    parser = argparse.ArgumentParser(
        prog='bro.py',
        description="Compression/decompression utility using the Brotli algorithm.")
    parser.add_argument('--version', action='version', version=brotli.__version__)
    parser.add_argument('-i', '--input', metavar='FILE', type=str, dest='infile',
                        help='Input file', default=None)
    parser.add_argument('-o', '--output', metavar='FILE', type=str, dest='outfile',
                        help='Output file', default=None)
    parser.add_argument('-f', '--force', action='store_true',
                        help='Overwrite existing output file', default=False)
    parser.add_argument('-d', '--decompress', action='store_true',
                        help='Decompress input file', default=False)
    params = parser.add_argument_group('optional encoder parameters')
    params.add_argument('-m', '--mode', metavar="MODE", type=int, choices=[0, 1, 2],
                        help='The compression mode can be 0 for generic input, '
                        '1 for UTF-8 encoded text, or 2 for WOFF 2.0 font data. '
                        'Defaults to 0.')
    params.add_argument('-q', '--quality', metavar="QUALITY", type=int,
                        choices=list(range(0, 12)),
                        help='Controls the compression-speed vs compression-density '
                        'tradeoff. The higher the quality, the slower the '
                        'compression. Range is 0 to 11. Defaults to 11.')
    params.add_argument('--lgwin', metavar="LGWIN", type=int,
                        choices=list(range(10, 25)),
                        help='Base 2 logarithm of the sliding window size. Range is '
                        '10 to 24. Defaults to 22.')
    params.add_argument('--lgblock', metavar="LGBLOCK", type=int,
                        choices=[0] + list(range(16, 25)),
                        help='Base 2 logarithm of the maximum input block size. '
                        'Range is 16 to 24. If set to 0, the value will be set based '
                        'on the quality. Defaults to 0.')
    # set default values using global DEFAULT_PARAMS dictionary
    parser.set_defaults(**DEFAULT_PARAMS)

    options = parser.parse_args(args=args)

    if options.infile:
        if not os.path.isfile(options.infile):
            parser.error('file "%s" not found' % options.infile)
        with open(options.infile, "rb") as infile:
            data = infile.read()
    else:
        if sys.stdin.isatty():
            # interactive console, just quit
            parser.error('no input')
        infile = get_binary_stdio('stdin')
        data = infile.read()

    # Refuse to overwrite before doing any (de)compression work.
    if options.outfile and os.path.isfile(options.outfile) and not options.force:
        parser.error('output file exists')

    try:
        if options.decompress:
            data = brotli.decompress(data)
        else:
            data = brotli.compress(
                data, mode=options.mode, quality=options.quality,
                lgwin=options.lgwin, lgblock=options.lgblock)
    except brotli.error as e:
        # parser.exit() writes the message verbatim, so supply the newline.
        parser.exit(1, 'bro: error: %s: %s\n' % (e, options.infile or 'sys.stdin'))

    if options.outfile:
        outfile = open(options.outfile, "wb")
    else:
        outfile = get_binary_stdio('stdout')
    try:
        outfile.write(data)
    finally:
        outfile.close()
Ejemplo n.º 50
0
 def test_get_brotli(self):
     """A request whose Accept-Encoding includes ``br`` gets the JS file
     back Brotli-encoded, with matching Content-Encoding and Vary headers."""
     asset_url = storage.staticfiles_storage.url(self.static_files.js_path)
     request_headers = {'Accept-Encoding': 'gzip, br'}
     response = self.server.get(asset_url, headers=request_headers)
     decoded_body = brotli.decompress(response.content)
     self.assertEqual(decoded_body, self.static_files.js_content)
     self.assertEqual(response.headers['Content-Encoding'], 'br')
     self.assertEqual(response.headers['Vary'], 'Accept-Encoding')
Ejemplo n.º 51
0
Archivo: list.py Proyecto: arall/wwitt
import sys
import brotli
from kyotocabinet import *
import http_pb2

# Dump every record of the Kyoto Cabinet database named on the command line:
# each value is a Brotli-compressed HttpWeb protobuf message.
db = DB()
if not db.open(sys.argv[1], DB.OREADER):
    # Previously an open failure was ignored and the script printed nothing.
    sys.stderr.write("open error: %s\n" % db.error())
    sys.exit(1)

cur = db.cursor()
cur.jump()
try:
    # get(True) returns the current (key, value) pair and steps the cursor;
    # it yields a falsy value once the records are exhausted.
    ret = cur.get(True)
    while ret:
        web = http_pb2.HttpWeb()
        proto = brotli.decompress(ret[1])
        web.ParseFromString(proto)
        # Single-argument print() calls behave the same on Python 2 and 3;
        # the old print statements were a SyntaxError on Python 3.
        print(ret[0])
        print(web)
        ret = cur.get(True)
finally:
    # Release the cursor and the database handle even if a record fails.
    cur.disable()
    db.close()

Ejemplo n.º 52
0
def decode_br(content):
    """Return *content* decompressed with Brotli, or None when decoding fails.

    The decompression call used to sit outside the ``try`` block, so the
    ``None`` fallback could never trigger. It now runs inside the ``try``,
    and ``brotli.error`` is handled alongside the original exception types.
    """
    try:
        return brotli.decompress(content)
    except (brotli.error, IOError, EOFError):
        return None
Ejemplo n.º 53
0
def decode_brotli(content: bytes) -> bytes:
    """Return *content* decompressed with Brotli; falsy input yields ``b""``
    without calling the decoder."""
    return brotli.decompress(content) if content else b""
Ejemplo n.º 54
0
 def brotli_decompress(data, uncompressed_size):
     """Return *data* decompressed with Brotli.

     *uncompressed_size* is accepted but not used here.
     """
     decompressed = brotli.decompress(data)
     return decompressed
Ejemplo n.º 55
0
def decode_brotli(content):
    """Return *content* decompressed with Brotli."""
    decoded = brotli.decompress(content)
    return decoded
Ejemplo n.º 56
0
def decode_brotli(data):
    """Return *data* decompressed with Brotli."""
    decoded = brotli.decompress(data)
    return decoded
Ejemplo n.º 57
0
 def test_garbage_appended(self):
     """Decompressing a compressed stream with an extra trailing byte must
     raise ``brotli.error``."""
     with self.assertRaises(brotli.error):
         stream_with_trailer = brotli.compress(b'a') + b'a'
         brotli.decompress(stream_with_trailer)
Ejemplo n.º 58
0
def test_compressed_data_roundtrips(s):
    """Compressing *s* and decompressing the result must give *s* back."""
    restored = brotli.decompress(brotli.compress(s))
    assert restored == s
Ejemplo n.º 59
0
def main():
    """Run the ``bro.py`` command-line tool on ``sys.argv``.

    Input is read from ``--input`` or binary stdin, compressed (or
    decompressed with ``--decompress``) with Brotli, and written to
    ``--output`` or binary stdout.

    Usage problems are reported through ``parser.error`` (exit status 2).
    A Brotli failure exits with status 1.

    Fixes relative to the previous version:
    * ``--mode`` accepts 2, which the help text already advertised.
    * the ``--mode`` help text has the missing space before "Defaults".
    * the Brotli error message ends with a newline.
    * the output file is opened only after (de)compression succeeded, so a
      failure no longer truncates it, and it is closed even if the write
      fails.
    """

    parser = argparse.ArgumentParser(
        prog="bro.py", description="Compression/decompression utility using the Brotli algorithm."
    )
    parser.add_argument("--version", action="version", version=brotli.__version__)
    parser.add_argument("-i", "--input", metavar="FILE", type=str, dest="infile", help="Input file", default=None)
    parser.add_argument("-o", "--output", metavar="FILE", type=str, dest="outfile", help="Output file", default=None)
    parser.add_argument("-f", "--force", action="store_true", help="Overwrite existing output file", default=False)
    parser.add_argument("-d", "--decompress", action="store_true", help="Decompress input file", default=False)
    params = parser.add_argument_group("optional encoder parameters")
    params.add_argument(
        "-m",
        "--mode",
        metavar="MODE",
        type=int,
        choices=[0, 1, 2],
        help="The compression mode can be 0 for generic input, "
        "1 for UTF-8 encoded text, or 2 for WOFF 2.0 font data. "
        "Defaults to 0.",
    )
    params.add_argument(
        "-q",
        "--quality",
        metavar="QUALITY",
        type=int,
        choices=list(range(0, 12)),
        help="Controls the compression-speed vs compression-density "
        "tradeoff. The higher the quality, the slower the "
        "compression. Range is 0 to 11. Defaults to 11.",
    )
    params.add_argument(
        "--lgwin",
        metavar="LGWIN",
        type=int,
        choices=list(range(16, 25)),
        help="Base 2 logarithm of the sliding window size. Range is " "16 to 24. Defaults to 22.",
    )
    params.add_argument(
        "--lgblock",
        metavar="LGBLOCK",
        type=int,
        choices=[0] + list(range(16, 25)),
        help="Base 2 logarithm of the maximum input block size. "
        "Range is 16 to 24. If set to 0, the value will be set based "
        "on the quality. Defaults to 0.",
    )
    # set default values using global DEFAULT_PARAMS dictionary
    parser.set_defaults(**DEFAULT_PARAMS)

    options = parser.parse_args()

    if options.infile:
        if not os.path.isfile(options.infile):
            parser.error('file "%s" not found' % options.infile)
        with open(options.infile, "rb") as infile:
            data = infile.read()
    else:
        if sys.stdin.isatty():
            # interactive console, just quit
            parser.error("no input")
        infile = get_binary_stdio("stdin")
        data = infile.read()

    # Refuse to overwrite before doing any (de)compression work.
    if options.outfile and os.path.isfile(options.outfile) and not options.force:
        parser.error("output file exists")

    try:
        if options.decompress:
            data = brotli.decompress(data)
        else:
            data = brotli.compress(
                data, mode=options.mode, quality=options.quality, lgwin=options.lgwin, lgblock=options.lgblock
            )
    except brotli.error as e:
        # parser.exit() writes the message verbatim, so supply the newline.
        parser.exit(1, "bro: error: %s: %s\n" % (e, options.infile or "sys.stdin"))

    if options.outfile:
        outfile = open(options.outfile, "wb")
    else:
        outfile = get_binary_stdio("stdout")
    try:
        outfile.write(data)
    finally:
        outfile.close()
def test_decompression_fails_properly_on_garbage(bogus, exception_cls):
    """
    Garbage data properly fails decompression.

    ``brotli.decompress(bogus)`` must raise ``exception_cls``. Both
    arguments are supplied from outside this function (presumably pytest
    parametrization or fixtures -- not visible here).
    """
    with pytest.raises(exception_cls):
        brotli.decompress(bogus)