def get_content(url, encoding='UTF-8', gzip_decompress=False):
        """
        Return the content of the page at url as a str.
        """
        resp = urlopen(url)
        result = ""
        output = resp.read()
        try:
            if gzip_decompress:
                output = gzip.decompress(output)

            if encoding is not None:
                result = output.decode(encoding)
            else:
                result = output

        except UnicodeDecodeError:
            print("There was an error while decoding the response. Trying again...")
            # fall back to downloading the file and decompressing it from disk
            urlretrieve(url, "file.tmp")
            with open("file.tmp", "rb") as fxml:
                result = gzip.decompress(fxml.read()).decode()

        return result
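
A minimal usage sketch for the helper above; the URL is a placeholder and the imports are the ones the snippet itself relies on:

from urllib.request import urlopen, urlretrieve  # imports assumed by the snippet above
import gzip

# hypothetical call: fetch a gzip-compressed XML feed and get it back as text
feed_text = get_content("https://example.com/feed.xml.gz", gzip_decompress=True)
print(feed_text[:200])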
Example #2
    def unGzipBody(self) :
        debugTrace("unGzipBody")

        if self.chunk :
            self.httpBody = gzip.decompress(self.assembleChunks(self.httpBody))
        else:
            self.httpBody = gzip.decompress(self.httpBody)
Example #3
    def get_page(self, _url):
        ''' Fetch the entire page data.
        Returns str. '''

        header = { 'Accept-Encoding': 'gzip' }
        header['User-Agent'] = self.ualist[random.randint(0, len(self.ualist)-1)]
        if opts['user_agent']: header['User-Agent'] = opts['user_agent']

        req  = urllib.request.Request(url = _url, headers = header)
        pros = opts['proxy']
        if pros and pros[0] in ('http', 'https'):
            req.set_proxy(pros[1], pros[0])
        # urlopen error [SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed
        # https://www.python.org/dev/peps/pep-0476/
        context = ssl._create_unverified_context()
        page = urllib.request.urlopen(req, timeout=80, context=context).read()

        #gzip_handler = GzipHandler()
        #proxy_handler = urllib.request.ProxyHandler({'https':'XX.XX.XX.XX:XXXX'})
        #proxy_auth_handler = urllib.request.ProxyBasicAuthHandler()
        #proxy_auth_handler.add_password('realm', 'host', 'username', 'password')
        #opener = urllib.request.build_opener(gzip_handler, proxy_handler, proxy_auth_handler)
        #opener.addheaders = [('User-Agent', 'Python-urllib/2.7')]
        #urllib.request.install_opener(opener)
        #page = opener.open(_url).read()

        try:
            if self.url_type == "2": return "None Content"
            if self.url_type == "4": return gzip.decompress(page).decode('gb2312').encode('utf-8')
            else:                    return gzip.decompress(page)
        except OSError:
            return page
Example #4
  def DownloadSubs(self):
    encoded_l1 = self.server.DownloadSubtitles(self.loginToken, [self.searchOS])
    decoded_l1 = base64.b64decode(bytes(encoded_l1['data'][0]['data'], "cp1250"))
    decompresed_l1 = str(gzip.decompress(decoded_l1), "cp1250")

    encoded_l2 = self.server.DownloadSubtitles(self.loginToken, [self.searchEqOS])
    decoded_l2 = base64.b64decode(bytes(encoded_l2['data'][0]['data'], "cp1250"))
    decompresed_l2 = str(gzip.decompress(decoded_l2), "cp1250")
Example #5
def fetch_url(url, path, overwrite=False, verbose=False):
    """
    Download specified url to destination path, returning True if successful. Will not overwrite existing files by
    default.
    :param url:
    :param path:
    :param overwrite:
    :param verbose:
    :return:
    """
    if not overwrite and os.path.exists(path):
        if verbose:
            print('The file %s already exists, so %s was not downloaded.' % (path, url))
        return True

    # create empty file for blank URLs, e.g. human homologs for human genome data
    if url == '':
        open(path, 'a').close()

    # for compressed files, make sure to decompress before writing to disk
    elif url.endswith('.gz'):
        try:
            response = urllib.request.urlopen(url)
        except urllib.error.HTTPError as error:
            print("Could not retrieve %s: %s [HTTP Error %s]" % (url, error.reason, error.code), file=sys.stderr)
            return False
        except urllib.error.URLError as error:
            print("Could not retrieve %s: %s" % (url, error.reason), file=sys.stderr)
            return False

        with open(path, 'wb') as output_file:
            if sys.platform == 'darwin':
                # write output in chunks to avoid bug in MacOS [https://bugs.python.org/issue24658]
                binary_result = gzip.decompress(response.read())
                result_length = len(binary_result)
                chunk_start = 0

                while chunk_start < result_length:
                    chunk_end = min(result_length, chunk_start+BINARY_CHUNKSIZE)
                    output_file.write(binary_result[chunk_start:chunk_end])
                    chunk_start = chunk_end
            else:
                output_file.write(gzip.decompress(response.read()))

    # download uncompressed files directly
    else:
        try:
            urllib.request.urlretrieve(url, filename=path)

        except urllib.error.HTTPError as error:
            print("Could not retrieve %s: %s [HTTP Error %s]" % (url, error.reason, error.code), file=sys.stderr)
            return False
        except urllib.error.URLError as error:
            print("Could not retrieve %s: %s" % (url, error.reason), file=sys.stderr)
            return False

    return True
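
A short usage sketch for fetch_url; BINARY_CHUNKSIZE is assumed to be a module-level constant in the source project, and the URL and path below are placeholders:

import sys
import gzip, os
import urllib.request, urllib.error   # imports the snippet above relies on

BINARY_CHUNKSIZE = 2 ** 20  # assumed value; the real constant is defined elsewhere in the project

if not fetch_url("https://example.org/annotations.txt.gz", "annotations.txt", verbose=True):
    print("download failed", file=sys.stderr)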
Example #6
def main(argv=None):
    if (argv is None):
        argv = sys.argv[1:]

    arch = DefaultArchiver()
    arch.debug(3)
    arch.exclude(".svn")
    arch.includeall(".")
    arch.list()

    hexdigest = arch.digest(__file__, "hexdigest.log", "md5")
    file = open("hexdigest.log", "r")
    contents = file.read()
    file.close()
    os.remove("hexdigest.log")
    print(hexdigest)
    assert hexdigest == contents

    jar = JarArchiver()
    jar.debug(3)
    #TODO:

    gzip = GzipArchiver()
    gzip.debug(3)
    gzip.compress(__file__, __file__ + ".gz")
    gzip.decompress(__file__ + ".gz", __file__ + ".gzbak")
    gzip.dump(__file__ + ".gzbak")
    os.remove(__file__ + ".gz")
    os.remove(__file__ + ".gzbak")

    zip = ZipArchiver("../cwd.zip")
    zip.debug(3)
    zip.exclude(".svn")
    zip.includeall(".")
    zip.list()
    zip.list(None, "../cwd-zip.log")
    zip.assemble()
    zip.list("../cwd.zip")
    zip.list("../cwd.zip", "../cwd2-zip.log")
    os.remove("../cwd.zip")
    os.remove("../cwd-zip.log")
    os.remove("../cwd2-zip.log")

    tar = TarArchiver("cwd.tgz")
    tar.debug(3)
    tar.exclude(".svn")
    tar.includeall(".")
    tar.list()
    tar.list(None, "cwd-tgz.log")
    tar.assemble()
    tar.list("cwd.tgz")
    tar.list("cwd.tgz", "cwd2-tgz.log")
    os.remove("cwd.tgz")
    os.remove("cwd-tgz.log")
    os.remove("cwd2-tgz.log")
Example #7
 def get_response_content(url_response):
     content_encoding = url_response.getheader('Content-Encoding')
     if content_encoding is not None and 'gzip' in content_encoding:
         page_content = gzip.decompress(url_response.read())
     else:
         try:
             page_content = gzip.decompress(url_response.read())
         except OSError:
             try:
                 page_content = URLTool.deflate(url_response.read())
             except:
                 page_content = url_response.read()
     page_content = page_content.decode('utf-8')
     return page_content
Example #8
def _gunzip(data):
    if hexversion < 0x03000000:
        import zlib
        return zlib.decompress(data, 16 + zlib.MAX_WBITS)
    else:
        import gzip
        return gzip.decompress(data)
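
A small round-trip sketch of the two branches above; on Python 3 both calls return the same bytes, because a wbits value of 16 + zlib.MAX_WBITS tells zlib to expect gzip framing:

import gzip
import zlib

payload = b"hello, gzip"
blob = gzip.compress(payload)

# zlib path (works on Python 2 and 3) and gzip path (Python 3 only) agree
assert zlib.decompress(blob, 16 + zlib.MAX_WBITS) == payload
assert gzip.decompress(blob) == payload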
Example #9
    def __init__(self):
        self.requestTime = datetime.datetime.utcnow()
        request = _HmaRequest()
        response = urllib.request.urlopen(request)
        buffer = response.read()
        body = gzip.decompress(buffer) if response.info().get('Content-Encoding') == 'gzip' else buffer

        soup = BeautifulSoup(body, "html5lib")

        # try to get table by id
        # try to get table by class
        # fall back to just getting the first table
        table = soup.find("table", {"id": self.TableId}) or \
                soup.find("table", self.TableClass) or \
                soup.find("table")

        if table is None:
            raise Exception("No proxy table found")

        # remove all display: none tags
        [tag.decompose() for tag in table(attrs={"style": re.compile(r"display\s*:\s*none")})]

        fields = {i: self.__match_enum(ProxyField, self.FieldPatterns, header, None)
                  for i, header in enumerate(th.getText() for th in table.find("thead").find("tr").find_all("th"))}

        if None in fields.values():
            raise Exception("Missing field")

        self.proxies = [Proxy({field: self.__get_field(field, cell) for field, cell in
                               [(fields[fieldNumber], cells[fieldNumber]) for fieldNumber in fields]})
                        for cells in [list(row.findAll("td")) for row in table.find("tbody").findAll("tr")]]
Example #10
def urlopen(url, headers={}, data=None, retries=RETRIES):
    '''Open an http connection and return the Request (response) object.

    headers is a dict. Some default entries, such as User-Agent and Referer,
    are already provided and do not need to be added again.

    This function is only meant for http requests; do not use it to download
    large files. If the server supports gzip compression, the data is
    transferred gzip-compressed and then decompressed locally.
    req.data holds the final http payload, usually UTF-8 encoded text.
    '''
    headers_merged = default_headers.copy()
    for key in headers.keys():
        headers_merged[key] = headers[key]
    opener = urllib.request.build_opener(ForbiddenHandler)
    opener.addheaders = [(k, v) for k,v in headers_merged.items()]

    for _ in range(retries):
        try:
            req = opener.open(url, data=data, timeout=TIMEOUT)
            encoding = req.headers.get('Content-encoding')
            req.data = req.read()
            if encoding == 'gzip':
                req.data = gzip.decompress(req.data)
            elif encoding == 'deflate':
                req.data = zlib.decompress(req.data, -zlib.MAX_WBITS)
            return req
        except OSError as e:
            print(e)
    return None
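
A hypothetical call to this wrapper; it relies on default_headers, RETRIES, TIMEOUT and ForbiddenHandler being defined elsewhere in the module, so this is only a sketch:

req = urlopen('https://example.com/api/info', headers={'Accept-Encoding': 'gzip, deflate'})
if req is not None:
    # req.data has already been gunzipped/inflated by the wrapper
    print(req.data.decode('utf-8', errors='replace')[:200])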
Example #11
def post_multipart(url, headers, fields, files, retries=RETRIES):
    content_type, body = encode_multipart_formdata(fields, files)
    schema = urllib.parse.urlparse(url)

    headers_merged = default_headers.copy()
    for key in headers.keys():
        headers_merged[key] = headers[key]
    headers_merged['Content-Type'] = content_type
    headers_merged['Content-length'] = str(len(body))

    for _ in range(retries):
        try:
            h = http.client.HTTPConnection(schema.netloc)
            h.request('POST', url, body=body, headers=headers_merged)
            req = h.getresponse()
            encoding = req.getheader('Content-encoding')
            req.data = req.read()
            if encoding == 'gzip':
                req.data = gzip.decompress(req.data)
            elif encoding == 'deflate':
                req.data = zlib.decompress(req.data, -zlib.MAX_WBITS)
            return req
        except OSError as e:
            print(e)
    return None
Example #12
def download_device_info():
    deviceurl=r"https://tpairbox.blob.core.windows.net/blobfs/AirBoxDevice.gz"
    print("downloading data from http://data.taipei/")
    response = urq.urlopen(deviceurl)
    print("decompressing the gzip file")
    with io.open("AirBoxDevice","wb") as out:
        out.write(gzip.decompress(response.read()))
Example #13
def wdecompress(param):
	if PY_VERSION >= 3:
		return gzip.decompress(param)
	else:
		import StringIO
		with closing(gzip.GzipFile(fileobj=StringIO.StringIO(param))) as gz:
			return gz.read();
Example #14
 def load(self, location, data=None, headers={}):
     if not location:
         raise LoginError()
     self.last_url = re.sub(r"https?:\/\/[^/]+", r"", location)
     heads = {"Accept-Encoding": "gzip, deflate",
              "User-Agent": self.core_cfg.get("User-Agent", "OTRS_US/0.0")}
     if "Cookies" in self.runt_cfg:
         heads["Cookie"] = self.runt_cfg["Cookies"]
     heads.update(headers)
     r = Request(location, data, headers=heads)
     try:
         pg = urlopen(r, timeout=60)
     except HTTPError as err:
         self.echo("HTTP Error:", err.getcode())
         return
     except Exception as err:
         self.echo(repr(err))
         return
     pd = pg.read()
     if pg.getheader("Content-Encoding") == "gzip":
         pd = decompress(pd)
     self.dump_data(pg, pd)
     if not self.check_login(pd.decode(errors="ignore")):
         raise LoginError(r.get_full_url())
     return self.parse(pd)
Example #15
 def login(self, who=None, req=None):
     "login and load"
     if who is None:
         who = self.runt_cfg
     if req is None:
         req = self.last_url
     user = who["user"]
     passwd = str(who["password"])
     site = who["site"]
     r = Request(
         site, urlencode(
             [("Action", "Login"), ("RequestedURL", req), ("Lang", "en"),
              ("TimeOffset", ""), ("User", user), ("Password", passwd),
              ("login", "Login")]).encode())
     try:
         pg = urlopen(r, timeout=60)
     except BadStatusLine:
         raise LoginError("BadStatusLine")
     pd = pg.read()
     if pg.getheader("Content-Encoding") == "gzip":
         pd = decompress(pd)
     m = re.search(r"OTRSAgentInterface=[^;&]+", pg.geturl())
     if m and m.group(0):
         self.runt_cfg["Cookies"] = m.group(0)
     else:
         self.runt_cfg.pop("Cookies", None)
     self.dump_data(pg, pd)
     return self.parse(pd)
Example #16
def dld_ldss(sql,cursor):
    cursor.execute(sql,id = jobs)
    result = cursor.fetchall()
    wkt = result[0][1].read()  # compressed binary bytes
    ret = gzip.decompress(wkt)
    with open(r'C:\Users\tangjing\Desktop\Auto-Reg\ldss.xlsx', 'wb') as b2:
        b2.write(ret)
Example #17
def loads(value, compression=None):
    """Convert Python object to a primitive value for storing to database."""
    if value is None:
        return None
    if compression == Compression.gzip:
        value = gzip.decompress(value)
    return msgpack.loads(value, encoding='utf-8')
Example #18
def download_html(url):
    global download_tally
    headers = {"User-Agent": "Mozilla/5.0"}
    req = urllib.request.Request(url, None, headers)

    # put in a retry for loop:
    for k in range(miss_threshold):
        try:
            f = urllib.request.urlopen(req)
            break
        except Exception as e:  # k5 regularly bugs out, so we need this.
            print(e.reason)
            print("Trying again ... ")
            sys.stdout.flush()
            time.sleep(30)  # hopefully 30s is enough to solve the issue. Otherwise, may as well bail anyway.
            if (k + 1) >= miss_threshold:

                # save a copy first:
                save_sw(C, k5_file, False)
                f = open(download_tally_file, mode="w")
                f.write(str(download_tally))
                f.close()

                print("url request failed", miss_threshold, "times. Exiting!\n")
                sys.exit(0)

    if f.info().get("Content-Encoding") == "gzip":  # sometimes WP spits back gzip, even if not requested.
        html = gzip.decompress(f.read())
        html = str(html)
    else:
        html = f.read()
    f.close()
    download_tally += 1
    return html
Example #19
def test_gzip():
    s = b"Hello World!"
    contents = BytesIO(s)
    compressed = b"".join(compress(contents, 1))
    uncompressed = decompress(compressed)
    assert uncompressed == s
    contents.close()
Example #20
    def __init__(self, **kwargs):
        """Create the Tree from SVG ``text``."""
        bytestring = kwargs.get('bytestring')
        file_obj = kwargs.get('file_obj')
        url = kwargs.get('url')
        unsafe = kwargs.get('unsafe')
        parent = kwargs.get('parent')
        parent_children = kwargs.get('parent_children')
        tree_cache = kwargs.get('tree_cache')
        element_id = None

        self.url_fetcher = kwargs.get('url_fetcher', fetch)

        if bytestring is not None:
            self.url = url
        elif file_obj is not None:
            bytestring = file_obj.read()
            self.url = getattr(file_obj, 'name', None)
            if self.url == '<stdin>':
                self.url = None
        elif url is not None:
            parent_url = parent.url if parent else None
            parsed_url = parse_url(url, parent_url)
            if parsed_url.fragment:
                self.url = urlunparse(parsed_url[:-1] + ('',))
                element_id = parsed_url.fragment
            else:
                self.url = parsed_url.geturl()
                element_id = None
            self.url = self.url or None
        else:
            raise TypeError(
                'No input. Use one of bytestring, file_obj or url.')
        if parent and self.url == parent.url:
            root_parent = parent
            while root_parent.parent:
                root_parent = root_parent.parent
            tree = root_parent.xml_tree
        else:
            if not bytestring:
                bytestring = self.fetch_url(
                    parse_url(self.url), 'image/svg+xml')
            if len(bytestring) >= 2 and bytestring[:2] == b'\x1f\x8b':
                bytestring = gzip.decompress(bytestring)
            parser = ElementTree.XMLParser(
                resolve_entities=unsafe, huge_tree=unsafe)
            tree = ElementTree.fromstring(bytestring, parser)
        remove_svg_namespace(tree)
        self.xml_tree = tree
        apply_stylesheets(self)
        if element_id:
            self.xml_tree = tree.find(".//*[@id='{}']".format(element_id))
            if self.xml_tree is None:
                raise TypeError(
                    'No tag with id="{}" found.'.format(element_id))
        super().__init__(
            self.xml_tree, self.url_fetcher, parent, parent_children, self.url)
        self.root = True
        if tree_cache is not None and self.url:
            tree_cache[(self.url, self.get('id'))] = self
Example #21
	def search(ctx, cmd, arg, *args):
                """so <netowkr> <tag1> <tag2> <etc>\nSearch through the stackexchange network for the given tags"""
                network = args[0]
                args = args[1:]
                
                thingy = ";".join(args)
                searchQuery = 'search?tagged={}&pagesize=3'.format(thingy)
                if network in apiURLS:
                        apiURL = apiURLS[network]
                else:
                        ctx.reply("Invalid Network","StackExchange")
                        return
                searchURL = apiURL.format(searchQuery)
                data = request.urlopen(searchURL).read()
                jsonData = gzip.decompress(data)
                decoded = jsonLoad(jsonData)
                results = decoded["total"]
                if results > 0:
                        res = min(results, 3)
                        ctx.reply("Results 1-{} of {}".format(res, prettyNumber(results)), expandedNames[network])
                else:
                        ctx.reply("No results for your query", expandedNames[network])
                for q in decoded['questions']:
                        title = q['title']
                        questionURL = questionURLS[network].format('questions', q['question_id'])
                        ctx.reply('{} • {}'.format(title, questionURL), expandedNames[network])
Example #22
    def get_manifest(self, stream, full):
        if not hasattr(self, "_streams") or stream not in self._streams:
            return False

        self._stream = self._streams[stream]
        # we no longer need all streams
        del self._streams

        print("* Downloading manifest from {0}/{1}/{2}.".format(
            self._stream["DownloadUrl"],
            self._stream["TitleFolder"],
            self._stream["ManifestName"]))

        # we still need to add the AuthSuffix for the download to work
        manifest_url = "{0}/{1}/{2}{3}".format(
            self._stream["DownloadUrl"],
            self._stream["TitleFolder"],
            self._stream["ManifestName"],
            self._stream["AuthSuffix"])

        try:
            response = urlopen(manifest_url)
            manifest_raw = decompress(response.read())
            self._manifest = loads(manifest_raw.decode("utf-8"))
            return self._verify_manifest(full)
        except URLError as err:
            print("! Could not retrieve manifest: {0}.".format(err.reason))
            return False
Example #23
def decompress(data, encoding = None):
	"""check for content gzip encoding"""
	if not data or not encoding:
		return data
	if 'gzip' == encoding.lower():
		return gzip.decompress(data)
	raise Exception("decompress: unknown encoding", encoding)
Example #24
def BlueprintStringDecode(blob):
    print('a')
    comparison = base64.b64encode(bytes(
            supported_mods['blueprintstring']['magic'] +
            supported_mods['blueprintstring']['magicend'], 'UTF-8'))
    if len(blob) <= len(comparison):
        return ''
    print('b')
    try:
        #base64 character set: alphanumeric, '+', '/'; '=' for padding
        nonBase64Chars = re.compile(r'[^a-zA-Z0-9+\/=]', re.UNICODE)
        b64 = nonBase64Chars.sub('', blob)
        gzipped = base64.b64decode(b64, validate=True)
        decompressed = gzip.decompress(gzipped)\
                .decode(encoding='utf-8',errors='strict')
        if decompressed.startswith(supported_mods['blueprintstring']['magic']):
            return\
        decompressed[len(supported_mods['blueprintstring']['magic']):\
                    -len(supported_mods['blueprintstring']['magicend'])]
        else:
            return ':('
    except TypeError: # base64 decoding error
        return 't'
#    except Error as e:
#        return e
    except OSError: # not gzipped
        return 'o'
Example #25
def validate_state(zip_state):
    assert isinstance(zip_state, zipfile.ZipExtFile)
    state_output = gzip.decompress(zip_state.read())
    state = json.loads(state_output)
    assert len(state["frameworks"]) == 2, "bundle must contains information about frameworks"
    task_count = len(state["frameworks"][1]["tasks"]) + len(state["frameworks"][0]["tasks"])
    assert task_count == 1, "bundle must contains information about tasks"
Example #26
File: net.py Project: 24/bcloud
def post_multipart(url, headers, fields, files, retries=RETRIES):
    content_type, body = encode_multipart_formdata(fields, files)
    schema = urllib.parse.urlparse(url)

    headers_merged = default_headers.copy()
    for key in headers.keys():
        headers_merged[key] = headers[key]
    headers_merged["Content-Type"] = content_type
    headers_merged["Content-length"] = str(len(body))

    for i in range(retries):
        try:
            h = http.client.HTTPConnection(schema.netloc)
            h.request("POST", url, body=body, headers=headers_merged)
            req = h.getresponse()
            encoding = req.getheader("Content-encoding")
            req.data = req.read()
            if encoding == "gzip":
                req.data = gzip.decompress(req.data)
            elif encoding == "deflate":
                req.data = zlib.decompress(req.data, -zlib.MAX_WBITS)
            return req
        except OSError:
            logger.error(traceback.format_exc())
        except:
            logger.error(traceback.format_exc())
            # return None
    return None
Example #27
def get_page_content(tags, page_num):
    tasks = list()

    # Page template
    url = "http://nhentai.net/search/?q={}&page={}".format(tags, page_num)

    response = http_request(url)

    # Decompress response and decode into plain string
    buffer = gzip.decompress(response.read()).decode("utf-8")

    # Parse content
    parsed = BeautifulSoup(buffer, 'html.parser')
    gallery = parsed.body.main.div.div
    for div in gallery:
        # Filter out NavigableString class
        if isinstance(div, bs4.NavigableString):
            continue
        album = dict()
        album["data-tags"] = div.attrs["data-tags"]
        album["href"] = div.a.attrs["href"]
        album["cover"] = div.a.img.attrs["src"]
        album["caption"] = div.a.div.get_text()
        tasks.append(album)

    return tasks
Example #28
    def _crawl(self, nid):
        # pdb.set_trace()
        if self._collection.find_one({"_id": nid}):
            print("failed: news {} exists ... skip".format(nid))
            return

        url = "http://info.3g.qq.com/g/s?aid=finance_ss&id=" + nid
        headers = {
            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
            "Accept-Encoding": "gzip,deflate,sdch",
            "Accept-Language": "zh-CN,zh;q=0.8,en-US;q=0.6,en;q=0.4",
            "User-Agent": "Mozilla/5.0 (Linux; Android 4.1.1; Nexus 7 Build/JRO03D) AppleWebKit/535.19 (KHTML, like Gecko) Chrome/18.0.1025.166  Safari/535.19",
        }
        # print(url)
        with urlopen(Request(url, headers=headers), timeout=30) as fp:
            html_content = gzip.decompress(fp.read()).decode("utf-8")

        page = Page(html_content)

        create_at = page.get_create_time()
        title = page.get_title()
        content = page.get_content()

        if title is None or content == "":
            raise Exception("no content")

        self._collection.insert(dict(_id=nid, create_at=create_at, title=title, content=content))

        print("success: get {} characters from {} queue {}".format(len(content), url, self._queue.qsize()))
Example #29
def get_nzb(dataset=None):
    user = auth()
    if user:
        id = request.query.guid or None
        if not id:
            id = request.query.id or None

        if id:
            with db_session() as db:
                release = db.query(Release).join(NZB).join(Category).filter(Release.id == id).one()
                if release:
                    release.grabs += 1
                    user.grabs += 1
                    db.merge(release)
                    db.merge(user)
                    db.commit()

                    data = release.nzb.data
                    response.set_header('Content-type', 'application/x-nzb-compressed-gzip')
                    response.set_header('X-DNZB-Name', release.search_name)
                    response.set_header('X-DNZB-Category', release.category.name)
                    response.set_header('Content-Disposition', 'attachment; filename="{0}"'
                                        .format(release.search_name.replace(' ', '_') + '.nzb.gz')
                    )
                    return gzip.decompress(data)
                else:
                    return api_error(300)
        else:
            return api_error(200)
    else:
        return api_error(100)
Example #30
def get_data(prefix, page_number=1, from_date=datetime.datetime(2008, 7, 28), to_date=datetime.datetime.now()):
    url = "http://api.stackexchange.com/2.2/{prefix}?" \
                  "page={page_num}&pagesize=100&" \
                  "fromdate={fromdate}&" \
                  "todate={todate}&" \
                  "site=stackoverflow&"\
                  "key={key}&"  \
                  "filter=!6JvKMTZy8(80_".format(prefix=quote(prefix),
                                                 key="gytnic74fozY)jD39pQSzg((",
                                                 page_num=page_number,
                                                 fromdate=int(from_date.timestamp()),
                                                 todate=int(to_date.timestamp()))
    logging.getLogger(__name__).info('Fetching: ' + url)
    while True:
        try:
            resp = ur.urlopen(url)
            break
        except urllib.error.URLError as e:
            if e.errno != 110:  # Connection - timeout. Ignore it and try again
                raise
            else:
                sleep(3, 7)
    data = gzip.decompress(resp.read())
    data = json.loads(data.decode())
    return data
Example #31
    def forward_request(self, method, path, data, headers):

        if path.split('?')[0] == '/health':
            return serve_health_endpoint(method, path, data)
        if method == 'POST' and path == '/graph':
            return serve_resource_graph(data)

        # kill the process if we receive this header
        headers.get(HEADER_KILL_SIGNAL) and os._exit(0)

        target = headers.get('x-amz-target', '')
        auth_header = get_auth_string(method, path, headers, data)
        if auth_header and not headers.get('authorization'):
            headers['authorization'] = auth_header
        host = headers.get('host', '')
        orig_req_url = headers.pop(HEADER_LOCALSTACK_REQUEST_URL, '')
        headers[HEADER_LOCALSTACK_EDGE_URL] = (re.sub(
            r'^([^:]+://[^/]+).*', r'\1', orig_req_url) or 'http://%s' % host)

        # extract API details
        api, port, path, host = get_api_from_headers(headers,
                                                     method=method,
                                                     path=path,
                                                     data=data)

        if api and config.LS_LOG:
            # print request trace for debugging, if enabled
            LOG.debug('IN(%s): "%s %s" - headers: %s - data: %s' %
                      (api, method, path, dict(headers), data))

        set_default_region_in_headers(headers)

        if port and int(port) < 0:
            return 404

        if not port:
            api, port = get_api_from_custom_rules(method, path, data,
                                                  headers) or (api, port)

        if not port:
            if method == 'OPTIONS':
                if api and config.LS_LOG:
                    # print request trace for debugging, if enabled
                    LOG.debug('OUT(%s): "%s %s" - status: %s' %
                              (api, method, path, 200))
                return 200

            if api in ['', None, API_UNKNOWN]:
                truncated = truncate(data)
                if auth_header or target or data or path not in [
                        '/', '/favicon.ico'
                ]:
                    LOG.info((
                        'Unable to find forwarding rule for host "%s", path "%s %s", '
                        'target header "%s", auth header "%s", data "%s"') %
                             (host, method, path, target, auth_header,
                              truncated))
            else:
                LOG.info((
                    'Unable to determine forwarding port for API "%s" - please '
                    'make sure this API is enabled via the SERVICES configuration'
                ) % api)
            response = Response()
            response.status_code = 404
            response._content = '{"status": "running"}'
            return response

        if api and not headers.get('Authorization'):
            headers['Authorization'] = aws_stack.mock_aws_request_headers(
                api)['Authorization']
        headers[HEADER_TARGET_API] = str(api)

        headers['Host'] = host
        if isinstance(data, dict):
            data = json.dumps(data)

        encoding_type = headers.get('Content-Encoding') or ''
        if encoding_type.upper() == GZIP_ENCODING.upper() and api not in [S3]:
            headers.set('Content-Encoding', IDENTITY_ENCODING)
            data = gzip.decompress(data)

        lock_ctx = BOOTSTRAP_LOCK
        if persistence.API_CALLS_RESTORED or is_internal_call_context(headers):
            lock_ctx = empty_context_manager()

        with lock_ctx:
            return do_forward_request(api,
                                      method,
                                      path,
                                      data,
                                      headers,
                                      port=port)
Example #32
    def fetch(self, timeout=1):
        """
        Fetches the url, parses the title, desc and icon
        for the website passed in
        """

        q = Request(self.url)
        q.add_header('User-Agent', USER_AGENT)
        html = urlopen(q, timeout=timeout)

        encoding = html.getheader("Content-Encoding")

        content = html.read()

        if encoding == "gzip":
            content = gzip.decompress(content)

        soup = BeautifulSoup(
            content.decode("utf-8", "ignore"),
            "html.parser"
        )

        title_elems = [soup.findAll(attrs={
            attr: re.compile(r"title", re.I)
        }) for attr in ["name", "property"]]

        for i in range(len(title_elems)):
            if len(title_elems[i]) > 0:
                self.title = title_elems[i][0]["content"]
                break
            else:
                # Get the <title> as a string
                self.title = str(soup.title.string)

        titles = re.compile("[-–|:•]+").split(self.title)

        self.title = titles[0].strip()

        # Get the desc from whatever we can find
        desc_elems = soup.findAll("meta", attrs={"name": re.compile(r"Desc", re.I)})

        for meta_elem in desc_elems:
            if meta_elem.attrs["content"]:
                self.desc = meta_elem.attrs["content"]
                break

        if len(self.desc.split()) > 30:
            self.desc = " ".join(self.desc.split()[0:29]).strip()

            self.desc = self.desc.strip("…")
            self.desc = self.desc.strip(".")
            self.desc += "..."

        icon_link = soup.find("link", rel=re.compile(r"shortcut icon"))

        if icon_link is None:
            icon_link = soup.find("link", rel=re.compile(r"icon"))

        if icon_link is not None:
            # Check if icon link is global or relative
            icon_href = icon_link["href"]

            if icon_href.find("http") != -1:
                self.icon = icon_href
            else:
                self.icon = self.url + icon_href

        # Fetch Open Graph Image
        image = soup.find("meta", property="og:image")

        if image is None:
            # Use favicon if no image is specified
            self.image = self.icon

        if image is not None:
            # Check if image link is global or relative
            image_link = image["content"]

            if image_link.find("http") != -1:
                self.image = image_link
            else:
                self.image = self.url + image_link

          """  self.image = builder.create_url(
Example #33
import gzip

data = b"bjfbjskdfkjsdhfbsdjkfsdf"
t = gzip.compress(data)
print(t)
data = gzip.decompress(t)
print(data)
Example #34
 def gzip_decompress(data):
     return gzip.decompress(data)
Example #35
def testurl(url, showresponse=0, responseheaders=0):
    try:
        headerdict = json.loads(headerstxt)
        headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64)'}
        rqobj = urlreq.Request(url, None, headers)
        if (headerstxt != ""):
            for key in headerdict:
                rqobj.add_header(key, headerdict[key])
        result = urlreq.urlopen(rqobj, context=ctx)
        responsebody = result.read()
        print(
            "------------------------------------------------------------------------------------------------"
        )
        print(Fore.GREEN + "URL: " + url)
        print(Fore.WHITE + "HTTP Status: " + str(result.status))
        print("Response Size:" + str(len(responsebody)))
        piiList = ""
        if (showresponse == 1):
            print("Response Sample:")
            try:
                print(gzip.decompress(responsebody)[:300])
                if re.search(rb'\S+@\S+', gzip.decompress(responsebody)):
                    piiList = piiList + " email |"
                if re.search(rb'(lastname|firstname|first.name|last.name)',
                             gzip.decompress(responsebody), re.IGNORECASE):
                    piiList = piiList + " name |"
            except:
                print(responsebody[:300])
                print("REGULAR")
                #print(responsebody.decode("utf8", 'ignore')[:300])
                if re.search(r'\S+@\S+', responsebody.decode("utf8",
                                                             'ignore')):
                    piiList = piiList + " email |"
                if re.search('(lastname|firstname|first.name|last.name)',
                             responsebody.decode("utf8", 'ignore'),
                             re.IGNORECASE):
                    piiList = piiList + " name |"
        if (responseheaders == 1):
            print("")
            print("Response Headers:")
            respheaders = result.info()
            print(respheaders)
        if (piiList != ""):
            print(Fore.RED + " Possible PII ( |" + piiList + ") Found")
        return (True, result.url)
    except urllib.error.URLError as e:
        print(Fore.GREEN + "URL: " + url)
        try:
            print(Fore.WHITE + "HTTP Status: " + str(e.reason))
            responsebody = e.read()
            try:
                print(gzip.decompress(responsebody)[:300])
            except:
                print(e.read(300).decode("utf8", 'ignore'))
            #response = str(e.read().decode("utf8", 'ignore'))
            #regex results to check for some particular frameworks in debug mode
            if re.search(rb'DEBUG...True', responsebody):
                print(Fore.RED + " Possible Django Debug Page Found")
            if re.search(rb'Whoops..There was an error', responsebody):
                print(Fore.RED + " Possible Laravel Debug Page Found")
        except:
            print(Fore.RED + "An error occurred")
        print(Fore.WHITE + "")
        return (False, '')
    except:
        return (False, '')
Example #36
def test_check_messages():
    sub = PubsubSubscriber(sub_client=pubsub_v1.SubscriberClient())
    for message in sub.pull('x-i-a-test', 'xialib-sub-01'):
        header, data, id = sub.unpack_message(message)
        assert len(json.loads(gzip.decompress(data).decode())) == 2
        sub.ack('x-i-a-test', 'xialib-sub-01', id)
Example #37
 def _gzip_loads(self, data):
     return self._pickle_loads(gzip.decompress(data))
Example #38
def load_data_from_request(request):
    data = None
    if request.method == "POST":
        if request.content_type in ["", "text/plain", "application/json"]:
            data = request.body
        else:
            data = request.POST.get("data")
    else:
        data = request.GET.get("data")

    if not data:
        return None

    # add the data in sentry's scope in case there's an exception
    with configure_scope() as scope:
        scope.set_context("data", data)
        scope.set_tag("origin", request.META.get("REMOTE_HOST", "unknown"))
        scope.set_tag("referer", request.META.get("HTTP_REFERER", "unknown"))
        # since version 1.20.0 posthog-js adds its version to the `ver` query parameter as a debug signal here
        scope.set_tag("library.version", request.GET.get("ver", "unknown"))

    compression = (
        request.GET.get("compression") or request.POST.get("compression") or request.headers.get("content-encoding", "")
    )
    compression = compression.lower()

    if compression == "gzip" or compression == "gzip-js":
        if data == b"undefined":
            raise RequestParsingError(
                "data being loaded from the request body for decompression is the literal string 'undefined'"
            )

        try:
            data = gzip.decompress(data)
        except (EOFError, OSError) as error:
            raise RequestParsingError("Failed to decompress data. %s" % (str(error)))

    if compression == "lz64":
        if not isinstance(data, str):
            data = data.decode()
        data = data.replace(" ", "+")

        data = lzstring.LZString().decompressFromBase64(data)

        if not data:
            raise RequestParsingError("Failed to decompress data.")

        data = data.encode("utf-16", "surrogatepass").decode("utf-16")

    base64_decoded = None
    try:
        base64_decoded = base64_decode(data)
    except Exception:
        pass

    if base64_decoded:
        data = base64_decoded

    try:
        # parse_constant gets called in case of NaN, Infinity etc
        # default behaviour is to put those into the DB directly
        # but we just want it to return None
        data = json.loads(data, parse_constant=lambda x: None)
    except (json.JSONDecodeError, UnicodeDecodeError) as error_main:
        raise RequestParsingError("Invalid JSON: %s" % (str(error_main)))

    # TODO: data can also be an array, function assumes it's either None or a dictionary.
    return data
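
For reference, a sketch of how a gzip-compressed payload accepted by this function could be produced in a test; the Django request object itself is not reproduced here:

import gzip, json

event = {"event": "pageview", "properties": {"distinct_id": "abc"}}
body = gzip.compress(json.dumps(event).encode("utf-8"))

# the gzip branch of load_data_from_request effectively performs:
assert json.loads(gzip.decompress(body)) == event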
Example #39
def planttfdb(MSUID):

    # Find the file
    url = 'http://planttfdb.cbi.pku.edu.cn/download.php'
    html_page = helper.connectionError(url)
    soup = BeautifulSoup(html_page.content, "lxml")
    # Find headers
    for search in soup.findAll('table', {"id": "oid_tfid"}):
        for linkfound in search.findAll('a'):
            if (linkfound.contents[0] == "Oryza sativa subsp. japonica"):
                link = 'http://planttfdb.cbi.pku.edu.cn/' + linkfound.get(
                    'href')
                break

    # Give the entire name of the file with the extension .gz
    filename = link.split("/")[-1]

    # Give the name of the file without .gz
    uncompressName = filename[:-3] + ".txt"
    pathToFile = helper.formatPathToFile(uncompressName)

    # Test existant file
    if (not helper.existFile(pathToFile)):
        print("on telecharege")
        # Fetch the file by the url and decompress it
        r = requests.get(link)
        decompressedFile = gzip.decompress(r.content)

        # Create the file .txt

        with open(pathToFile, "wb") as f:
            f.write(decompressedFile)
            f.close()

    # Use the previous created file (.txt)
    with open(pathToFile, "r+b") as file:

        # Import file tab-delimited

        try:
            array = pd.read_csv(file, sep="\t", header=None)
        except pd.errors.EmptyDataError:
            array = pd.DataFrame()
        # Named columns
        array.columns = ["TF_ID", "Gene_ID", "Family"]

        data = array.loc[array['TF_ID'] == MSUID]

    if (not data.empty):

        return data
    else:
        data = array.loc[array['Gene_ID'] == MSUID]

    if (data.empty):

        return False
    else:

        hashmap = {"Family": data["Family"].values[0]}
        return hashmap
Example #40
 async def decompress(cls, data: bytes, headers: T_Headers) -> bytes:
     return gzip.decompress(data)
Example #41
                        host='localhost',
                        port='5432')
cur = conn.cursor()
cur.execute(''' CREATE TABLE IF NOT EXISTS test_temp (
                id BIGINT PRIMARY KEY NOT NULL,
                item JSONB NOT NULL,
                bid BIGINT NOT NULL,
                buyout BIGINT NOT NULL,
                quantity INT NOT NULL,
                unit_price BIGINT NOT NULL,
                time_left TEXT NOT NULL
                );''')
conn.commit()

with open('/home/olenovo/projects/Wow2/out_157/202011232000.gz', 'rb') as f:
    file_content = gzip.decompress(f.read())
record_list = json.loads(file_content)['auctions']

table_name = 'test_temp'
columns = [
    'id', 'item', 'bid', 'buyout', 'quantity', 'unit_price', 'time_left'
]
for i in range(0, len(record_list)):
    vals = []
    for column in columns:
        try:
            vals.append(record_list[i][column])
        except KeyError:
            vals.append(0)
    sql_string = 'insert into test_temp (%s)  values (%s, %s, %s, %s, %s, %s, %s) on conflict (id) do nothing'
    #print(cur.mogrify(sql_string, (AsIs(', '.join(columns)), vals[0], json.dumps(vals[1]), vals[2], vals[3], vals[4], vals[5], vals[6])))
  + "BOVPqjhAdItXunkKPR5fMz/cTcnTFYJWqe8rm1dXKM/EocaM2K6XMEsqs0tb9JkroWWf7oQUKRnU6l3q3jLn8X+d/LFN2PT3kt5Sjm7iw0i+SRt0nTkQYEtQEJwmfoZF"
  + "bwxV1OClngYvGQ2R7LO6VJMB+aYfTwEQJCn80WoB3g30BMKofDdmU0ItOXEnSOmIreFn5c0Az6p6XjrREqUD36oxMBNNXIrnBn89yRkzALdV+k9yOZHLxZvvr1V8fVja"
  + "jAmZFnV6TAPJc90eOReCERzlbpw/QGdMQiOBXr1GbD58UkqWQ6Fh7T1jpZKc+PLtPrm8JetD97P2T1StlxPOBGgHC5yFZgSCCgoq7FO8NAcpal6ypI+gEXyeiKVbS2qq"
  + "FI+uLOyaGguEACaFs+Yb2Fb1GWzekjmCmCGTN8zq/hPs2DdaneEYjHWch84bABK00op0aQLMUuh5sgvP+hdjeaMXtUKvYrlfFW42rSw+2HiOFw4Hk7Wnt4VK3SHXZy9s"
  + "wkYuxFuiPcnxTo9h3Mws9VBr2epo+okJfhkFV731WeF65YS/AXuheb6CMotLzXFUFWGReoWzj2XGFZJd2k1Jq3M06IAKFfmKzs4EbliNkcHrQiVaRfsbkCo9i8aAVKE4"
  + "fdRLPDwqb6nEony/wJeQHE1U8gZkt17xpvFBwOs00UTGPOJx3dme42yPe+Ws2/pSVsFHER+0jRntnjzyhI4XWpjq7mp6V54pIAHDghNKgnggWIT2zD/ZXlX+I9GvCn9p"
  + "+DdEctYlMBPQ9v5y1Chsxk3daGnPWK3OmFVRVleNRgTqjEvcX93e4rn1SMsZa8LXqmoqz+Xl6IQNG94faJkckZfOl77NnQrkzKxRdbOVBMvTxieAUDu55EZXTDnthWzH"
  + "c6KKBhmUKjOlqxSPLCoFx4L7gT3nvm+3puGoMU8s4exfcNEcl58zAtdUxKygNvRSyaqt4jxY59ZlBJ3g1QSQlVd771ftfLkORDm9dS5gJ/7rvUtpXHt9/nfx3P7+6W0Q"
  + "0LL2yvDvAdnA/0c/4DXuairg+8Hj8YxB4K/vN/nP+6vA31SGvs8T5L0fwoCPrX/fAv6fR0cG/zYJL/2+sdcc9vy/P04M/n75Zd5mNLzeHQbs+b+/b80Z/Nzs/f36aPuO"
  + "cAj4fr/K/fdTsBdWFHDMMAz/xftetvevTWD284OlvuBnk+P+b76/X8JOVUBfX2uDwO7fd7X6iIB/Jz8PGe+ndwLfT5h02Xd3kK/DV58Kz58d6v6/9/qeH2xoBWbX9P2H"
  + "ad4+TFINjgr2+mYerqgEDiVer2L/BXwDn7c6Du4+EANeCwJiMj5ifj78+P59VCQkCHQtBXziS22vfYwJLAh9UT1efStt3v1z9b7fWvu48Qg4dPj31rC3ZnDykXL6/WNx"
  + "RPfvaazvfenu3zQv0O7pTL+g/Ofi39t+390DUncAy9prwd+vo4zP5ykBNG2Pd7a/JDn/Hv5b3Lj1u30qIeBH4S7xk8P/++Um4N+sQh9D31/8+72H5++K9a1/J4Q/mx3A"
  + "v7tr/93g7srvT8Pr8/82h94uJfp33ltfz/UdMwb/L/6vhFn9P9h9dSn6RANQkP8BzUk35Q9BAAA=")
g = base64.b64decode(_g)[1:]
for i in range(base64.b64decode(_g)[0]):
    g = gzip.decompress(g)
g=list(g)
def gr(x,y):
    if(x>=0 and y>=0 and x<500 and y<180):
        return g[y*500 + x];
    return 0;
def gw(x,y,v):
    if(x>=0 and y>=0 and x<500 and y<180):
        g[y*500 + x]=v;
def td(a,b):
    return ((0)if(b==0)else(a//b))
def tm(a,b):
    return ((0)if(b==0)else(a%b))
s=[]
def sp():
    global s
Example #43
    nameo = sys.argv[1]
    named = sys.argv[2]

    # String is Base64 Encoded
    with open('origin.txt', 'rb') as fo:
        raw = fo.read()

    # raw_zip = Decoded ByteString
    raw_zip = bytearray(base64.b64decode(raw))[4:]

    # gzip_header to be kept
    zip_header = raw_zip[:10]

    # xml file
    raw_content = gzip.decompress(raw_zip)

    # replacement here
    result_content = raw_content.decode().replace(nameo, named).encode()

    # compress the data and append the sap AO accepted gzip header
    result_zip = gzip.compress(result_content, 6)
    result_zip = zip_header + bytearray(result_zip)[10:]

    # add AO accepted string header and base64 encoding
    result = base64.b64encode(
        len(result_content).to_bytes(4, "little") + result_zip)

    # generating output
    with open('result.txt', 'wb') as fw:
        fw.write(result)
Example #44
def main(args, pacu_main: 'Main'):
    session = pacu_main.get_active_session()

    ###### Don't modify these. They can be removed if you are not using the function.
    args = parser.parse_args(args)
    print = pacu_main.print

    fetch_data = pacu_main.fetch_data
    ######

    instances = []
    templates = []
    summary_data = {'instance_downloads': 0, 'template_downloads': 0}

    if args.instance_ids is not None:
        for instance in args.instance_ids.split(','):
            instance_id, region = instance.split('@')
            instances.append({'InstanceId': instance_id, 'Region': region})
    elif args.template_ids is None:
        # If args.instance_ids was not passed in,
        # only fetch instances if args.template_ids
        # is also None
        if fetch_data(['EC2', 'Subnets'],
                      module_info['prerequisite_modules'][0],
                      '--instances') is False:
            print('Pre-req module not run successfully. Exiting...')
            return None
        instances = session.EC2['Instances']

    if args.template_ids is not None:
        for template in args.template_ids.split(','):
            template_id, region = template.split('@')
            templates.append({
                'LaunchTemplateId': template_id,
                'Region': region
            })
    elif args.instance_ids is None:
        # If args.template_ids was not passed in,
        # only fetch templates if args.instance_ids
        # is also None
        if fetch_data(['EC2', 'LaunchTemplates'],
                      module_info['prerequisite_modules'][0],
                      '--launch-templates') is False:
            print('Pre-req module not run successfully. Exiting...')
            templates = []
        else:
            templates = session.EC2['LaunchTemplates']

    if instances:
        print('Targeting {} instance(s)...'.format(len(instances)))
        for instance in instances:

            # if the filter is actived check the tags. If tags do not match skip instance
            if args.filter and not has_tags(args.filter.split(','), instance):
                continue

            instance_id = instance['InstanceId']
            region = instance['Region']
            client = pacu_main.get_boto3_client('ec2', region)

            try:
                user_data = client.describe_instance_attribute(
                    InstanceId=instance_id, Attribute='userData')['UserData']
            except ClientError as error:
                code = error.response['Error']['Code']
                print('FAILURE: ')
                if code == 'AccessDenied':
                    print('  Access denied to DescribeInstanceAttribute.')
                    print('Skipping the rest of the instances...')
                    break
                else:
                    print('  ' + code)

            if 'Value' in user_data.keys():
                decoded = base64.b64decode(user_data['Value'])

                try:
                    decompressed = gzip.decompress(decoded)
                    formatted_user_data = '{}@{}:\n{}\n\n'.format(
                        instance_id, region,
                        decompressed.decode('utf-8', 'backslashreplace'))
                except:
                    formatted_user_data = '{}@{}:\n{}\n\n'.format(
                        instance_id, region,
                        decoded.decode('utf-8', 'backslashreplace'))

                print('  {}@{}: User Data found'.format(instance_id, region))

                # Check for secrets
                find_secrets(formatted_user_data)

                # Write to the "all" file
                with save('ec2_user_data/all_user_data.txt', 'a+') as f:
                    f.write(formatted_user_data)
                # Write to the individual file
                with save('ec2_user_data/{}.txt'.format(instance_id)) as f:
                    f.write(
                        formatted_user_data.replace('\\t', '\t').replace(
                            '\\n', '\n').rstrip())
                summary_data['instance_downloads'] += 1
            else:
                print('  {}@{}: No User Data found'.format(
                    instance_id, region))
        print()
    else:
        print('No instances to target.\n')

    if templates:
        print('Targeting {} launch template(s)...'.format(len(templates)))
        for template in templates:
            template_id = template['LaunchTemplateId']
            region = template['Region']
            client = pacu_main.get_boto3_client('ec2', region)

            all_versions = []

            try:
                response = client.describe_launch_template_versions(
                    LaunchTemplateId=template_id)
                all_versions.extend(response['LaunchTemplateVersions'])
            except ClientError as error:
                code = error.response['Error']['Code']
                print('FAILURE: ')
                if code == 'AccessDenied':
                    print('  Access denied to DescribeLaunchTemplateVersions.')
                    print('Skipping the rest of the launch templates...\n')
                    break
                else:
                    print('  ' + code)

            while response.get('NextToken'):
                response = client.describe_launch_template_versions(
                    LaunchTemplateId=template_id,
                    NextToken=response['NextToken'])
                all_versions.extend(response['LaunchTemplateVersions'])

            for version in all_versions:
                if version['LaunchTemplateData'].get('UserData'):
                    try:
                        was_unzipped = False
                        user_data = version['LaunchTemplateData']['UserData']
                        formatted_user_data = '{}-version-{}@{}:\n{}\n\n'.format(
                            template_id, version['VersionNumber'], region,
                            base64.b64decode(user_data).decode('utf-8'))
                    except UnicodeDecodeError as error:
                        try:
                            decoded = base64.b64decode(user_data)
                            decompressed = gzip.decompress(decoded)
                            formatted_user_data = '{}-version-{}@{}:\n{}\n\n'.format(
                                template_id, version['VersionNumber'], region,
                                decompressed.decode('utf-8'))
                            was_unzipped = True
                        except:
                            print('ERROR: GZIP decompressing template data')
                    print('  {}-version-{}@{}: User Data found'.format(
                        template_id, version['VersionNumber'], region))
                    if was_unzipped:
                        print('    Gzip decoded the User Data')

                    # Write to the "all" file
                    with save('ec2_user_data/all_user_data.txt', 'a+') as f:
                        f.write(formatted_user_data)
                    # Write to the individual file
                    with save('ec2_user_data/{}-version-{}.txt'.format(
                            template_id, version['VersionNumber'])) as f:
                        f.write(
                            formatted_user_data.replace('\\t', '\t').replace(
                                '\\n', '\n').rstrip())
                    summary_data['template_downloads'] += 1
                else:
                    print('  {}-version-{}@{}: No User Data found'.format(
                        template_id, version['VersionNumber'], region))
        print()
    else:
        print('No launch templates to target.\n')

    return summary_data
Example #45
 def get_primary_xml(self, repomd_xml):
     primary_xml_url = repomd_xml.find(f"{REPO_NS}data[@type='primary']/{REPO_NS}location").attrib["href"]
     response = self.loader.load(urljoin(self.url, primary_xml_url))
     return ElementTree.fromstring(gzip.decompress(response.content))
Example #46
def gen_archive(config: ConfigParser, log: GitLog, outdir: Path) -> None:

    for s in config.sections():
        print(f"Working on archive page of: {s}")
        name = config.get(s, "name")
        info_url = config.get(s, "info_url", fallback=None)
        archive_dir = outdir / f"archive/{s}"
        archive_files = archive_dir.glob("commit_*.txt.gz")

        # read cache
        reports_cache: Dict[Path, Report] = {}
        cache_fn = f"{archive_dir}/reports.cache"
        if path.exists(cache_fn):
            with open(cache_fn, "rb") as f:
                reports_cache = pickle.load(f)
            cache_age = path.getmtime(cache_fn)
            # remove outdated cache entries
            reports_cache = {
                k: v for k, v in reports_cache.items() if path.getmtime(k) < cache_age
            }

        # read all archived reports
        archive_reports: Dict[GitSha, Report] = {}
        for fn in archive_files:
            report: Report
            if fn in reports_cache:
                report = reports_cache[fn]
            else:
                with open(fn, "rb") as f:
                    content = gzip.decompress(f.read())
                report = parse_report(content.decode("utf-8", errors="replace"), log)
                report.archive_path = path.basename(fn)[:-3]
                reports_cache[fn] = report
            assert report.sha and report.sha not in archive_reports
            archive_reports[report.sha] = report

        # write cache
        if reports_cache:
            with open(cache_fn, "wb") as f:
                pickle.dump(reports_cache, f)

        # loop over all relevant commits
        all_url_rows = []
        all_html_rows = []
        max_age_full = max([-1] + [log.index_by_sha[sha] for sha in archive_reports])
        for commit in log.commits[: max_age_full + 1]:
            sha = commit.sha
            html_row = "<tr>"
            html_row += commit_cell(sha, log)
            if sha in archive_reports:
                report = archive_reports[sha]
                assert report.archive_path
                html_row += status_cell(report.status, report.archive_path)
                html_row += f'<td align="left">{report.summary}</td>'
                archive_base_url = "https://dashboard.cp2k.org/archive"
                url_row = f"{archive_base_url}/{s}/{report.archive_path}.gz\n"
            else:
                html_row += 2 * "<td></td>"
                url_row = ""
            html_row += f'<td align="left">{html.escape(commit.author_name)}</td>'
            html_row += f'<td align="left">{html.escape(commit.message)}</td>'
            html_row += "</tr>\n\n"
            all_html_rows.append(html_row)
            all_url_rows.append(url_row)

        # generate html pages
        for full_archive in (False, True):
            if full_archive:
                html_out_postfix = "index_full.html"
                urls_out_postfix = "list_full.txt"
                toggle_link = '<p>View <a href="index.html">recent archive</a></p>'
                max_age = max_age_full
            else:
                html_out_postfix = "index.html"
                urls_out_postfix = "list_recent.txt"
                toggle_link = '<p>View <a href="index_full.html">full archive</a></p>'
                max_age = 100

            # generate archive index
            output = html_header(title=name)
            output += '<p>Go back to <a href="../../index.html">main page</a></p>\n'
            if info_url:
                output += f'<p>Get <a href="{info_url}">more information</a></p>\n'
            output += gen_plots(archive_reports, log, archive_dir, full_archive)
            output += toggle_link
            output += '<table border="1" cellspacing="3" cellpadding="5">\n'
            output += "<tr><th>Commit</th><th>Status</th><th>Summary</th>"
            output += "<th>Author</th><th>Commit Message</th></tr>\n\n"
            output += "".join(all_html_rows[: max_age + 1])
            output += "</table>\n"
            output += toggle_link
            output += html_footer()
            write_file(archive_dir / html_out_postfix, output)

            url_list = "".join(all_url_rows[: max_age + 1])
            write_file(archive_dir / urls_out_postfix, url_list)
Example #47
0
def bytes2dir(tarbytes: bytes, dirname='.') -> None:
    """Take the bytes that make up a gzip'd tar archive and decompress to disk."""
    fileobj = io.BytesIO(gzip.decompress(tarbytes))
    with tarfile.TarFile(fileobj=fileobj) as tar:
        tar.extractall(dirname)
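A hedged round-trip sketch for the helper above; dir2bytes is a hypothetical inverse written here for illustration and is not part of the original snippet:

import io
import tarfile

def dir2bytes(dirname: str) -> bytes:
    """Hypothetical inverse of bytes2dir: pack a directory into gzip'd tar bytes."""
    buf = io.BytesIO()
    with tarfile.open(fileobj=buf, mode='w:gz') as tar:
        tar.add(dirname, arcname='.')
    return buf.getvalue()

# round trip: pack ./src, then unpack it again into ./copy
# bytes2dir(dir2bytes('./src'), './copy')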
Example #48
0
def get_data():
    remark = request.args.get(
        'remark') if 'remark' in request.args else 'normal'
    project = request.args.get('project')
    User_Agent = request.headers.get(
        'User-Agent'
    )  #Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.100 Safari/537.36
    Host = request.headers.get('Host')  #: 10.16.5.241:5000
    Connection = request.headers.get('Connection')  #: keep-alive
    Pragma = request.headers.get('Pragma')  #: no-cache
    Cache_Control = request.headers.get('Cache-Control')  #: no-cache
    Accept = request.headers.get('Accept')[0:254] if request.headers.get(
        'Accept') else None  #: image/webp,image/apng,image/*,*/*;q=0.8
    Accept_Encoding = request.headers.get(
        'Accept-Encoding')[0:254] if request.headers.get(
            'Accept-Encoding') else None  #: gzip, deflate
    Accept_Language = request.headers.get(
        'Accept-Language')[0:254] if request.headers.get(
            'Accept-Language') else None  #: zh-CN,zh;q=0.9
    ua_platform = request.user_agent.platform  # client operating system
    ua_browser = request.user_agent.browser  # client browser
    ua_version = request.user_agent.version  # client browser version
    ua_language = request.user_agent.language  # client browser language
    ext = request.args.get('ext')
    url = request.url
    # ip = '124.115.214.179'  # test case for the Xi'an bug
    # ip = '36.5.99.68'  # test case for the Anhui bug
    if request.headers.get('X-Forwarded-For') is None:
        ip = request.remote_addr  # server is exposed directly
    else:
        ip = request.headers.get('X-Forwarded-For')  # real client address behind the SLB
    ip_city, ip_is_good = get_addr(ip)
    ip_asn, ip_asn_is_good = get_asn(ip)
    if ip_is_good == 0:
        ip_city = '{}'
    if ip_asn_is_good == 0:
        ip_asn = '{}'
    referrer = request.referrer
    if request.method == 'POST':
        # print(request.form.get())
        if 'data_list' in request.form:
            data_list = request.form.get('data_list')
            de64 = base64.b64decode(
                urllib.parse.unquote(data_list).encode('utf-8'))
            try:
                data_decodes = json.loads(gzip.decompress(de64))
            except:
                data_decodes = json.loads(de64)
            for data_decode in data_decodes:
                insert_data(project=project,
                            data_decode=data_decode,
                            User_Agent=User_Agent,
                            Host=Host,
                            Connection=Connection,
                            Pragma=Pragma,
                            Cache_Control=Cache_Control,
                            Accept=Accept,
                            Accept_Encoding=Accept_Encoding,
                            Accept_Language=Accept_Language,
                            ip=ip,
                            ip_city=ip_city,
                            ip_asn=ip_asn,
                            url=url,
                            referrer=referrer,
                            remark=remark,
                            ua_platform=ua_platform,
                            ua_browser=ua_browser,
                            ua_version=ua_version,
                            ua_language=ua_language,
                            ip_is_good=ip_is_good,
                            ip_asn_is_good=ip_asn_is_good)
        elif 'data' in request.form:
            # print(request.cookies)
            data = request.form.get('data')
            de64 = base64.b64decode(urllib.parse.unquote(data).encode('utf-8'))
            try:
                data_decode = json.loads(gzip.decompress(de64))
            except:
                data_decode = json.loads(de64)
            insert_data(project=project,
                        data_decode=data_decode,
                        User_Agent=User_Agent,
                        Host=Host,
                        Connection=Connection,
                        Pragma=Pragma,
                        Cache_Control=Cache_Control,
                        Accept=Accept,
                        Accept_Encoding=Accept_Encoding,
                        Accept_Language=Accept_Language,
                        ip=ip,
                        ip_city=ip_city,
                        ip_asn=ip_asn,
                        url=url,
                        referrer=referrer,
                        remark=remark,
                        ua_platform=ua_platform,
                        ua_browser=ua_browser,
                        ua_version=ua_version,
                        ua_language=ua_language,
                        ip_is_good=ip_is_good,
                        ip_asn_is_good=ip_asn_is_good)
        else:
            write_to_log(filename='api',
                         defname='get_datas',
                         result=str(request.form))
            # print(request.form)
    elif request.method == 'GET':
        # try:
        if 'data' in request.args:
            data = request.args.get('data')
            de64 = base64.b64decode(urllib.parse.unquote(data).encode('utf-8'))
            try:
                data_decode = json.loads(gzip.decompress(de64))
            except:
                data_decode = json.loads(de64)
            insert_data(project=project,
                        data_decode=data_decode,
                        User_Agent=User_Agent,
                        Host=Host,
                        Connection=Connection,
                        Pragma=Pragma,
                        Cache_Control=Cache_Control,
                        Accept=Accept,
                        Accept_Encoding=Accept_Encoding,
                        Accept_Language=Accept_Language,
                        ip=ip,
                        ip_city=ip_city,
                        ip_asn=ip_asn,
                        url=url,
                        referrer=referrer,
                        remark=remark,
                        ua_platform=ua_platform,
                        ua_browser=ua_browser,
                        ua_version=ua_version,
                        ua_language=ua_language,
                        ip_is_good=ip_is_good,
                        ip_asn_is_good=ip_asn_is_good)
        else:
            write_to_log(filename='api', defname='get_datas', result=url)
    else:
        write_to_log(filename='api',
                     defname='get_datas',
                     result=str(request.method) + url)
    bitimage1 = os.path.join('image', '43byte.gif')
    with open(bitimage1, 'rb') as f:
        returnimage = f.read()
    return Response(returnimage, mimetype="image/gif")
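For reference, a hedged sketch of a client that produces the data parameter this endpoint decodes (JSON, gzip-compressed, base64-encoded, then URL-quoted). The endpoint path and the project value are illustrative assumptions, not taken from the code above:

import base64
import gzip
import json
import urllib.parse
import urllib.request

def send_event(event: dict, endpoint='http://localhost:5000/get_data?project=demo'):
    # mirror the server's decode order: unquote -> base64 decode -> gzip.decompress -> json.loads
    payload = urllib.parse.quote(
        base64.b64encode(gzip.compress(json.dumps(event).encode('utf-8'))).decode('ascii'))
    body = urllib.parse.urlencode({'data': payload}).encode('utf-8')
    return urllib.request.urlopen(urllib.request.Request(endpoint, data=body, method='POST'))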
Example #49
0
    def add():
        global wrok
        global insatData
        wrok = 0
        request.get_data()
        compressed_data = request.data
        decom = gzip.decompress(compressed_data)
        data1 = decom.decode('UTF-8')
        print(type(data1))
        ldata = {}
        print(str(len(names)))
        for ind in range(len(names)):

            n1 = names[ind]
            print('CHECK ' + str(ind) + ' ' + n1 + ' ')
            orig = orignames[ind]
            p1 = data1.find(n1)
            if p1 < 0:
                print('No name in data ' + n1 + ' ' + orig)
                continue
            s1 = ''
            # skip past the matched name (and its separator) to the text after it
            while (data1[p1] != ' '):
                p1 += 1
            p1 += 2
            # read the rest of the line: a dotted tag path
            while (data1[p1] != '\n'):
                s1 += data1[p1]
                p1 += 1
            print(s1)
            name = s1.split('.')
            nm = ''
            for i in range(3):
                nm += name[i] + '.'
            nm += name[3]
            print(nm)
            nm += '.Znachenie'
            p2 = data1.find(nm)
            if p2 < 0:
                print('!!!No value ' + n1 + ' ' + orig + ' = ' + nm)
                continue
            # back up towards the start of that line, then collect the value up to the quote
            while (data1[p2 - 2] != '\n'):
                p2 -= 1
            sval = ''
            while (data1[p2] != '\"'):
                sval += data1[p2]
                p2 += 1
            print('value[' + orig + '] = ' + sval)
            varval = sval.replace(',', '.')

            try:
                nval = float(varval)
            except ValueError:
                # non-numeric value: fall back to the sentinel -13
                nval = -13
            if orignames[ind].find('DS') > -1:
                varval = "%d" % (int(nval) * 1000)

            ldata[orignames[ind]] = varval

        #print( len(data.decode('UTF-8') ))
        test1 = "".join(str(ldata))
        insatData = test1.replace('\'', '\"')
        print(insatData)

        print(len(names))
        if wrok == 0:
            f = open('insatJson.txt', 'w')
            f.write(insatData)
            f.close()
            wrok = 1


#        shutdown()
        resp = make_response('hello')
        resp.headers['content-type'] = 'text/html'
        resp.headers['Access-Control-Allow-Origin'] = '*'
        resp.headers[
            'Access-Control-Allow-Methods'] = 'GET, POST, PUT, DELETE, OPTIONS'
        return resp
Example #50
0
 def unpack_data(self):
     if self.is_gzipped_html():
         data = gzip.decompress(self.packed_data)
         self._data = data.decode('utf8')
Example #51
0
    def download(self,
                 link,
                 media_resource=None,
                 path=None,
                 convert_to_vtt=False):
        """
        Downloads a subtitle link (.srt/.vtt file or gzip/zip OpenSubtitles archive link) to the specified directory

        :param link: Local subtitles file or OpenSubtitles gzip download link
        :type link: str

        :param path: Path where the subtitle file will be downloaded (default: temporary file under /tmp)
        :type path: str

        :param media_resource: Name of the media resource. If set and if it's a
            media local file then the subtitles will be saved in the same folder
        :type media_resource: str

        :param convert_to_vtt: If set to True, then the downloaded subtitles
            will be converted to VTT format (default: no conversion)
        :type convert_to_vtt: bool

        :returns: dict.

        Format::

            {
                "filename": "/path/to/subtitle/file.srt"
            }

        """

        if link.startswith('file://'):
            link = link[len('file://'):]
        if os.path.isfile(link):
            if convert_to_vtt:
                link = self.to_vtt(link).output
            return {'filename': link}

        gzip_content = requests.get(link).content

        if not path and media_resource:
            if media_resource.startswith('file://'):
                media_resource = media_resource[len('file://'):]
            if os.path.isfile(media_resource):
                media_resource = os.path.abspath(media_resource)
                path = os.path.join(
                    os.path.dirname(media_resource), '.'.join(
                        os.path.basename(media_resource).split('.')
                        [:-1])) + '.srt'

        if path:
            f = open(path, 'wb')
        else:
            f = tempfile.NamedTemporaryFile(prefix='media_subs_',
                                            suffix='.srt',
                                            delete=False)
            path = f.name

        try:
            with f:
                f.write(gzip.decompress(gzip_content))
            if convert_to_vtt:
                path = self.to_vtt(path).output
        except Exception as e:
            os.unlink(path)
            raise e

        return {'filename': path}
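A hedged usage sketch for the method above; plugin, the download link and the media path are illustrative placeholders:

# Fetch an OpenSubtitles gzip link, save the .srt next to the local video file,
# and convert the result to VTT.
result = plugin.download(
    link='https://dl.opensubtitles.org/en/download/file/123456.gz',  # illustrative link
    media_resource='file:///movies/example.mkv',
    convert_to_vtt=True)
print(result['filename'])  # e.g. /movies/example.vtt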
Example #52
0
def prepareBatchesForExecutionTrace(configDir, executionTraceId,
                                    executionSessionId, batchDirectory):
    try:
        config = Configuration(configDir)

        agent = DeepLearningAgent(config, whichGpu=None)

        sampleCacheDir = config.getKwolaUserDataDirectory("prepared_samples")
        cacheFile = os.path.join(sampleCacheDir,
                                 executionTraceId + ".pickle.gz")

        if not os.path.exists(cacheFile):
            addExecutionSessionToSampleCache(executionSessionId, config)
            cacheHit = False
        else:
            cacheHit = True

        with open(cacheFile, 'rb') as file:
            sampleBatch = pickle.loads(gzip.decompress(file.read()))

        imageWidth = sampleBatch['processedImages'].shape[3]
        imageHeight = sampleBatch['processedImages'].shape[2]

        # Calculate the crop positions for the main training image
        if config['training_enable_image_cropping']:
            randomXDisplacement = random.randint(
                -config['training_crop_center_random_x_displacement'],
                config['training_crop_center_random_x_displacement'])
            randomYDisplacement = random.randint(
                -config['training_crop_center_random_y_displacement'],
                config['training_crop_center_random_y_displacement'])

            cropLeft, cropTop, cropRight, cropBottom = agent.calculateTrainingCropPosition(
                sampleBatch['actionXs'][0] + randomXDisplacement,
                sampleBatch['actionYs'][0] + randomYDisplacement, imageWidth,
                imageHeight)
        else:
            cropLeft = 0
            cropRight = imageWidth
            cropTop = 0
            cropBottom = imageHeight

        # Calculate the crop positions for the next state image
        if config['training_enable_next_state_image_cropping']:
            nextStateCropCenterX = random.randint(10, imageWidth - 10)
            nextStateCropCenterY = random.randint(10, imageHeight - 10)

            nextStateCropLeft, nextStateCropTop, nextStateCropRight, nextStateCropBottom = agent.calculateTrainingCropPosition(
                nextStateCropCenterX,
                nextStateCropCenterY,
                imageWidth,
                imageHeight,
                nextStepCrop=True)
        else:
            nextStateCropLeft = 0
            nextStateCropRight = imageWidth
            nextStateCropTop = 0
            nextStateCropBottom = imageHeight

        # Crop all the input images and update the action x & action y
        # This is done at this step because the cropping is random
        # and thus you don't want to store the randomly cropped version
        # in the redis cache
        sampleBatch['processedImages'] = sampleBatch[
            'processedImages'][:, :, cropTop:cropBottom, cropLeft:cropRight]
        sampleBatch['pixelActionMaps'] = sampleBatch[
            'pixelActionMaps'][:, :, cropTop:cropBottom, cropLeft:cropRight]
        sampleBatch['rewardPixelMasks'] = sampleBatch[
            'rewardPixelMasks'][:, cropTop:cropBottom, cropLeft:cropRight]
        sampleBatch['actionXs'] = sampleBatch['actionXs'] - cropLeft
        sampleBatch['actionYs'] = sampleBatch['actionYs'] - cropTop

        sampleBatch['nextProcessedImages'] = sampleBatch[
            'nextProcessedImages'][:, :, nextStateCropTop:nextStateCropBottom,
                                   nextStateCropLeft:nextStateCropRight]
        sampleBatch['nextPixelActionMaps'] = sampleBatch[
            'nextPixelActionMaps'][:, :, nextStateCropTop:nextStateCropBottom,
                                   nextStateCropLeft:nextStateCropRight]

        # Add augmentation to the processed images. This is done at this stage
        # so that we don't store the augmented version in the redis cache.
        # Instead, we want the pure version in the redis cache and create a
        # new augmentation every time we load it.
        processedImage = sampleBatch['processedImages'][0]
        augmentedImage = agent.augmentProcessedImageForTraining(processedImage)
        sampleBatch['processedImages'][0] = augmentedImage

        fileDescriptor, fileName = tempfile.mkstemp(".bin", dir=batchDirectory)

        with open(fileDescriptor, 'wb') as batchFile:
            pickle.dump(sampleBatch, batchFile)

        return fileName, cacheHit
    except Exception:
        traceback.print_exc()
        print("", flush=True)
        raise
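The sample cache read above is just a pickled dict compressed with gzip; a minimal sketch of the matching write/read pair, with illustrative file paths:

import gzip
import pickle

def write_sample_cache(path, sample_batch):
    # compress the pickled batch so cached samples stay small on disk
    with open(path, 'wb') as f:
        f.write(gzip.compress(pickle.dumps(sample_batch)))

def read_sample_cache(path):
    with open(path, 'rb') as f:
        return pickle.loads(gzip.decompress(f.read()))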
Example #53
0
 async def load_xml_gz(self, file_path, session):
     # loads nytimes sitemap .xml.gz files via the given aiohttp session and returns the decompressed bytes
     async with session.get(file_path) as resp:
         gzip_bin = await resp.read()
         decoded_gzip = gzip.decompress(gzip_bin)
         return decoded_gzip
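A hedged standalone version of the same idea, assuming an aiohttp ClientSession and a gzip-compressed sitemap URL (the URL in the comment is illustrative):

import asyncio
import gzip
from xml.etree import ElementTree

import aiohttp

async def fetch_sitemap(url):
    async with aiohttp.ClientSession() as session:
        async with session.get(url) as resp:
            # .xml.gz sitemaps are gzip files, so decompress before parsing
            return ElementTree.fromstring(gzip.decompress(await resp.read()))

# asyncio.run(fetch_sitemap('https://example.com/sitemap.xml.gz'))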
Example #54
0
 def _decompress(self):
     self.contents = gzip.decompress(self.raw_contents).decode('utf-8')
Example #55
0
def ungzip(data):
    try:
        data = gzip.decompress(data)
    except:
        pass
    return data
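The helper above swallows every exception to fall back to the raw bytes; an alternative sketch that checks the gzip magic number explicitly:

import gzip

def ungzip_checked(data: bytes) -> bytes:
    # gzip streams always start with the magic bytes 1f 8b
    if data[:2] == b'\x1f\x8b':
        return gzip.decompress(data)
    return data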
Example #56
0
 def load_tar(filename):
     with tarfile.open(filename) as tar:
         with tar.extractfile(gs.JOB_SAVE) as f:
             return pickle.loads(gzip.decompress(f.read()))
Example #57
0
def doit(args):

    global logger, sourcettf, outputdir, fontdir

    logger = args.logger
    sourcettf = args.ttfont

    # Create output directory, including fonts subdirectory, if not present
    outputdir = args.outputdir
    os.makedirs(outputdir, exist_ok=True)
    fontdir = os.path.join(outputdir, 'fonts')
    os.makedirs(fontdir, exist_ok=True)

    # Read and save feature mapping
    for r in args.map:
        if r[0].startswith('#'):
            continue
        elif r[0].startswith('lang='):
            r[0] = r[0][5:]
            lang_maps[r[0]] = lang_map(r)
        else:
            feat_maps[r[0]] = feat_map(r)

    # Open and verify input file is a tunable font; extract and parse feat_all from font.
    font = ttLib.TTFont(sourcettf)
    raw_data = font.getTableData('Silt')
    feat_xml = gzip.decompress(raw_data)  # .decode('utf-8')
    root = ET.fromstring(feat_xml)
    if root.tag != 'all_features':
        logger.log("Invalid TypeTuner feature file: missing root element", "S")
    for i, f in enumerate(root.findall('.//feature')):
        # add to dictionary
        ttfeat = feat(f, i)
        feat_all[ttfeat.name] = ttfeat

    # Open and prepare the xslt file to transform the ftml:
    xslt = ET.parse(args.xsl)
    xslt_transform = ET.XSLT(xslt)

    # Process all ftml files:

    for arg in args.ftml:
        for infname in glob(arg):
            # based on input filename, construct output name
            # find filename and change extension to html:
            outfname = os.path.join(
                outputdir,
                os.path.splitext(os.path.basename(infname))[0] + '.html')
            logger.log('Processing: {} -> {}'.format(infname, outfname), 'P')

            # Each named style in the FTML ultimately maps to a TypeTuned font that will be added via @fontface.
            # We need to remember the names of the styles and their associated fonts so we can hack the html.
            sname2font = dict(
            )  # Indexed by ftml stylename; result is a font object

            # Parse the FTML
            ftml_doc = ET.parse(infname)

            # Adjust <title> to show this is from TypeTuner
            head = ftml_doc.find('head')
            title = head.find('title')
            title.text += " - TypeTuner"
            # Replace all <fontsrc> elements with two identical from the input font:
            #   One will remain unchanged, the other will eventually be changed to a typetuned font.
            ET.strip_elements(head, 'fontsrc')
            fpathname = os.path.relpath(sourcettf, outputdir).replace(
                '\\', '/')  # for css make sure all slashes are forward!
            head.append(
                ET.fromstring('<fontsrc>url({})</fontsrc>'.format(
                    fpathname)))  # First font
            head.append(
                ET.fromstring('<fontsrc>url({})</fontsrc>'.format(
                    fpathname)))  # Second font, same as the first

            # iterate over all the styles in this ftml file, building tuned fonts to match if not already done.
            for style in head.iter('style'):
                sname = style.get('name')  # e.g. "some_style"
                feats = style.get(
                    'feats'
                )  # e.g "'cv02' 1, 'cv60' 1"  -- this we'll parse to get need tt features
                lang = style.get('lang')  # e.g., "sd"
                font_tag = cache_font(feats, lang, args.norebuild)
                # font_tag could be None due to errors, but messages should already have been logged
                # If it is valid, remember how to find this font from the ftml stylename
                if font_tag:
                    sname2font[sname] = font_tag2font[font_tag]

            # convert to html via supplied xslt
            html_doc = xslt_transform(ftml_doc)

            # Two modifications to make in the html:
            # 1) add all @fontface specs to the <style> element
            # 2) Fix up all occurrences of <td> elements referencing font2

            # Add @fontface to <style>
            style = html_doc.find('//style')
            style.text = style.text + '\n' + '\n'.join(
                [x.fontface for x in sname2font.values()])

            # Iterate over all <td> elements looking for font2 and a style or lang indicating feature settings

            classRE = re.compile(r'string\s+(?:(\w+)\s+)?font2$')

            for td in html_doc.findall('//td'):
                tdclass = td.get('class')
                tdlang = td.get('lang')
                m = classRE.match(tdclass)
                if m:
                    sname = m.group(1)
                    if sname:
                        # stylename will get us directly to the font
                        try:
                            td.set(
                                'class',
                                'string {}'.format(sname2font[sname].font_tag))
                            if tdlang:  # If there is also a lang attribute, we no longer need it.
                                del td.attrib['lang']
                        except KeyError:
                            logger.log(
                                "Style name {} not available.".format(sname),
                                "W")
                    elif tdlang:
                        # Otherwise we'll assume there is only the lang attribute
                        try:
                            td.set(
                                'class',
                                'string {}'.format(lang2font[tdlang].font_tag))
                            del td.attrib[
                                'lang']  # lang attribute no longer needed.
                        except KeyError:
                            logger.log(
                                "Style for langtag {} not available.".format(
                                    tdlang), "W")

            # Ok -- write the html out!
            html = ET.tostring(html_doc,
                               pretty_print=True,
                               method='html',
                               encoding='UTF-8')
            with open(outfname, '+wb') as f:
                f.write(html)
Example #58
0
 def __login(self,
             prelogin_dict,
             prelt,
             sp,
             su,
             url,
             cookies=None,
             door=None):
     headers = {
         'Connection':
         'keep-alive',
         'Pragma':
         'no-cache',
         'Cache-Control':
         'no-cache',
         'Origin':
         'https://login.sina.com.cn',
         'Content-Type':
         'application/x-www-form-urlencoded',
         'Accept':
         '*/*',
         'Referer':
         'https://login.sina.com.cn/signup/signin.php',
         'Accept-Encoding':
         'gzip, deflate, br',
         'Accept-Language':
         'zh-CN,zh-HK;q=0.8,zh-TW;q=0.6,zh;q=0.4,en-US;q=0.2,en;q=0.2',
         'User-Agent':
         'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) '
         'Chrome/61.0.3163.91 Safari/537.36'
     }
     if cookies:
         headers['Cookie'] = cookies
     data_dict = {
         'cdult': 3,
         'domain': 'sina.com.cn',
         # 'door': 'mbyqu',  # captcha, added below when required
         'encoding': 'UTF-8',
         'entry': 'account',
         'from': 'null',
         'gateway': 1,
         'nonce': prelogin_dict['nonce'],
         'pagerefer': '',
         'prelt': prelt,  # prelogin time
         'pwencode': 'rsa2',
         'returntype': 'TEXT',
         'rsakv': prelogin_dict['rsakv'],
         'savestate': 0,  # number of days to stay logged in
         'servertime': prelogin_dict['servertime'],
         'service': 'account',
         'sp': sp,  # password
         'sr': '1920*1080',  # screen resolution
         'su': su,  # username
         'useticket': 0,
         'vsnf': 1,
     }
     if door:
         data_dict['door'] = door
     data_str = ''
     for k in data_dict:
         data_str += k + '=' + str(data_dict[k]) + '&'
     data = data_str[:-1].encode()
     post_request = request.Request(url,
                                    data=data,
                                    headers=headers,
                                    origin_req_host='login.sina.com.cn',
                                    method='POST')
     post_resp = request.urlopen(post_request)
     if post_resp.getcode() == 200:
         read = post_resp.read()
         if post_resp.getheader('Content-Encoding') == 'gzip':
             read = gzip.decompress(read)
         rtn_json = read.decode()
         rtn_dict = json.loads(rtn_json)
         cookies_str = ''
         if rtn_dict['retcode'] == '0':
             # login succeeded
             cookies_str = post_resp.getheader('Set-Cookie')
             cookies_str = self.__process_cookies_str(cookies_str)
         elif rtn_dict['retcode'] == '4049' or rtn_dict['retcode'] == '2070':
             # captcha required, or the captcha entered was wrong
             door_headers = headers.copy()
             door_headers[
                 'Accept'] = 'image/webp,image/apng,image/*,*/*;q=0.8'
             door_headers.pop('Origin')
             door_headers.pop('Content-Type')
             door_rnd = random.randint(10000000, 99999999)
             door_url = 'https://login.sina.com.cn/cgi/pin.php?r=%s&s=0' % str(
                 door_rnd)
             door_req = request.Request(door_url,
                                        headers=door_headers,
                                        origin_req_host='login.sina.com.cn',
                                        method='GET')
             door_resp = request.urlopen(door_req)
             if door_resp.getcode() == 200:
                 cookies_str = door_resp.getheader('Set-Cookie')
                 png = door_resp.read()
                 door = recognize(png)
                 _cookies = self.__process_cookies_str(cookies_str)
                 return self.__login(prelogin_dict, prelt, sp, su, url,
                                     _cookies, door)
         else:
              raise RuntimeError('Unknown error, response: %s' % rtn_dict)
         return cookies_str
     else:
         raise RuntimeError('status: %s,url: %s' %
                            (post_resp.getcode(), post_resp.geturl()))
Example #59
0
    def __init__(self, **kwargs):
        """Create the Tree from SVG ``text``."""
        bytestring = kwargs.get('bytestring')
        file_obj = kwargs.get('file_obj')
        url = kwargs.get('url')
        unsafe = kwargs.get('unsafe')
        parent = kwargs.get('parent')
        parent_children = kwargs.get('parent_children')
        tree_cache = kwargs.get('tree_cache')
        element_id = None

        self.url_fetcher = kwargs.get('url_fetcher', fetch)

        if bytestring is not None:
            self.url = url
        elif file_obj is not None:
            bytestring = file_obj.read()
            self.url = getattr(file_obj, 'name', None)
            if self.url == '<stdin>':
                self.url = None
        elif url is not None:
            parent_url = parent.url if parent else None
            parsed_url = parse_url(url, parent_url)
            if parsed_url.fragment:
                self.url = urlunparse(parsed_url[:-1] + ('', ))
                element_id = parsed_url.fragment
            else:
                self.url = parsed_url.geturl()
                element_id = None
            self.url = self.url or None
        else:
            raise TypeError(
                'No input. Use one of bytestring, file_obj or url.')
        self_is_parent = ((parent and self.url == parent.url)
                          or (url and url.startswith('#') and not self.url))
        if self_is_parent:
            root_parent = parent
            while root_parent.parent is not None:
                root_parent = root_parent.parent
            tree = root_parent.xml_tree
        else:
            if not bytestring:
                bytestring = self.fetch_url(parse_url(self.url),
                                            'image/svg+xml')
            if len(bytestring) >= 2 and bytestring[:2] == b'\x1f\x8b':
                bytestring = gzip.decompress(bytestring)
            tree = ElementTree.fromstring(bytestring,
                                          forbid_entities=not unsafe,
                                          forbid_external=not unsafe)
        self.xml_tree = tree
        root = cssselect2.ElementWrapper.from_xml_root(tree)
        style = parent.style if parent else css.parse_stylesheets(self, url)
        if element_id:
            for element in root.iter_subtree():
                if element.id == element_id:
                    root = element
                    self.xml_tree = element.etree_element
                    break
            else:
                raise TypeError(
                    'No tag with id="{}" found.'.format(element_id))
        super().__init__(root, style, self.url_fetcher, parent,
                         parent_children, self.url, unsafe)
        self.root = True
        if tree_cache is not None and self.url:
            tree_cache[(self.url, self.get('id'))] = self
Example #60
0
import websocket
import json
import gzip

if __name__ == "__main__":

    websocket.enableTrace(True)
    WEBSOCKET_MARKET_HOST = 'wss://ws.wbfex.com/kline-api/ws'  # market data API
    #ws = websocket.create_connection("ws://echo.websocket.org/")
    ws = websocket.create_connection(WEBSOCKET_MARKET_HOST)
    print("Sending 'Hello, World'...")
    params = {"channel": "market_" + "ethusdt" + "_ticker", "cb_id": 150}
    req = {"event": "sub", 'params': params}
    #ws.send("Hello, World")
    #print(json.dumps(req))
    ws.send(json.dumps(req))
    #exit(0)
    print("Sent")
    print("Receiving...")
    while 1:
        result = ws.recv()
        if isinstance(result, bytes):
            # the exchange pushes gzip-compressed binary frames
            data = gzip.decompress(result)
        else:
            data = result
        print(data)
    print("Received '%s'" % result)
    ws.close()