def get_content(url, encoding='UTF-8', gzip_decompress=False):
    """ Return str that contains content of url page """
    resp = urlopen(url)
    result = ""
    output = resp.read()
    try:
        if gzip_decompress:
            output = gzip.decompress(output)
        if encoding is not None:
            result = output.decode(encoding)
        else:
            result = output
    except UnicodeDecodeError:
        print("There was an error while decoding the response. Trying again...")
        # fall back to downloading the file to disk
        urlretrieve(url, "file.tmp")
        fxml = open("file.tmp", "rb")
        result = gzip.decompress(fxml.read()).decode()
        fxml.close()
    return result
def unGzipBody(self):
    debugTrace("unGzipBody")
    if self.chunk:
        self.httpBody = gzip.decompress(self.assembleChunks(self.httpBody))
    else:
        self.httpBody = gzip.decompress(self.httpBody)
def get_page(self, _url):
    '''Fetch the whole page. Returns str or bytes.'''
    header = {'Accept-Encoding': 'gzip'}
    header['User-Agent'] = self.ualist[random.randint(0, len(self.ualist) - 1)]
    if opts['user_agent']:
        header['User-Agent'] = opts['user_agent']
    req = urllib.request.Request(url=_url, headers=header)
    pros = opts['proxy']
    if pros and pros[0] in ('http', 'https'):
        req.set_proxy(pros[1], pros[0])
    # urlopen error [SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed
    # https://www.python.org/dev/peps/pep-0476/
    context = ssl._create_unverified_context()
    page = urllib.request.urlopen(req, timeout=80, context=context).read()
    #gzip_handler = GzipHandler()
    #proxy_handler = urllib.request.ProxyHandler({'https': 'XX.XX.XX.XX:XXXX'})
    #proxy_auth_handler = urllib.request.ProxyBasicAuthHandler()
    #proxy_auth_handler.add_password('realm', 'host', 'username', 'password')
    #opener = urllib.request.build_opener(gzip_handler, proxy_handler, proxy_auth_handler)
    #opener.addheaders = [('User-Agent', 'Python-urllib/2.7')]
    #urllib.request.install_opener(opener)
    #page = opener.open(_url).read()
    try:
        if self.url_type == "2":
            return "None Content"
        if self.url_type == "4":
            # transcode gb2312 pages to UTF-8 bytes
            return gzip.decompress(page).decode('gb2312').encode('utf-8')
        else:
            return gzip.decompress(page)
    except OSError:
        # response was not gzip-compressed
        return page
def DownloadSubs(self):
    encoded_l1 = self.server.DownloadSubtitles(self.loginToken, [self.searchOS])
    decoded_l1 = base64.b64decode(bytes(encoded_l1['data'][0]['data'], "cp1250"))
    decompressed_l1 = str(gzip.decompress(decoded_l1), "cp1250")
    encoded_l2 = self.server.DownloadSubtitles(self.loginToken, [self.searchEqOS])
    decoded_l2 = base64.b64decode(bytes(encoded_l2['data'][0]['data'], "cp1250"))
    decompressed_l2 = str(gzip.decompress(decoded_l2), "cp1250")
def fetch_url(url, path, overwrite=False, verbose=False):
    """
    Download specified url to destination path, returning True if successful.
    Will not overwrite existing files by default.

    :param url:
    :param path:
    :param overwrite:
    :param verbose:
    :return:
    """
    if not overwrite and os.path.exists(path):
        if verbose:
            print('The file %s already exists, so %s was not downloaded.' % (path, url))
        return True
    # create empty file for blank URLs, e.g. human homologs for human genome data
    if url == '':
        open(path, 'a').close()
    # for compressed files, make sure to decompress before writing to disk
    elif url.endswith('.gz'):
        try:
            response = urllib.request.urlopen(url)
        except urllib.error.HTTPError as error:
            print("Could not retrieve %s: %s [HTTP Error %s]" % (url, error.reason, error.code), file=sys.stderr)
            return False
        except urllib.error.URLError as error:
            print("Could not retrieve %s: %s" % (url, error.reason), file=sys.stderr)
            return False
        with open(path, 'wb') as output_file:
            if sys.platform == 'darwin':
                # write output in chunks to avoid bug in MacOS [https://bugs.python.org/issue24658]
                binary_result = gzip.decompress(response.read())
                result_length = len(binary_result)
                chunk_start = 0
                while chunk_start < result_length:
                    chunk_end = min(result_length, chunk_start + BINARY_CHUNKSIZE)
                    output_file.write(binary_result[chunk_start:chunk_end])
                    chunk_start = chunk_end
            else:
                output_file.write(gzip.decompress(response.read()))
    # download uncompressed files directly
    else:
        try:
            urllib.request.urlretrieve(url, filename=path)
        except urllib.error.HTTPError as error:
            print("Could not retrieve %s: %s [HTTP Error %s]" % (url, error.reason, error.code), file=sys.stderr)
            return False
        except urllib.error.URLError as error:
            print("Could not retrieve %s: %s" % (url, error.reason), file=sys.stderr)
            return False
    return True
def main(argv=None):
    if argv is None:
        argv = sys.argv[1:]
    arch = DefaultArchiver()
    arch.debug(3)
    arch.exclude(".svn")
    arch.includeall(".")
    arch.list()
    hexdigest = arch.digest(__file__, "hexdigest.log", "md5")
    file = open("hexdigest.log", "r")
    contents = file.read()
    file.close()
    os.remove("hexdigest.log")
    print(hexdigest)
    assert hexdigest == contents
    jar = JarArchiver()
    jar.debug(3)
    #TODO:
    gzip = GzipArchiver()
    gzip.debug(3)
    gzip.compress(__file__, __file__ + ".gz")
    gzip.decompress(__file__ + ".gz", __file__ + ".gzbak")
    gzip.dump(__file__ + ".gzbak")
    os.remove(__file__ + ".gz")
    os.remove(__file__ + ".gzbak")
    zip = ZipArchiver("../cwd.zip")
    zip.debug(3)
    zip.exclude(".svn")
    zip.includeall(".")
    zip.list()
    zip.list(None, "../cwd-zip.log")
    zip.assemble()
    zip.list("../cwd.zip")
    zip.list("../cwd.zip", "../cwd2-zip.log")
    os.remove("../cwd.zip")
    os.remove("../cwd-zip.log")
    os.remove("../cwd2-zip.log")
    tar = TarArchiver("cwd.tgz")
    tar.debug(3)
    tar.exclude(".svn")
    tar.includeall(".")
    tar.list()
    tar.list(None, "cwd-tgz.log")
    tar.assemble()
    tar.list("cwd.tgz")
    tar.list("cwd.tgz", "cwd2-tgz.log")
    os.remove("cwd.tgz")
    os.remove("cwd-tgz.log")
    os.remove("cwd2-tgz.log")
def get_response_content(url_response):
    content_encoding = url_response.getheader('Content-Encoding')
    body = url_response.read()  # read once; an HTTP response body cannot be re-read
    if content_encoding is not None and 'gzip' in content_encoding:
        page_content = gzip.decompress(body)
    else:
        try:
            page_content = gzip.decompress(body)
        except OSError:
            try:
                page_content = URLTool.deflate(body)
            except:
                page_content = body
    page_content = page_content.decode('utf-8')
    return page_content
def _gunzip(data):
    # gzip.decompress() only exists on Python 3; fall back to zlib on Python 2.
    # Note: the Python 3.0 hexversion is 0x03000000, not 0x300000.
    if hexversion < 0x03000000:
        import zlib
        return zlib.decompress(data, 16 + zlib.MAX_WBITS)
    else:
        import gzip
        return gzip.decompress(data)
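# A quick self-check (not from the original source) that the two branches of
# _gunzip() agree: zlib with a 16 + MAX_WBITS window accepts the gzip format,
# so it matches gzip.decompress() on the same payload.
import gzip
import zlib

payload = gzip.compress(b"round trip")
assert zlib.decompress(payload, 16 + zlib.MAX_WBITS) == gzip.decompress(payload)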
def __init__(self):
    self.requestTime = datetime.datetime.utcnow()
    request = _HmaRequest()
    response = urllib.request.urlopen(request)
    buffer = response.read()
    body = gzip.decompress(buffer) if response.info().get('Content-Encoding') == 'gzip' else buffer
    soup = BeautifulSoup(body, "html5lib")
    # try to get table by id
    # try to get table by class
    # fall back to just getting the first table
    table = soup.find("table", {"id": self.TableId}) or \
        soup.find("table", self.TableClass) or \
        soup.find("table")
    if table is None:
        raise Exception("No proxy table found")
    # remove all display: none tags
    for tag in table(attrs={"style": re.compile(r"display\s*:\s*none")}):
        tag.decompose()
    fields = {i: self.__match_enum(ProxyField, self.FieldPatterns, header, None)
              for i, header in enumerate(
                  th.getText() for th in
                  table.find("thead").find("tr").find_all("th"))}
    if None in fields.values():
        raise Exception("Missing field")
    self.proxies = [Proxy({field: self.__get_field(field, cell)
                           for field, cell in [(fields[fieldNumber], cells[fieldNumber])
                                               for fieldNumber in fields]})
                    for cells in [list(row.findAll("td"))
                                  for row in table.find("tbody").findAll("tr")]]
def urlopen(url, headers={}, data=None, retries=RETRIES):
    '''Open an HTTP connection and return the Request.

    headers is a dict. Some defaults, such as User-Agent and Referer, are
    already provided and do not need to be added again.

    This function is only meant for HTTP requests; do not use it to download
    large files. If the server supports gzip compression, the data is
    transferred gzip-compressed and decompressed locally. req.data holds the
    final HTTP payload, usually UTF-8 encoded text.
    '''
    headers_merged = default_headers.copy()
    for key in headers.keys():
        headers_merged[key] = headers[key]
    opener = urllib.request.build_opener(ForbiddenHandler)
    opener.addheaders = [(k, v) for k, v in headers_merged.items()]
    for _ in range(retries):
        try:
            req = opener.open(url, data=data, timeout=TIMEOUT)
            encoding = req.headers.get('Content-encoding')
            req.data = req.read()
            if encoding == 'gzip':
                req.data = gzip.decompress(req.data)
            elif encoding == 'deflate':
                req.data = zlib.decompress(req.data, -zlib.MAX_WBITS)
            return req
        except OSError as e:
            print(e)
    return None
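# Hypothetical usage of the urlopen() helper above; the URL and header values
# are made up for illustration, and req.data is assumed to be UTF-8 text as
# the docstring describes:
req = urlopen('http://example.com/api', headers={'Accept': 'application/json'})
if req is not None:
    text = req.data.decode('utf-8')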
def post_multipart(url, headers, fields, files, retries=RETRIES):
    content_type, body = encode_multipart_formdata(fields, files)
    schema = urllib.parse.urlparse(url)
    headers_merged = default_headers.copy()
    for key in headers.keys():
        headers_merged[key] = headers[key]
    headers_merged['Content-Type'] = content_type
    headers_merged['Content-length'] = str(len(body))
    for _ in range(retries):
        try:
            h = http.client.HTTPConnection(schema.netloc)
            h.request('POST', url, body=body, headers=headers_merged)
            req = h.getresponse()
            encoding = req.getheader('Content-encoding')
            req.data = req.read()
            if encoding == 'gzip':
                req.data = gzip.decompress(req.data)
            elif encoding == 'deflate':
                req.data = zlib.decompress(req.data, -zlib.MAX_WBITS)
            return req
        except OSError as e:
            print(e)
    return None
def download_device_info():
    deviceurl = r"https://tpairbox.blob.core.windows.net/blobfs/AirBoxDevice.gz"
    print("downloading data from http://data.taipei/")
    response = urq.urlopen(deviceurl)
    print("decompressing the gzip file")
    with io.open("AirBoxDevice", "wb") as out:
        out.write(gzip.decompress(response.read()))
def wdecompress(param):
    if PY_VERSION >= 3:
        return gzip.decompress(param)
    else:
        import StringIO
        with closing(gzip.GzipFile(fileobj=StringIO.StringIO(param))) as gz:
            return gz.read()
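# Hypothetical round-trip through wdecompress() above, assuming the module's
# gzip import and a Python 3 PY_VERSION:
compressed = gzip.compress(b'payload')
assert wdecompress(compressed) == b'payload'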
def load(self, location, data=None, headers={}):
    if not location:
        raise LoginError()
    self.last_url = re.sub(r"https?:\/\/[^/]+", r"", location)
    heads = {"Accept-Encoding": "gzip, deflate",
             "User-Agent": self.core_cfg.get("User-Agent", "OTRS_US/0.0")}
    if "Cookies" in self.runt_cfg:
        heads["Cookie"] = self.runt_cfg["Cookies"]
    heads.update(headers)
    r = Request(location, data, headers=heads)
    try:
        pg = urlopen(r, timeout=60)
    except HTTPError as err:
        self.echo("HTTP Error:", err.getcode())
        return
    except Exception as err:
        self.echo(repr(err))
        return
    pd = pg.read()
    if pg.getheader("Content-Encoding") == "gzip":
        pd = decompress(pd)
    self.dump_data(pg, pd)
    if not self.check_login(pd.decode(errors="ignore")):
        raise LoginError(r.get_full_url())
    return self.parse(pd)
def login(self, who=None, req=None):
    "login and load"
    if who is None:
        who = self.runt_cfg
    if req is None:
        req = self.last_url
    user = who["user"]
    passwd = str(who["password"])
    site = who["site"]
    r = Request(site, urlencode(
        [("Action", "Login"), ("RequestedURL", req), ("Lang", "en"),
         ("TimeOffset", ""), ("User", user), ("Password", passwd),
         ("login", "Login")]).encode())
    try:
        pg = urlopen(r, timeout=60)
    except BadStatusLine:
        raise LoginError("BadStatusLine")
    pd = pg.read()
    if pg.getheader("Content-Encoding") == "gzip":
        pd = decompress(pd)
    m = re.search(r"OTRSAgentInterface=[^;&]+", pg.geturl())
    if m and m.group(0):
        self.runt_cfg["Cookies"] = m.group(0)
    else:
        self.runt_cfg.pop("Cookies", None)
    self.dump_data(pg, pd)
    return self.parse(pd)
def dld_ldss(sql, cursor):
    cursor.execute(sql, id=jobs)
    result = cursor.fetchall()
    wkt = result[0][1].read()  # compressed binary bytes
    ret = gzip.decompress(wkt)
    with open(r'C:\Users\tangjing\Desktop\Auto-Reg\ldss.xlsx', 'wb') as b2:
        b2.write(ret)
def loads(value, compression=None):
    """Convert a primitive value loaded from the database back to a Python object."""
    if value is None:
        return None
    if compression == Compression.gzip:
        value = gzip.decompress(value)
    return msgpack.loads(value, encoding='utf-8')
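# A sketch (assumed, not shown in the original source) of the complementary
# dumps() helper, mirroring the Compression handling in loads() above:
def dumps(value, compression=None):
    """Convert Python object to a primitive value for storing to database."""
    if value is None:
        return None
    packed = msgpack.dumps(value, use_bin_type=True)
    if compression == Compression.gzip:
        packed = gzip.compress(packed)
    return packed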
def download_html(url):
    global download_tally
    headers = {"User-Agent": "Mozilla/5.0"}
    req = urllib.request.Request(url, None, headers)
    # put in a retry for loop:
    for k in range(miss_threshold):
        try:
            f = urllib.request.urlopen(req)
            break
        except Exception as e:
            # k5 regularly bugs out, so we need this.
            print(e.reason)
            print("Trying again ... ")
            sys.stdout.flush()
            time.sleep(30)  # hopefully 30s is enough to solve the issue. Otherwise, may as well bail anyway.
        if (k + 1) >= miss_threshold:
            # save a copy first:
            save_sw(C, k5_file, False)
            f = open(download_tally_file, mode="w")
            f.write(str(download_tally))
            f.close()
            print("url request failed", miss_threshold, "times. Exiting!\n")
            sys.exit(0)
    if f.info().get("Content-Encoding") == "gzip":
        # sometimes WP spits back gzip, even if not requested.
        html = gzip.decompress(f.read())
        html = str(html)
    else:
        html = f.read()
    f.close()
    download_tally += 1
    return html
def test_gzip():
    s = b"Hello World!"
    contents = BytesIO(s)
    compressed = b"".join(compress(contents, 1))
    uncompressed = decompress(compressed)
    assert uncompressed == s
    contents.close()
def __init__(self, **kwargs):
    """Create the Tree from SVG ``text``."""
    bytestring = kwargs.get('bytestring')
    file_obj = kwargs.get('file_obj')
    url = kwargs.get('url')
    unsafe = kwargs.get('unsafe')
    parent = kwargs.get('parent')
    parent_children = kwargs.get('parent_children')
    tree_cache = kwargs.get('tree_cache')
    element_id = None
    self.url_fetcher = kwargs.get('url_fetcher', fetch)
    if bytestring is not None:
        self.url = url
    elif file_obj is not None:
        bytestring = file_obj.read()
        self.url = getattr(file_obj, 'name', None)
        if self.url == '<stdin>':
            self.url = None
    elif url is not None:
        parent_url = parent.url if parent else None
        parsed_url = parse_url(url, parent_url)
        if parsed_url.fragment:
            self.url = urlunparse(parsed_url[:-1] + ('',))
            element_id = parsed_url.fragment
        else:
            self.url = parsed_url.geturl()
            element_id = None
        self.url = self.url or None
    else:
        raise TypeError('No input. Use one of bytestring, file_obj or url.')
    if parent and self.url == parent.url:
        root_parent = parent
        while root_parent.parent:
            root_parent = root_parent.parent
        tree = root_parent.xml_tree
    else:
        if not bytestring:
            bytestring = self.fetch_url(parse_url(self.url), 'image/svg+xml')
        if len(bytestring) >= 2 and bytestring[:2] == b'\x1f\x8b':
            bytestring = gzip.decompress(bytestring)
        parser = ElementTree.XMLParser(resolve_entities=unsafe, huge_tree=unsafe)
        tree = ElementTree.fromstring(bytestring, parser)
    remove_svg_namespace(tree)
    self.xml_tree = tree
    apply_stylesheets(self)
    if element_id:
        self.xml_tree = tree.find(".//*[@id='{}']".format(element_id))
        if self.xml_tree is None:
            raise TypeError('No tag with id="{}" found.'.format(element_id))
    super().__init__(self.xml_tree, self.url_fetcher, parent, parent_children, self.url)
    self.root = True
    if tree_cache is not None and self.url:
        tree_cache[(self.url, self.get('id'))] = self
def search(ctx, cmd, arg, *args):
    """so <network> <tag1> <tag2> <etc>\nSearch through the stackexchange network for the given tags"""
    network = args[0]
    args = args[1:]
    thingy = ";".join(args)
    searchQuery = 'search?tagged={}&pagesize=3'.format(thingy)
    if network in apiURLS:
        apiURL = apiURLS[network]
    else:
        ctx.reply("Invalid Network", "StackExchange")
        return
    searchURL = apiURL.format(searchQuery)
    data = request.urlopen(searchURL).read()
    jsonData = gzip.decompress(data)
    decoded = jsonLoad(jsonData)
    results = decoded["total"]
    if results > 0:
        res = min(results, 3)
        ctx.reply("Results 1-{} of {}".format(res, prettyNumber(results)), expandedNames[network])
    else:
        ctx.reply("No results for your query", expandedNames[network])
    for q in decoded['questions']:
        title = q['title']
        questionURL = questionURLS[network].format('questions', q['question_id'])
        ctx.reply('{} • {}'.format(title, questionURL), expandedNames[network])
def get_manifest(self, stream, full):
    if not hasattr(self, "_streams") or stream not in self._streams:
        return False
    self._stream = self._streams[stream]
    # we no longer need all streams
    del self._streams
    print("* Downloading manifest from {0}/{1}/{2}.".format(
        self._stream["DownloadUrl"], self._stream["TitleFolder"],
        self._stream["ManifestName"]))
    # we still need to add the AuthSuffix for the download to work
    manifest_url = "{0}/{1}/{2}{3}".format(
        self._stream["DownloadUrl"], self._stream["TitleFolder"],
        self._stream["ManifestName"], self._stream["AuthSuffix"])
    try:
        response = urlopen(manifest_url)
        manifest_raw = decompress(response.read())
        self._manifest = loads(manifest_raw.decode("utf-8"))
        return self._verify_manifest(full)
    except URLError as err:
        print("! Could not retrieve manifest: {0}.".format(err.reason))
        return False
def decompress(data, encoding=None):
    """check for content gzip encoding"""
    if not data or not encoding:
        return data
    if 'gzip' == encoding.lower():
        return gzip.decompress(data)
    raise Exception("decompress: unknown encoding", encoding)
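# Hypothetical usage of the decompress() helper above, e.g. against an
# http.client response: body = decompress(resp.read(), resp.getheader('Content-Encoding')).
# A quick self-check, assuming the module imports gzip:
payload = gzip.compress(b'hello')
assert decompress(payload, 'gzip') == b'hello'
assert decompress(b'plain', None) == b'plain'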
def BlueprintStringDecode(blob):
    print('a')
    comparison = base64.b64encode(bytes(
        supported_mods['blueprintstring']['magic'] +
        supported_mods['blueprintstring']['magicend'], 'UTF-8'))
    if len(blob) <= len(comparison):
        return ''
    print('b')
    try:
        # base64 character set: alphanumeric, '+', '/'... '=' for padding
        nonBase64Chars = re.compile(r'[^a-zA-Z0-9+\/=]', re.UNICODE)
        b64 = nonBase64Chars.sub('', blob)
        gzipped = base64.b64decode(b64, validate=True)
        decompressed = gzip.decompress(gzipped).decode(encoding='utf-8', errors='strict')
        if decompressed.startswith(supported_mods['blueprintstring']['magic']):
            return decompressed[len(supported_mods['blueprintstring']['magic']):
                                -len(supported_mods['blueprintstring']['magicend'])]
        else:
            return ':('
    except TypeError:
        # base64 decoding error
        return 't'
    # except Error as e:
    #     return e
    except OSError:
        # not gzipped
        return 'o'
def validate_state(zip_state):
    assert isinstance(zip_state, zipfile.ZipExtFile)
    state_output = gzip.decompress(zip_state.read())
    state = json.loads(state_output)
    assert len(state["frameworks"]) == 2, "bundle must contain information about frameworks"
    task_count = len(state["frameworks"][1]["tasks"]) + len(state["frameworks"][0]["tasks"])
    assert task_count == 1, "bundle must contain information about tasks"
def post_multipart(url, headers, fields, files, retries=RETRIES):
    content_type, body = encode_multipart_formdata(fields, files)
    schema = urllib.parse.urlparse(url)
    headers_merged = default_headers.copy()
    for key in headers.keys():
        headers_merged[key] = headers[key]
    headers_merged["Content-Type"] = content_type
    headers_merged["Content-length"] = str(len(body))
    for i in range(retries):
        try:
            h = http.client.HTTPConnection(schema.netloc)
            h.request("POST", url, body=body, headers=headers_merged)
            req = h.getresponse()
            encoding = req.getheader("Content-encoding")
            req.data = req.read()
            if encoding == "gzip":
                req.data = gzip.decompress(req.data)
            elif encoding == "deflate":
                req.data = zlib.decompress(req.data, -zlib.MAX_WBITS)
            return req
        except OSError:
            logger.error(traceback.format_exc())
        except:
            logger.error(traceback.format_exc())
            # return None
    return None
def get_page_content(tags, page_num):
    tasks = list()
    # Page template
    url = "http://nhentai.net/search/?q={}&page={}".format(tags, page_num)
    response = http_request(url)
    # Decompress response and decode into plain string
    buffer = gzip.decompress(response.read()).decode("utf-8")
    # Parse content
    parsed = BeautifulSoup(buffer, 'html.parser')
    gallery = parsed.body.main.div.div
    for div in gallery:
        # Filter out NavigableString class
        if isinstance(div, bs4.NavigableString):
            continue
        album = dict()
        album["data-tags"] = div.attrs["data-tags"]
        album["href"] = div.a.attrs["href"]
        album["cover"] = div.a.img.attrs["src"]
        album["caption"] = div.a.div.get_text()
        tasks.append(album)
    return tasks
def _crawl(self, nid):
    # pdb.set_trace()
    if self._collection.find_one({"_id": nid}):
        print("failed: news {} exists ... skip".format(nid))
        return
    url = "http://info.3g.qq.com/g/s?aid=finance_ss&id=" + nid
    headers = {
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
        "Accept-Encoding": "gzip,deflate,sdch",
        "Accept-Language": "zh-CN,zh;q=0.8,en-US;q=0.6,en;q=0.4",
        "User-Agent": "Mozilla/5.0 (Linux; Android 4.1.1; Nexus 7 Build/JRO03D) AppleWebKit/535.19 (KHTML, like Gecko) Chrome/18.0.1025.166 Safari/535.19",
    }
    # print(url)
    with urlopen(Request(url, headers=headers), timeout=30) as fp:
        html_content = gzip.decompress(fp.read()).decode("utf-8")
    page = Page(html_content)
    create_at = page.get_create_time()
    title = page.get_title()
    content = page.get_content()
    if title is None or content == "":
        raise Exception("no content")
    self._collection.insert(dict(_id=nid, create_at=create_at, title=title, content=content))
    print("success: get {} characters from {} queue {}".format(len(content), url, self._queue.qsize()))
def get_nzb(dataset=None):
    user = auth()
    if user:
        id = request.query.guid or None
        if not id:
            id = request.query.id or None
        if id:
            with db_session() as db:
                release = db.query(Release).join(NZB).join(Category).filter(Release.id == id).one()
                if release:
                    release.grabs += 1
                    user.grabs += 1
                    db.merge(release)
                    db.merge(user)
                    db.commit()
                    data = release.nzb.data
                    response.set_header('Content-type', 'application/x-nzb-compressed-gzip')
                    response.set_header('X-DNZB-Name', release.search_name)
                    response.set_header('X-DNZB-Category', release.category.name)
                    response.set_header('Content-Disposition', 'attachment; filename="{0}"'
                                        .format(release.search_name.replace(' ', '_') + '.nzb.gz'))
                    return gzip.decompress(data)
                else:
                    return api_error(300)
        else:
            return api_error(200)
    else:
        return api_error(100)
def get_data(prefix, page_number=1,
             from_date=datetime.datetime(2008, 7, 28),
             to_date=datetime.datetime.now()):
    url = "http://api.stackexchange.com/2.2/{prefix}?" \
          "page={page_num}&pagesize=100&" \
          "fromdate={fromdate}&" \
          "todate={todate}&" \
          "site=stackoverflow&" \
          "key={key}&" \
          "filter=!6JvKMTZy8(80_".format(prefix=quote(prefix),
                                         key="gytnic74fozY)jD39pQSzg((",
                                         page_num=page_number,
                                         fromdate=int(from_date.timestamp()),
                                         todate=int(to_date.timestamp()))
    logging.getLogger(__name__).info('Fetching: ' + url)
    while True:
        try:
            resp = ur.urlopen(url)
            break
        except urllib.error.URLError as e:
            if e.errno != 110:
                # not a connection timeout -- re-raise
                raise
            else:
                # connection timeout: ignore it and try again
                sleep(3, 7)
    data = gzip.decompress(resp.read())
    data = json.loads(data.decode())
    return data
def forward_request(self, method, path, data, headers):
    if path.split('?')[0] == '/health':
        return serve_health_endpoint(method, path, data)
    if method == 'POST' and path == '/graph':
        return serve_resource_graph(data)
    # kill the process if we receive this header
    headers.get(HEADER_KILL_SIGNAL) and os._exit(0)
    target = headers.get('x-amz-target', '')
    auth_header = get_auth_string(method, path, headers, data)
    if auth_header and not headers.get('authorization'):
        headers['authorization'] = auth_header
    host = headers.get('host', '')
    orig_req_url = headers.pop(HEADER_LOCALSTACK_REQUEST_URL, '')
    headers[HEADER_LOCALSTACK_EDGE_URL] = (re.sub(
        r'^([^:]+://[^/]+).*', r'\1', orig_req_url) or 'http://%s' % host)
    # extract API details
    api, port, path, host = get_api_from_headers(headers, method=method, path=path, data=data)
    if api and config.LS_LOG:
        # print request trace for debugging, if enabled
        LOG.debug('IN(%s): "%s %s" - headers: %s - data: %s' % (api, method, path, dict(headers), data))
    set_default_region_in_headers(headers)
    if port and int(port) < 0:
        return 404
    if not port:
        api, port = get_api_from_custom_rules(method, path, data, headers) or (api, port)
    if not port:
        if method == 'OPTIONS':
            if api and config.LS_LOG:
                # print request trace for debugging, if enabled
                LOG.debug('OUT(%s): "%s %s" - status: %s' % (api, method, path, 200))
            return 200
        if api in ['', None, API_UNKNOWN]:
            truncated = truncate(data)
            if auth_header or target or data or path not in ['/', '/favicon.ico']:
                LOG.info(('Unable to find forwarding rule for host "%s", path "%s %s", '
                          'target header "%s", auth header "%s", data "%s"') % (
                    host, method, path, target, auth_header, truncated))
        else:
            LOG.info(('Unable to determine forwarding port for API "%s" - please '
                      'make sure this API is enabled via the SERVICES configuration') % api)
        response = Response()
        response.status_code = 404
        response._content = '{"status": "running"}'
        return response
    if api and not headers.get('Authorization'):
        headers['Authorization'] = aws_stack.mock_aws_request_headers(api)['Authorization']
    headers[HEADER_TARGET_API] = str(api)
    headers['Host'] = host
    if isinstance(data, dict):
        data = json.dumps(data)
    encoding_type = headers.get('Content-Encoding') or ''
    if encoding_type.upper() == GZIP_ENCODING.upper() and api not in [S3]:
        headers.set('Content-Encoding', IDENTITY_ENCODING)
        data = gzip.decompress(data)
    lock_ctx = BOOTSTRAP_LOCK
    if persistence.API_CALLS_RESTORED or is_internal_call_context(headers):
        lock_ctx = empty_context_manager()
    with lock_ctx:
        return do_forward_request(api, method, path, data, headers, port=port)
def fetch(self, timeout=1):
    """
    Fetches the url, parses the title, desc and icon for the website passed in
    """
    q = Request(self.url)
    q.add_header('User-Agent', USER_AGENT)
    html = urlopen(q, timeout=timeout)
    encoding = html.getheader("Content-Encoding")
    content = html.read()
    if encoding == "gzip":
        content = gzip.decompress(content)
    soup = BeautifulSoup(content.decode("utf-8", "ignore"), "html.parser")
    title_elems = [soup.findAll(attrs={attr: re.compile(r"title", re.I)})
                   for attr in ["name", "property"]]
    for i in range(2):  # check the "name" metas, then the "property" metas
        if len(title_elems[i]) > 0:
            self.title = title_elems[i][0]["content"]
            break
    else:
        # Get the <title> as a string
        self.title = str(soup.title.string)
    titles = re.compile("[-–|:•]+").split(self.title)
    self.title = titles[0].strip()
    # Get the desc from whatever we can find
    desc_elems = soup.findAll("meta", attrs={"name": re.compile(r"Desc", re.I)})
    for meta_elem in desc_elems:
        if meta_elem.attrs["content"]:
            self.desc = meta_elem.attrs["content"]
            break
    if len(self.desc.split()) > 30:
        self.desc = " ".join(self.desc.split()[0:29]).strip()
        self.desc = self.desc.strip("…")
        self.desc = self.desc.strip(".")
        self.desc += "..."
    icon_link = soup.find("link", rel=re.compile(r"shortcut icon"))
    if icon_link is None:
        icon_link = soup.find("link", rel=re.compile(r"icon"))
    if icon_link is not None:
        # Check if icon link is global or relative
        icon_href = icon_link["href"]
        if icon_href.find("http") != -1:
            self.icon = icon_href
        else:
            self.icon = self.url + icon_href
    # Fetch Open Graph Image
    image = soup.find("meta", property="og:image")
    if image is None:
        # Use favicon if no image is specified
        self.image = self.icon
    if image is not None:
        # Check if image link is global or relative
        image_link = image["content"]
        if image_link.find("http") != -1:
            self.image = image_link
        else:
            self.image = self.url + image_link
    # self.image = builder.create_url(
import gzip

data = b"bjfbjskdfkjsdhfbsdjkfsdf"
t = gzip.compress(data)
print(t)
data = gzip.decompress(t)
print(data)
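# For payloads too large to hold in memory at once, a sketch of the streaming
# equivalent using gzip.GzipFile over file-like buffers:
import io

buf = io.BytesIO()
with gzip.GzipFile(fileobj=buf, mode="wb") as gz:
    gz.write(b"bjfbjskdfkjsdhfbsdjkfsdf")
with gzip.GzipFile(fileobj=io.BytesIO(buf.getvalue()), mode="rb") as gz:
    assert gz.read() == b"bjfbjskdfkjsdhfbsdjkfsdf"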
def gzip_decompress(data):
    return gzip.decompress(data)
def testurl(url, showresponse=0, responseheaders=0):
    try:
        headerdict = json.loads(headerstxt)
        headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64)'}
        rqobj = urlreq.Request(url, None, headers)
        if headerstxt != "":
            for key in headerdict:
                rqobj.add_header(key, headerdict[key])
        result = urlreq.urlopen(rqobj, context=ctx)
        responsebody = result.read()
        print("------------------------------------------------------------------------------------------------")
        print(Fore.GREEN + "URL: " + url)
        print(Fore.WHITE + "HTTP Status: " + str(result.status))
        print("Response Size:" + str(len(responsebody)))
        piiList = ""
        if showresponse == 1:
            print("Response Sample:")
            try:
                print(gzip.decompress(responsebody)[:300])
                # byte patterns, since gzip.decompress() returns bytes
                if re.search(rb'\S+@\S+', gzip.decompress(responsebody)):
                    piiList = piiList + " email |"
                if re.search(rb'(lastname|firstname|first.name|last.name)', gzip.decompress(responsebody), re.IGNORECASE):
                    piiList = piiList + " name |"
            except:
                print(responsebody[:300])
                print("REGULAR")
                #print(responsebody.decode("utf8", 'ignore')[:300])
                if re.search(r'\S+@\S+', responsebody.decode("utf8", 'ignore')):
                    piiList = piiList + " email |"
                if re.search('(lastname|firstname|first.name|last.name)', responsebody.decode("utf8", 'ignore'), re.IGNORECASE):
                    piiList = piiList + " name |"
        if responseheaders == 1:
            print("")
            print("Response Headers:")
            respheaders = result.info()
            print(respheaders)
        if piiList != "":
            print(Fore.RED + " Possible PII ( |" + piiList + ") Found")
        return (True, result.url)
    except urllib.error.URLError as e:
        print(Fore.GREEN + "URL: " + url)
        try:
            print(Fore.WHITE + "HTTP Status: " + str(e.reason))
            responsebody = e.read()
            try:
                print(gzip.decompress(responsebody)[:300])
            except:
                print(e.read(300).decode("utf8", 'ignore'))
            #response = str(e.read().decode("utf8", 'ignore'))
            # regex results to check for some particular frameworks in debug mode
            if re.search(rb'DEBUG...True', responsebody):
                print(Fore.RED + " Possible Django Debug Page Found")
            if re.search(rb'Whoops..There was an error', responsebody):
                print(Fore.RED + " Possible Laravel Debug Page Found")
        except:
            print(Fore.RED + "An error occurred")
        print(Fore.WHITE + "")
        return (False, '')
    except:
        return (False, '')
def test_check_messages():
    sub = PubsubSubscriber(sub_client=pubsub_v1.SubscriberClient())
    for message in sub.pull('x-i-a-test', 'xialib-sub-01'):
        header, data, id = sub.unpack_message(message)
        assert len(json.loads(gzip.decompress(data).decode())) == 2
        sub.ack('x-i-a-test', 'xialib-sub-01', id)
def _gzip_loads(self, data):
    return self._pickle_loads(gzip.decompress(data))
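# A sketch (assumed, not part of the original class) of the matching
# serializer, pairing _gzip_loads() above with a hypothetical _pickle_dumps():
def _gzip_dumps(self, obj):
    return gzip.compress(self._pickle_dumps(obj))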
def load_data_from_request(request):
    data = None
    if request.method == "POST":
        if request.content_type in ["", "text/plain", "application/json"]:
            data = request.body
        else:
            data = request.POST.get("data")
    else:
        data = request.GET.get("data")
    if not data:
        return None
    # add the data in sentry's scope in case there's an exception
    with configure_scope() as scope:
        scope.set_context("data", data)
        scope.set_tag("origin", request.META.get("REMOTE_HOST", "unknown"))
        scope.set_tag("referer", request.META.get("HTTP_REFERER", "unknown"))
        # since version 1.20.0 posthog-js adds its version to the `ver` query parameter as a debug signal here
        scope.set_tag("library.version", request.GET.get("ver", "unknown"))
    compression = (
        request.GET.get("compression")
        or request.POST.get("compression")
        or request.headers.get("content-encoding", "")
    )
    compression = compression.lower()
    if compression == "gzip" or compression == "gzip-js":
        if data == b"undefined":
            raise RequestParsingError(
                "data being loaded from the request body for decompression is the literal string 'undefined'"
            )
        try:
            data = gzip.decompress(data)
        except (EOFError, OSError) as error:
            raise RequestParsingError("Failed to decompress data. %s" % (str(error)))
    if compression == "lz64":
        if not isinstance(data, str):
            data = data.decode()
        data = data.replace(" ", "+")
        data = lzstring.LZString().decompressFromBase64(data)
        if not data:
            raise RequestParsingError("Failed to decompress data.")
        data = data.encode("utf-16", "surrogatepass").decode("utf-16")
    base64_decoded = None
    try:
        base64_decoded = base64_decode(data)
    except Exception:
        pass
    if base64_decoded:
        data = base64_decoded
    try:
        # parse_constant gets called in case of NaN, Infinity etc
        # default behaviour is to put those into the DB directly
        # but we just want it to return None
        data = json.loads(data, parse_constant=lambda x: None)
    except (json.JSONDecodeError, UnicodeDecodeError) as error_main:
        raise RequestParsingError("Invalid JSON: %s" % (str(error_main)))
    # TODO: data can also be an array, function assumes it's either None or a dictionary.
    return data
def planttfdb(MSUID):
    # Find the file
    url = 'http://planttfdb.cbi.pku.edu.cn/download.php'
    html_page = helper.connectionError(url)
    soup = BeautifulSoup(html_page.content, "lxml")
    # Find headers
    for search in soup.findAll('table', {"id": "oid_tfid"}):
        for linkfound in search.findAll('a'):
            if linkfound.contents[0] == "Oryza sativa subsp. japonica":
                link = 'http://planttfdb.cbi.pku.edu.cn/' + linkfound.get('href')
                break
    # Give the entire name of the file with the extension .gz
    filename = link.split("/")[-1]
    # Give the name of the file without .gz
    uncompressName = filename[:-3] + ".txt"
    pathToFile = helper.formatPathToFile(uncompressName)
    # Check whether the file already exists
    if not helper.existFile(pathToFile):
        print("Downloading the file")
        # Fetch the file by the url and decompress it
        r = requests.get(link)
        decompressedFile = gzip.decompress(r.content)
        # Create the .txt file
        with open(pathToFile, "wb") as f:
            f.write(decompressedFile)
    # Use the previously created file (.txt)
    with open(pathToFile, "r+b") as file:
        # Import the tab-delimited file
        try:
            array = pd.read_csv(file, sep="\t", header=None)
        except pd.errors.EmptyDataError:
            array = pd.DataFrame()
    # Name the columns
    array.columns = ["TF_ID", "Gene_ID", "Family"]
    data = array.loc[array['TF_ID'] == MSUID]
    if not data.empty:
        return data
    else:
        data = array.loc[array['Gene_ID'] == MSUID]
        if data.empty:
            return False
        else:
            hashmap = {"Family": data["Family"].values[0]}
            return hashmap
async def decompress(cls, data: bytes, headers: T_Headers) -> bytes:
    return gzip.decompress(data)
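# gzip.decompress() is CPU-bound and blocks the event loop on large payloads;
# a sketch (assumed, not from the original source) that offloads the work to
# the default thread pool instead:
import asyncio
import gzip

async def decompress_offloaded(data: bytes) -> bytes:
    loop = asyncio.get_running_loop()
    return await loop.run_in_executor(None, gzip.decompress, data)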
    host='localhost', port='5432')
cur = conn.cursor()
cur.execute('''
CREATE TABLE IF NOT EXISTS test_temp (
    id BIGINT PRIMARY KEY NOT NULL,
    item JSONB NOT NULL,
    bid BIGINT NOT NULL,
    buyout BIGINT NOT NULL,
    quantity INT NOT NULL,
    unit_price BIGINT NOT NULL,
    time_left TEXT NOT NULL
);''')
conn.commit()

with gzip.open('/home/olenovo/projects/Wow2/out_157/202011232000.gz') as f:
    # gzip.open() already decompresses on read, so no second gzip.decompress() is needed
    file_content = f.read()
record_list = json.loads(file_content)['auctions']
table_name = 'test_temp'
columns = ['id', 'item', 'bid', 'buyout', 'quantity', 'unit_price', 'time_left']
for i in range(0, len(record_list)):
    vals = []
    for column in columns:
        try:
            vals.append(record_list[i][column])
        except KeyError:
            vals.append(0)
    sql_string = 'insert into test_temp (%s) values (%s, %s, %s, %s, %s, %s, %s) on conflict (id) do nothing'
    #print(cur.mogrify(sql_string, (AsIs(', '.join(columns)), vals[0], json.dumps(vals[1]), vals[2], vals[3], vals[4], vals[5], vals[6])))
+ "BOVPqjhAdItXunkKPR5fMz/cTcnTFYJWqe8rm1dXKM/EocaM2K6XMEsqs0tb9JkroWWf7oQUKRnU6l3q3jLn8X+d/LFN2PT3kt5Sjm7iw0i+SRt0nTkQYEtQEJwmfoZF" + "bwxV1OClngYvGQ2R7LO6VJMB+aYfTwEQJCn80WoB3g30BMKofDdmU0ItOXEnSOmIreFn5c0Az6p6XjrREqUD36oxMBNNXIrnBn89yRkzALdV+k9yOZHLxZvvr1V8fVja" + "jAmZFnV6TAPJc90eOReCERzlbpw/QGdMQiOBXr1GbD58UkqWQ6Fh7T1jpZKc+PLtPrm8JetD97P2T1StlxPOBGgHC5yFZgSCCgoq7FO8NAcpal6ypI+gEXyeiKVbS2qq" + "FI+uLOyaGguEACaFs+Yb2Fb1GWzekjmCmCGTN8zq/hPs2DdaneEYjHWch84bABK00op0aQLMUuh5sgvP+hdjeaMXtUKvYrlfFW42rSw+2HiOFw4Hk7Wnt4VK3SHXZy9s" + "wkYuxFuiPcnxTo9h3Mws9VBr2epo+okJfhkFV731WeF65YS/AXuheb6CMotLzXFUFWGReoWzj2XGFZJd2k1Jq3M06IAKFfmKzs4EbliNkcHrQiVaRfsbkCo9i8aAVKE4" + "fdRLPDwqb6nEony/wJeQHE1U8gZkt17xpvFBwOs00UTGPOJx3dme42yPe+Ws2/pSVsFHER+0jRntnjzyhI4XWpjq7mp6V54pIAHDghNKgnggWIT2zD/ZXlX+I9GvCn9p" + "+DdEctYlMBPQ9v5y1Chsxk3daGnPWK3OmFVRVleNRgTqjEvcX93e4rn1SMsZa8LXqmoqz+Xl6IQNG94faJkckZfOl77NnQrkzKxRdbOVBMvTxieAUDu55EZXTDnthWzH" + "c6KKBhmUKjOlqxSPLCoFx4L7gT3nvm+3puGoMU8s4exfcNEcl58zAtdUxKygNvRSyaqt4jxY59ZlBJ3g1QSQlVd771ftfLkORDm9dS5gJ/7rvUtpXHt9/nfx3P7+6W0Q" + "0LL2yvDvAdnA/0c/4DXuairg+8Hj8YxB4K/vN/nP+6vA31SGvs8T5L0fwoCPrX/fAv6fR0cG/zYJL/2+sdcc9vy/P04M/n75Zd5mNLzeHQbs+b+/b80Z/Nzs/f36aPuO" + "cAj4fr/K/fdTsBdWFHDMMAz/xftetvevTWD284OlvuBnk+P+b76/X8JOVUBfX2uDwO7fd7X6iIB/Jz8PGe+ndwLfT5h02Xd3kK/DV58Kz58d6v6/9/qeH2xoBWbX9P2H" + "ad4+TFINjgr2+mYerqgEDiVer2L/BXwDn7c6Du4+EANeCwJiMj5ifj78+P59VCQkCHQtBXziS22vfYwJLAh9UT1efStt3v1z9b7fWvu48Qg4dPj31rC3ZnDykXL6/WNx" + "RPfvaazvfenu3zQv0O7pTL+g/Ofi39t+390DUncAy9prwd+vo4zP5ykBNG2Pd7a/JDn/Hv5b3Lj1u30qIeBH4S7xk8P/++Um4N+sQh9D31/8+72H5++K9a1/J4Q/mx3A" + "v7tr/93g7srvT8Pr8/82h94uJfp33ltfz/UdMwb/L/6vhFn9P9h9dSn6RANQkP8BzUk35Q9BAAA=") g = base64.b64decode(_g)[1:] for i in range(base64.b64decode(_g)[0]): g = gzip.decompress(g) g=list(g) def gr(x,y): if(x>=0 and y>=0 and x<500 and y<180): return g[y*500 + x]; return 0; def gw(x,y,v): if(x>=0 and y>=0 and x<500 and y<180): g[y*500 + x]=v; def td(a,b): return ((0)if(b==0)else(a//b)) def tm(a,b): return ((0)if(b==0)else(a%b)) s=[] def sp(): global s
nameo = sys.argv[1]
named = sys.argv[2]
# String is Base64 Encoded
with open('origin.txt', 'rb') as fo:
    raw = fo.read()
# raw_zip = Decoded ByteString
raw_zip = bytearray(base64.b64decode(raw))[4:]
# gzip_header to be kept
zip_header = raw_zip[:10]
# xml file
raw_content = gzip.decompress(raw_zip)
# replacement here
result_content = raw_content.decode().replace(nameo, named).encode()
# compress the data and append the sap AO accepted gzip header
result_zip = gzip.compress(result_content, 6)
result_zip = zip_header + bytearray(result_zip)[10:]
# add AO accepted string header and base64 encoding
result = base64.b64encode(len(result_content).to_bytes(4, "little") + result_zip)
# generating output
with open('result.txt', 'wb') as fw:
    fw.write(result)
def main(args, pacu_main: 'Main'):
    session = pacu_main.get_active_session()

    ###### Don't modify these. They can be removed if you are not using the function.
    args = parser.parse_args(args)
    print = pacu_main.print
    fetch_data = pacu_main.fetch_data
    ######

    instances = []
    templates = []
    summary_data = {'instance_downloads': 0, 'template_downloads': 0}
    if args.instance_ids is not None:
        for instance in args.instance_ids.split(','):
            instance_id, region = instance.split('@')
            instances.append({'InstanceId': instance_id, 'Region': region})
    elif args.template_ids is None:
        # If args.instance_ids was not passed in,
        # only fetch instances if args.template_ids
        # is also None
        if fetch_data(['EC2', 'Subnets'], module_info['prerequisite_modules'][0], '--instances') is False:
            print('Pre-req module not run successfully. Exiting...')
            return None
        instances = session.EC2['Instances']
    if args.template_ids is not None:
        for template in args.template_ids.split(','):
            template_id, region = template.split('@')
            templates.append({'LaunchTemplateId': template_id, 'Region': region})
    elif args.instance_ids is None:
        # If args.template_ids was not passed in,
        # only fetch templates if args.instance_ids
        # is also None
        if fetch_data(['EC2', 'LaunchTemplates'], module_info['prerequisite_modules'][0], '--launch-templates') is False:
            print('Pre-req module not run successfully. Exiting...')
            templates = []
        else:
            templates = session.EC2['LaunchTemplates']

    if instances:
        print('Targeting {} instance(s)...'.format(len(instances)))
        for instance in instances:
            # if the filter is activated check the tags. If tags do not match skip instance
            if args.filter and not has_tags(args.filter.split(','), instance):
                continue
            instance_id = instance['InstanceId']
            region = instance['Region']
            client = pacu_main.get_boto3_client('ec2', region)
            try:
                user_data = client.describe_instance_attribute(
                    InstanceId=instance_id, Attribute='userData')['UserData']
            except ClientError as error:
                code = error.response['Error']['Code']
                print('FAILURE: ')
                if code == 'AccessDenied':
                    print('  Access denied to DescribeInstanceAttribute.')
                    print('Skipping the rest of the instances...')
                    break
                else:
                    print('  ' + code)
                    continue  # user_data is unbound after a failure; skip this instance
            if 'Value' in user_data.keys():
                decoded = base64.b64decode(user_data['Value'])
                try:
                    decompressed = gzip.decompress(decoded)
                    formatted_user_data = '{}@{}:\n{}\n\n'.format(
                        instance_id, region,
                        decompressed.decode('utf-8', 'backslashreplace'))
                except:
                    formatted_user_data = '{}@{}:\n{}\n\n'.format(
                        instance_id, region,
                        decoded.decode('utf-8', 'backslashreplace'))
                print('  {}@{}: User Data found'.format(instance_id, region))
                # Check for secrets
                find_secrets(formatted_user_data)
                # Write to the "all" file
                with save('ec2_user_data/all_user_data.txt', 'a+') as f:
                    f.write(formatted_user_data)
                # Write to the individual file
                with save('ec2_user_data/{}.txt'.format(instance_id)) as f:
                    f.write(formatted_user_data.replace('\\t', '\t').replace('\\n', '\n').rstrip())
                summary_data['instance_downloads'] += 1
            else:
                print('  {}@{}: No User Data found'.format(instance_id, region))
        print()
    else:
        print('No instances to target.\n')

    if templates:
        print('Targeting {} launch template(s)...'.format(len(templates)))
        for template in templates:
            template_id = template['LaunchTemplateId']
            region = template['Region']
            client = pacu_main.get_boto3_client('ec2', region)
            all_versions = []
            try:
                response = client.describe_launch_template_versions(
                    LaunchTemplateId=template_id)
                all_versions.extend(response['LaunchTemplateVersions'])
            except ClientError as error:
                code = error.response['Error']['Code']
                print('FAILURE: ')
                if code == 'AccessDenied':
                    print('  Access denied to DescribeLaunchTemplateVersions.')
                    print('Skipping the rest of the launch templates...\n')
                    break
                else:
                    print('  ' + code)
                    continue  # response is unbound after a failure; skip this template
            while response.get('NextToken'):
                response = client.describe_launch_template_versions(
                    LaunchTemplateId=template_id, NextToken=response['NextToken'])
                all_versions.extend(response['LaunchTemplateVersions'])
            for version in all_versions:
                if version['LaunchTemplateData'].get('UserData'):
                    try:
                        was_unzipped = False
                        user_data = version['LaunchTemplateData']['UserData']
                        formatted_user_data = '{}-version-{}@{}:\n{}\n\n'.format(
                            template_id, version['VersionNumber'], region,
                            base64.b64decode(user_data).decode('utf-8'))
                    except UnicodeDecodeError:
                        try:
                            decoded = base64.b64decode(user_data)
                            decompressed = gzip.decompress(decoded)
                            formatted_user_data = '{}-version-{}@{}:\n{}\n\n'.format(
                                template_id, version['VersionNumber'], region,
                                decompressed.decode('utf-8'))
                            was_unzipped = True
                        except:
                            print('ERROR: GZIP decompressing template data')
                    print('  {}-version-{}@{}: User Data found'.format(
                        template_id, version['VersionNumber'], region))
                    if was_unzipped:
                        print('    Gzip decoded the User Data')
                    # Write to the "all" file
                    with save('ec2_user_data/all_user_data.txt', 'a+') as f:
                        f.write(formatted_user_data)
                    # Write to the individual file
                    with save('ec2_user_data/{}-version-{}.txt'.format(
                            template_id, version['VersionNumber'])) as f:
                        f.write(formatted_user_data.replace('\\t', '\t').replace('\\n', '\n').rstrip())
                    summary_data['template_downloads'] += 1
                else:
                    print('  {}-version-{}@{}: No User Data found'.format(
                        template_id, version['VersionNumber'], region))
            print()
    else:
        print('No launch templates to target.\n')
    return summary_data
def get_primary_xml(self, repomd_xml):
    primary_xml_url = repomd_xml.find(
        f"{REPO_NS}data[@type='primary']/{REPO_NS}location").attrib["href"]
    response = self.loader.load(urljoin(self.url, primary_xml_url))
    return ElementTree.fromstring(gzip.decompress(response.content))
def gen_archive(config: ConfigParser, log: GitLog, outdir: Path) -> None:
    for s in config.sections():
        print(f"Working on archive page of: {s}")
        name = config.get(s, "name")
        info_url = config.get(s, "info_url", fallback=None)
        archive_dir = outdir / f"archive/{s}"
        archive_files = archive_dir.glob("commit_*.txt.gz")

        # read cache
        reports_cache: Dict[Path, Report] = {}
        cache_fn = f"{archive_dir}/reports.cache"
        if path.exists(cache_fn):
            with open(cache_fn, "rb") as f:
                reports_cache = pickle.load(f)
            cache_age = path.getmtime(cache_fn)
            # remove outdated cache entries
            reports_cache = {
                k: v for k, v in reports_cache.items() if path.getmtime(k) < cache_age
            }

        # read all archived reports
        archive_reports: Dict[GitSha, Report] = {}
        for fn in archive_files:
            report: Report
            if fn in reports_cache:
                report = reports_cache[fn]
            else:
                with open(fn, "rb") as f:
                    content = gzip.decompress(f.read())
                report = parse_report(content.decode("utf-8", errors="replace"), log)
                report.archive_path = path.basename(fn)[:-3]
                reports_cache[fn] = report
            assert report.sha and report.sha not in archive_reports
            archive_reports[report.sha] = report

        # write cache
        if reports_cache:
            with open(cache_fn, "wb") as f:
                pickle.dump(reports_cache, f)

        # loop over all relevant commits
        all_url_rows = []
        all_html_rows = []
        max_age_full = max([-1] + [log.index_by_sha[sha] for sha in archive_reports])
        for commit in log.commits[: max_age_full + 1]:
            sha = commit.sha
            html_row = "<tr>"
            html_row += commit_cell(sha, log)
            if sha in archive_reports:
                report = archive_reports[sha]
                assert report.archive_path
                html_row += status_cell(report.status, report.archive_path)
                html_row += f'<td align="left">{report.summary}</td>'
                archive_base_url = "https://dashboard.cp2k.org/archive"
                url_row = f"{archive_base_url}/{s}/{report.archive_path}.gz\n"
            else:
                html_row += 2 * "<td></td>"
                url_row = ""
            html_row += f'<td align="left">{html.escape(commit.author_name)}</td>'
            html_row += f'<td align="left">{html.escape(commit.message)}</td>'
            html_row += "</tr>\n\n"
            all_html_rows.append(html_row)
            all_url_rows.append(url_row)

        # generate html pages
        for full_archive in (False, True):
            if full_archive:
                html_out_postfix = "index_full.html"
                urls_out_postfix = "list_full.txt"
                toggle_link = '<p>View <a href="index.html">recent archive</a></p>'
                max_age = max_age_full
            else:
                html_out_postfix = "index.html"
                urls_out_postfix = "list_recent.txt"
                toggle_link = '<p>View <a href="index_full.html">full archive</a></p>'
                max_age = 100

            # generate archive index
            output = html_header(title=name)
            output += '<p>Go back to <a href="../../index.html">main page</a></p>\n'
            if info_url:
                output += f'<p>Get <a href="{info_url}">more information</a></p>\n'
            output += gen_plots(archive_reports, log, archive_dir, full_archive)
            output += toggle_link
            output += '<table border="1" cellspacing="3" cellpadding="5">\n'
            output += "<tr><th>Commit</th><th>Status</th><th>Summary</th>"
            output += "<th>Author</th><th>Commit Message</th></tr>\n\n"
            output += "".join(all_html_rows[: max_age + 1])
            output += "</table>\n"
            output += toggle_link
            output += html_footer()
            write_file(archive_dir / html_out_postfix, output)
            url_list = "".join(all_url_rows[: max_age + 1])
            write_file(archive_dir / urls_out_postfix, url_list)
def bytes2dir(tarbytes: bytes, dirname='.') -> None:
    """Take the bytes that make up a gzip'd tar archive and decompress to disk."""
    with tarfile.TarFile(fileobj=io.BytesIO(gzip.decompress(tarbytes))) as tar:
        tar.extractall(dirname)
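# A sketch (assumed, not from the original module) of the inverse helper,
# packing a directory back into the gzip'd tar bytes that bytes2dir() expects:
def dir2bytes(dirname: str = '.') -> bytes:
    buf = io.BytesIO()
    with tarfile.open(fileobj=buf, mode='w:gz') as tar:
        tar.add(dirname, arcname='.')
    return buf.getvalue()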
def get_data():
    remark = request.args.get('remark') if 'remark' in request.args else 'normal'
    project = request.args.get('project')
    User_Agent = request.headers.get('User-Agent')  # e.g. Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.100 Safari/537.36
    Host = request.headers.get('Host')  # e.g. 10.16.5.241:5000
    Connection = request.headers.get('Connection')  # e.g. keep-alive
    Pragma = request.headers.get('Pragma')  # e.g. no-cache
    Cache_Control = request.headers.get('Cache-Control')  # e.g. no-cache
    Accept = request.headers.get('Accept')[0:254] if request.headers.get('Accept') else None  # e.g. image/webp,image/apng,image/*,*/*;q=0.8
    Accept_Encoding = request.headers.get('Accept-Encoding')[0:254] if request.headers.get('Accept-Encoding') else None  # e.g. gzip, deflate
    Accept_Language = request.headers.get('Accept-Language')[0:254] if request.headers.get('Accept-Language') else None  # e.g. zh-CN,zh;q=0.9
    ua_platform = request.user_agent.platform  # client operating system
    ua_browser = request.user_agent.browser  # client browser
    ua_version = request.user_agent.version  # client browser version
    ua_language = request.user_agent.language  # client browser language
    ext = request.args.get('ext')
    url = request.url
    # ip = '124.115.214.179'  # test the Xi'an bug
    # ip = '36.5.99.68'  # test the Anhui bug
    if request.headers.get('X-Forwarded-For') is None:
        ip = request.remote_addr  # server exposed directly
    else:
        ip = request.headers.get('X-Forwarded-For')  # get the real address behind the SLB
    ip_city, ip_is_good = get_addr(ip)
    ip_asn, ip_asn_is_good = get_asn(ip)
    if ip_is_good == 0:
        ip_city = '{}'
    if ip_asn_is_good == 0:
        ip_asn = '{}'
    referrer = request.referrer
    if request.method == 'POST':
        # print(request.form.get())
        if 'data_list' in request.form:
            data_list = request.form.get('data_list')
            de64 = base64.b64decode(urllib.parse.unquote(data_list).encode('utf-8'))
            try:
                data_decodes = json.loads(gzip.decompress(de64))
            except:
                data_decodes = json.loads(de64)
            for data_decode in data_decodes:
                insert_data(project=project, data_decode=data_decode,
                            User_Agent=User_Agent, Host=Host,
                            Connection=Connection, Pragma=Pragma,
                            Cache_Control=Cache_Control, Accept=Accept,
                            Accept_Encoding=Accept_Encoding,
                            Accept_Language=Accept_Language, ip=ip,
                            ip_city=ip_city, ip_asn=ip_asn, url=url,
                            referrer=referrer, remark=remark,
                            ua_platform=ua_platform, ua_browser=ua_browser,
                            ua_version=ua_version, ua_language=ua_language,
                            ip_is_good=ip_is_good,
                            ip_asn_is_good=ip_asn_is_good)
        elif 'data' in request.form:
            # print(request.cookies)
            data = request.form.get('data')
            de64 = base64.b64decode(urllib.parse.unquote(data).encode('utf-8'))
            try:
                data_decode = json.loads(gzip.decompress(de64))
            except:
                data_decode = json.loads(de64)
            insert_data(project=project, data_decode=data_decode,
                        User_Agent=User_Agent, Host=Host,
                        Connection=Connection, Pragma=Pragma,
                        Cache_Control=Cache_Control, Accept=Accept,
                        Accept_Encoding=Accept_Encoding,
                        Accept_Language=Accept_Language, ip=ip,
                        ip_city=ip_city, ip_asn=ip_asn, url=url,
                        referrer=referrer, remark=remark,
                        ua_platform=ua_platform, ua_browser=ua_browser,
                        ua_version=ua_version, ua_language=ua_language,
                        ip_is_good=ip_is_good, ip_asn_is_good=ip_asn_is_good)
        else:
            write_to_log(filename='api', defname='get_datas', result=str(request.form))
            # print(request.form)
    elif request.method == 'GET':
        if 'data' in request.args:
            data = request.args.get('data')
            de64 = base64.b64decode(urllib.parse.unquote(data).encode('utf-8'))
            try:
                data_decode = json.loads(gzip.decompress(de64))
            except:
                data_decode = json.loads(de64)
            insert_data(project=project, data_decode=data_decode,
                        User_Agent=User_Agent, Host=Host,
                        Connection=Connection, Pragma=Pragma,
                        Cache_Control=Cache_Control, Accept=Accept,
                        Accept_Encoding=Accept_Encoding,
                        Accept_Language=Accept_Language, ip=ip,
                        ip_city=ip_city, ip_asn=ip_asn, url=url,
                        referrer=referrer, remark=remark,
                        ua_platform=ua_platform, ua_browser=ua_browser,
                        ua_version=ua_version, ua_language=ua_language,
                        ip_is_good=ip_is_good, ip_asn_is_good=ip_asn_is_good)
        else:
            write_to_log(filename='api', defname='get_datas', result=url)
    else:
        write_to_log(filename='api', defname='get_datas', result=str(request.method) + url)
    bitimage1 = os.path.join('image', '43byte.gif')
    with open(bitimage1, 'rb') as f:
        returnimage = f.read()
    return Response(returnimage, mimetype="image/gif")
def add():
    global wrok
    global insatData
    wrok = 0
    request.get_data()
    compressed_data = request.data
    decom = gzip.decompress(compressed_data)
    data1 = decom.decode('UTF-8')
    print(type(data1))
    ldata = {}
    print(str(len(names)))
    for ind in range(len(names)):
        n1 = names[ind]
        print('CHECK ' + str(ind) + ' ' + n1 + ' ')
        orig = orignames[ind]
        p1 = data1.find(n1)
        if p1 < 0:
            print('No name in data ' + n1 + ' ' + orig)
            continue
        s1 = ''
        while data1[p1] != ' ':
            p1 += 1
        p1 += 2
        while data1[p1] != '\n':
            s1 += data1[p1]
            p1 += 1
        print(s1)
        name = s1.split('.')
        nm = ''
        for i in range(3):
            nm += name[i] + '.'
        nm += name[3]
        print(nm)
        nm += '.Znachenie'
        p2 = data1.find(nm)
        if p2 < 0:
            print('!!!No value ' + n1 + ' ' + orig + ' = ' + nm)
            continue
        while data1[p2 - 2] != '\n':
            p2 -= 1
        sval = ''
        while data1[p2] != '\"':
            sval += data1[p2]
            p2 += 1
        print('value[' + orig + '] = ' + sval)
        varval = sval.replace(',', '.')
        try:
            nval = float(varval)
        except:
            nval = -13
        if orignames[ind].find('DS') > -1:
            varval = "%d" % (int(nval) * 1000)
        ldata[orignames[ind]] = varval
    #print(len(data.decode('UTF-8')))
    test1 = "".join(str(ldata))
    insatData = test1.replace('\'', '\"')
    print(insatData)
    print(len(names))
    if wrok == 0:
        f = open('insatJson.txt', 'w')
        f.write(insatData)
        f.close()
        wrok = 1
    # shutdown()
    resp = make_response('hello')
    resp.headers['content-type'] = 'text/html'
    resp.headers['Access-Control-Allow-Origin'] = '*'
    resp.headers['Access-Control-Allow-Methods'] = 'GET, POST, PUT, DELETE, OPTIONS'
    return resp
def unpack_data(self):
    if self.is_gzipped_html():
        data = gzip.decompress(self.packed_data)
        self._data = data.decode('utf8')
def download(self, link, media_resource=None, path=None, convert_to_vtt=False):
    """
    Downloads a subtitle link (.srt/.vtt file or gzip/zip OpenSubtitles
    archive link) to the specified directory

    :param link: Local subtitles file or OpenSubtitles gzip download link
    :type link: str

    :param path: Path where the subtitle file will be downloaded
        (default: temporary file under /tmp)
    :type path: str

    :param media_resource: Name of the media resource. If set and if it's a
        media local file then the subtitles will be saved in the same folder
    :type media_resource: str

    :param convert_to_vtt: If set to True, then the downloaded subtitles
        will be converted to VTT format (default: no conversion)
    :type convert_to_vtt: bool

    :returns: dict. Format::

        {
            "filename": "/path/to/subtitle/file.srt"
        }
    """
    if link.startswith('file://'):
        link = link[len('file://'):]
    if os.path.isfile(link):
        if convert_to_vtt:
            link = self.to_vtt(link).output
        return {'filename': link}
    gzip_content = requests.get(link).content
    if not path and media_resource:
        if media_resource.startswith('file://'):
            media_resource = media_resource[len('file://'):]
        if os.path.isfile(media_resource):
            media_resource = os.path.abspath(media_resource)
            path = os.path.join(
                os.path.dirname(media_resource),
                '.'.join(os.path.basename(media_resource).split('.')[:-1])) + '.srt'
    if path:
        f = open(path, 'wb')
    else:
        f = tempfile.NamedTemporaryFile(prefix='media_subs_', suffix='.srt', delete=False)
        path = f.name
    try:
        with f:
            f.write(gzip.decompress(gzip_content))
        if convert_to_vtt:
            path = self.to_vtt(path).output
    except Exception as e:
        os.unlink(path)
        raise e
    return {'filename': path}
def prepareBatchesForExecutionTrace(configDir, executionTraceId, executionSessionId, batchDirectory):
    try:
        config = Configuration(configDir)
        agent = DeepLearningAgent(config, whichGpu=None)
        sampleCacheDir = config.getKwolaUserDataDirectory("prepared_samples")
        cacheFile = os.path.join(sampleCacheDir, executionTraceId + ".pickle.gz")
        if not os.path.exists(cacheFile):
            addExecutionSessionToSampleCache(executionSessionId, config)
            cacheHit = False
        else:
            cacheHit = True
        with open(cacheFile, 'rb') as file:
            sampleBatch = pickle.loads(gzip.decompress(file.read()))
        imageWidth = sampleBatch['processedImages'].shape[3]
        imageHeight = sampleBatch['processedImages'].shape[2]
        # Calculate the crop positions for the main training image
        if config['training_enable_image_cropping']:
            randomXDisplacement = random.randint(
                -config['training_crop_center_random_x_displacement'],
                config['training_crop_center_random_x_displacement'])
            randomYDisplacement = random.randint(
                -config['training_crop_center_random_y_displacement'],
                config['training_crop_center_random_y_displacement'])
            cropLeft, cropTop, cropRight, cropBottom = agent.calculateTrainingCropPosition(
                sampleBatch['actionXs'][0] + randomXDisplacement,
                sampleBatch['actionYs'][0] + randomYDisplacement,
                imageWidth, imageHeight)
        else:
            cropLeft = 0
            cropRight = imageWidth
            cropTop = 0
            cropBottom = imageHeight
        # Calculate the crop positions for the next state image
        if config['training_enable_next_state_image_cropping']:
            nextStateCropCenterX = random.randint(10, imageWidth - 10)
            nextStateCropCenterY = random.randint(10, imageHeight - 10)
            nextStateCropLeft, nextStateCropTop, nextStateCropRight, nextStateCropBottom = agent.calculateTrainingCropPosition(
                nextStateCropCenterX, nextStateCropCenterY,
                imageWidth, imageHeight, nextStepCrop=True)
        else:
            nextStateCropLeft = 0
            nextStateCropRight = imageWidth
            nextStateCropTop = 0
            nextStateCropBottom = imageHeight
        # Crop all the input images and update the action x & action y
        # This is done at this step because the cropping is random
        # and thus you don't want to store the randomly cropped version
        # in the redis cache
        sampleBatch['processedImages'] = sampleBatch['processedImages'][:, :, cropTop:cropBottom, cropLeft:cropRight]
        sampleBatch['pixelActionMaps'] = sampleBatch['pixelActionMaps'][:, :, cropTop:cropBottom, cropLeft:cropRight]
        sampleBatch['rewardPixelMasks'] = sampleBatch['rewardPixelMasks'][:, cropTop:cropBottom, cropLeft:cropRight]
        sampleBatch['actionXs'] = sampleBatch['actionXs'] - cropLeft
        sampleBatch['actionYs'] = sampleBatch['actionYs'] - cropTop
        sampleBatch['nextProcessedImages'] = sampleBatch['nextProcessedImages'][:, :, nextStateCropTop:nextStateCropBottom, nextStateCropLeft:nextStateCropRight]
        sampleBatch['nextPixelActionMaps'] = sampleBatch['nextPixelActionMaps'][:, :, nextStateCropTop:nextStateCropBottom, nextStateCropLeft:nextStateCropRight]
        # Add augmentation to the processed images. This is done at this stage
        # so that we don't store the augmented version in the redis cache.
        # Instead, we want the pure version in the redis cache and create a
        # new augmentation every time we load it.
        processedImage = sampleBatch['processedImages'][0]
        augmentedImage = agent.augmentProcessedImageForTraining(processedImage)
        sampleBatch['processedImages'][0] = augmentedImage
        fileDescriptor, fileName = tempfile.mkstemp(".bin", dir=batchDirectory)
        with open(fileDescriptor, 'wb') as batchFile:
            pickle.dump(sampleBatch, batchFile)
        return fileName, cacheHit
    except Exception:
        traceback.print_exc()
        print("", flush=True)
        raise
async def load_xml_gz(self, file_path, session):
    # fetches nytimes sitemap .xml.gz files with the given session
    async with session.get(file_path) as resp:
        gzip_bin = await resp.read()
        decoded_gzip = gzip.decompress(gzip_bin)
        return decoded_gzip
def _decompress(self):
    self.contents = gzip.decompress(self.raw_contents).decode('utf-8')
def ungzip(data):
    try:
        data = gzip.decompress(data)
    except:
        # not gzip-compressed; return the data unchanged
        pass
    return data
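# A stricter variant (assumed, not from the original source) that checks the
# two-byte gzip magic number instead of swallowing every exception:
def ungzip_checked(data):
    if data[:2] == b'\x1f\x8b':
        return gzip.decompress(data)
    return data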
def load_tar(filename):
    with tarfile.open(filename) as tar:
        with tar.extractfile(gs.JOB_SAVE) as f:
            return pickle.loads(gzip.decompress(f.read()))
def doit(args):
    global logger, sourcettf, outputdir, fontdir
    logger = args.logger
    sourcettf = args.ttfont

    # Create output directory, including fonts subdirectory, if not present
    outputdir = args.outputdir
    os.makedirs(outputdir, exist_ok=True)
    fontdir = os.path.join(outputdir, 'fonts')
    os.makedirs(fontdir, exist_ok=True)

    # Read and save feature mapping
    for r in args.map:
        if r[0].startswith('#'):
            continue
        elif r[0].startswith('lang='):
            r[0] = r[0][5:]
            lang_maps[r[0]] = lang_map(r)
        else:
            feat_maps[r[0]] = feat_map(r)

    # Open and verify the input file is a tunable font; extract and parse feat_all from the font.
    font = ttLib.TTFont(sourcettf)
    raw_data = font.getTableData('Silt')
    feat_xml = gzip.decompress(raw_data)  # .decode('utf-8')
    root = ET.fromstring(feat_xml)
    if root.tag != 'all_features':
        logger.log("Invalid TypeTuner feature file: missing root element", "S")
    for i, f in enumerate(root.findall('.//feature')):
        # add to dictionary
        ttfeat = feat(f, i)
        feat_all[ttfeat.name] = ttfeat

    # Open and prepare the xslt file to transform the ftml:
    xslt = ET.parse(args.xsl)
    xslt_transform = ET.XSLT(xslt)

    # Process all ftml files:
    for arg in args.ftml:
        for infname in glob(arg):
            # Based on the input filename, construct the output name:
            # take the base filename and change its extension to html.
            outfname = os.path.join(outputdir, os.path.splitext(os.path.basename(infname))[0] + '.html')
            logger.log('Processing: {} -> {}'.format(infname, outfname), 'P')

            # Each named style in the FTML ultimately maps to a TypeTuned font that will be added via @fontface.
            # We need to remember the names of the styles and their associated fonts so we can hack the html.
            sname2font = dict()  # Indexed by ftml stylename; result is a font object

            # Parse the FTML
            ftml_doc = ET.parse(infname)

            # Adjust <title> to show this is from TypeTuner
            head = ftml_doc.find('head')
            title = head.find('title')
            title.text += " - TypeTuner"

            # Replace all <fontsrc> elements with two identical ones from the input font:
            # one will remain unchanged; the other will eventually be changed to a typetuned font.
            ET.strip_elements(head, 'fontsrc')
            fpathname = os.path.relpath(sourcettf, outputdir).replace('\\', '/')  # for css make sure all slashes are forward!
            head.append(ET.fromstring('<fontsrc>url({})</fontsrc>'.format(fpathname)))  # First font
            head.append(ET.fromstring('<fontsrc>url({})</fontsrc>'.format(fpathname)))  # Second font, same as the first

            # Iterate over all the styles in this ftml file, building tuned fonts to match if not already done.
            for style in head.iter('style'):
                sname = style.get('name')  # e.g. "some_style"
                feats = style.get('feats')  # e.g. "'cv02' 1, 'cv60' 1" -- parsed to get the needed TT features
                lang = style.get('lang')  # e.g. "sd"
                font_tag = cache_font(feats, lang, args.norebuild)
                # font_tag could be None due to errors, but messages should already have been logged.
                # If it is valid, remember how to find this font from the ftml stylename.
                if font_tag:
                    sname2font[sname] = font_tag2font[font_tag]

            # Convert to html via the supplied xslt
            html_doc = xslt_transform(ftml_doc)

            # Two modifications to make in the html:
            #   1) add all @fontface specs to the <style> element
            #   2) fix up all occurrences of <td> elements referencing font2

            # Add @fontface to <style>
            style = html_doc.find('//style')
            style.text = style.text + '\n' + '\n'.join([x.fontface for x in sname2font.values()])

            # Iterate over all <td> elements looking for font2 and a style or lang indicating feature settings
            classRE = re.compile(r'string\s+(?:(\w+)\s+)?font2$')
            for td in html_doc.findall('//td'):
                tdclass = td.get('class')
                tdlang = td.get('lang')
                m = classRE.match(tdclass)
                if m:
                    sname = m.group(1)
                    if sname:
                        # The stylename will get us directly to the font
                        try:
                            td.set('class', 'string {}'.format(sname2font[sname].font_tag))
                            if tdlang:
                                # If there is also a lang attribute, we no longer need it.
                                del td.attrib['lang']
                        except KeyError:
                            logger.log("Style name {} not available.".format(sname), "W")
                    elif tdlang:
                        # Otherwise we'll assume there is only the lang attribute
                        try:
                            td.set('class', 'string {}'.format(lang2font[tdlang].font_tag))
                            del td.attrib['lang']  # lang attribute no longer needed.
                        except KeyError:
                            logger.log("Style for langtag {} not available.".format(tdlang), "W")

            # Ok -- write the html out!
            html = ET.tostring(html_doc, pretty_print=True, method='html', encoding='UTF-8')
            with open(outfname, 'wb') as f:
                f.write(html)
def __login(self, prelogin_dict, prelt, sp, su, url, cookies=None, door=None):
    headers = {
        'Connection': 'keep-alive',
        'Pragma': 'no-cache',
        'Cache-Control': 'no-cache',
        'Origin': 'https://login.sina.com.cn',
        'Content-Type': 'application/x-www-form-urlencoded',
        'Accept': '*/*',
        'Referer': 'https://login.sina.com.cn/signup/signin.php',
        'Accept-Encoding': 'gzip, deflate, br',
        'Accept-Language': 'zh-CN,zh-HK;q=0.8,zh-TW;q=0.6,zh;q=0.4,en-US;q=0.2,en;q=0.2',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) '
                      'Chrome/61.0.3163.91 Safari/537.36'
    }
    if cookies:
        headers['Cookie'] = cookies

    data_dict = {
        'cdult': 3,
        'domain': 'sina.com.cn',
        # 'door': 'mbyqu',  # captcha; set below when required
        'encoding': 'UTF-8',
        'entry': 'account',
        'from': 'null',
        'gateway': 1,
        'nonce': prelogin_dict['nonce'],
        'pagerefer': '',
        'prelt': prelt,  # prelogin time
        'pwencode': 'rsa2',
        'returntype': 'TEXT',
        'rsakv': prelogin_dict['rsakv'],
        'savestate': 0,  # number of days to stay logged in
        'servertime': prelogin_dict['servertime'],
        'service': 'account',
        'sp': sp,  # password
        'sr': '1920*1080',  # screen resolution
        'su': su,  # username
        'useticket': 0,
        'vsnf': 1,
    }
    if door:
        data_dict['door'] = door

    data = '&'.join('{}={}'.format(k, v) for k, v in data_dict.items()).encode()

    post_request = request.Request(url, data=data, headers=headers,
                                   origin_req_host='login.sina.com.cn', method='POST')
    post_resp = request.urlopen(post_request)
    if post_resp.getcode() == 200:
        read = post_resp.read()
        if post_resp.getheader('Content-Encoding') == 'gzip':
            read = gzip.decompress(read)
        rtn_json = read.decode()
        rtn_dict = json.loads(rtn_json)
        cookies_str = ''
        if rtn_dict['retcode'] == '0':
            # Login succeeded
            cookies_str = post_resp.getheader('Set-Cookie')
            cookies_str = self.__process_cookies_str(cookies_str)
        elif rtn_dict['retcode'] == '4049' or rtn_dict['retcode'] == '2070':
            # A captcha is required, or the supplied captcha was wrong
            door_headers = headers.copy()
            door_headers['Accept'] = 'image/webp,image/apng,image/*,*/*;q=0.8'
            door_headers.pop('Origin')
            door_headers.pop('Content-Type')
            door_rnd = random.randint(10000000, 99999999)
            door_url = 'https://login.sina.com.cn/cgi/pin.php?r=%s&s=0' % str(door_rnd)
            door_req = request.Request(door_url, headers=door_headers,
                                       origin_req_host='login.sina.com.cn', method='GET')
            door_resp = request.urlopen(door_req)
            if door_resp.getcode() == 200:
                cookies_str = door_resp.getheader('Set-Cookie')
                png = door_resp.read()
                door = recognize(png)
                _cookies = self.__process_cookies_str(cookies_str)
                return self.__login(prelogin_dict, prelt, sp, su, url, _cookies, door)
        else:
            raise RuntimeError('Unknown error; response was: %s' % rtn_dict)
        return cookies_str
    else:
        raise RuntimeError('status: %s, url: %s' % (post_resp.getcode(), post_resp.geturl()))
def __init__(self, **kwargs):
    """Create the Tree from SVG ``text``."""
    bytestring = kwargs.get('bytestring')
    file_obj = kwargs.get('file_obj')
    url = kwargs.get('url')
    unsafe = kwargs.get('unsafe')
    parent = kwargs.get('parent')
    parent_children = kwargs.get('parent_children')
    tree_cache = kwargs.get('tree_cache')
    element_id = None
    self.url_fetcher = kwargs.get('url_fetcher', fetch)

    if bytestring is not None:
        self.url = url
    elif file_obj is not None:
        bytestring = file_obj.read()
        self.url = getattr(file_obj, 'name', None)
        if self.url == '<stdin>':
            self.url = None
    elif url is not None:
        parent_url = parent.url if parent else None
        parsed_url = parse_url(url, parent_url)
        if parsed_url.fragment:
            self.url = urlunparse(parsed_url[:-1] + ('',))
            element_id = parsed_url.fragment
        else:
            self.url = parsed_url.geturl()
            element_id = None
        self.url = self.url or None
    else:
        raise TypeError('No input. Use one of bytestring, file_obj or url.')

    self_is_parent = (
        (parent and self.url == parent.url) or
        (url and url.startswith('#') and not self.url))
    if self_is_parent:
        root_parent = parent
        while root_parent.parent is not None:
            root_parent = root_parent.parent
        tree = root_parent.xml_tree
    else:
        if not bytestring:
            bytestring = self.fetch_url(parse_url(self.url), 'image/svg+xml')
        # Transparently handle gzip-compressed SVG (.svgz) via the magic bytes
        if len(bytestring) >= 2 and bytestring[:2] == b'\x1f\x8b':
            bytestring = gzip.decompress(bytestring)
        tree = ElementTree.fromstring(
            bytestring, forbid_entities=not unsafe, forbid_external=not unsafe)
    self.xml_tree = tree
    root = cssselect2.ElementWrapper.from_xml_root(tree)
    style = parent.style if parent else css.parse_stylesheets(self, url)
    if element_id:
        for element in root.iter_subtree():
            if element.id == element_id:
                root = element
                self.xml_tree = element.etree_element
                break
        else:
            raise TypeError('No tag with id="{}" found.'.format(element_id))
    super().__init__(root, style, self.url_fetcher, parent,
                     parent_children, self.url, unsafe)
    self.root = True
    if tree_cache is not None and self.url:
        tree_cache[(self.url, self.get('id'))] = self
import websocket
import json
import gzip

if __name__ == "__main__":
    websocket.enableTrace(True)
    WEBSOCKET_MARKET_HOST = 'wss://ws.wbfex.com/kline-api/ws'  # market data API
    ws = websocket.create_connection(WEBSOCKET_MARKET_HOST)

    # Subscribe to the ETH/USDT ticker channel
    params = {"channel": "market_" + "ethusdt" + "_ticker", "cb_id": 150}
    req = {"event": "sub", 'params': params}
    ws.send(json.dumps(req))
    print("Sent")

    print("Receiving...")
    while True:
        result = ws.recv()
        if isinstance(result, bytes):
            # Binary frames carry gzip-compressed JSON
            data = gzip.decompress(result).decode('utf-8')
            print(data)
        else:
            print("Received '%s'" % result)
    ws.close()
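# Usage note for the receive loop above: many exchange market-data feeds keep
# the connection alive with gzip-compressed ping frames and drop clients that
# do not answer with a matching pong. Whether wbfex follows this convention is
# an assumption, but a defensive handler would look something like:
def handle_message(ws, data):
    msg = json.loads(data)
    if 'ping' in msg:  # heartbeat frame; echo the timestamp back as a pong
        ws.send(json.dumps({'pong': msg['ping']}))
    return msg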