def _decode_content(content, headers):
    """Decode an HTTP response body according to its Content-Encoding and charset."""
    # http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html
    encodings = headers.get('content-encoding', '').split(',')
    # If multiple encodings have been applied to an entity, the content
    # codings MUST be listed in the order in which they were applied.
    if 'gzip' in encodings:
        try:
            content = GzipFile(fileobj=StringIO(content)).read()
        except IOError:
            # pylint:disable=protected-access
            # Decompress partial content by skipping the trailing checksum comparison.
            GzipFile._read_eof = lambda *args, **kwargs: None
            # pylint:enable=protected-access
            content = GzipFile(fileobj=StringIO(content)).read()
    # elif 'compress' in encodings: pass
    # elif 'deflate' in encodings: pass
    charset = _get_charset(headers)
    if charset is not None:
        try:
            content = content.decode(charset).encode('utf-8')
        except UnicodeDecodeError:
            pass
    return content.decode('utf-8')
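# A minimal alternative sketch (not from the snippet above): instead of monkey-patching
# GzipFile._read_eof to tolerate truncated bodies, zlib.decompressobj with wbits=16+MAX_WBITS
# understands the gzip wrapper and simply returns whatever data is available, without any
# checksum patching.
import zlib

def gunzip_partial(data):
    # 16 + MAX_WBITS tells zlib to expect a gzip header/trailer.
    d = zlib.decompressobj(16 + zlib.MAX_WBITS)
    out = d.decompress(data)
    out += d.flush()  # emit whatever remains of a truncated stream
    return out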
def receive_message(self, sub):
    multipart = sub.recv_multipart()
    try:
        # Frames after the topic frame may be gzip-compressed; try to inflate them.
        contents = b''.join(multipart[1:])
        contents = GzipFile('', 'r', 0, BytesIO(contents)).read()
        contents = contents.decode('UTF-8')
    except Exception:
        # Not gzip data; fall back to the raw payload.
        contents = b''.join(multipart[1:])
        contents = contents.decode('UTF-8')
    try:
        self.parse_message(contents)
    except Exception:
        print(contents)
        raise
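# A small sketch (hypothetical helper, not part of the handler above): gzip payloads can be
# detected up front via the two magic bytes b'\x1f\x8b' instead of relying on try/except.
from gzip import GzipFile
from io import BytesIO

def maybe_gunzip(payload):
    if payload[:2] == b'\x1f\x8b':
        return GzipFile(fileobj=BytesIO(payload)).read()
    return payload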
def post(self, request, *args, **kwargs):
    payload = request.body
    if not payload:
        data = payload
    else:
        content_encoding = request.META.get('HTTP_CONTENT_ENCODING', None)
        if content_encoding:
            # try to decompress the payload.
            if (content_encoding == "deflate"
                    or "santa" in self.user_agent and content_encoding == "zlib"
                    or self.user_agent == "Zentral/mnkpf 0.1" and content_encoding == "gzip"):
                payload = zlib.decompress(payload)
            elif content_encoding == "gzip":
                payload = GzipFile(fileobj=request).read()
            else:
                return HttpResponse("Unsupported Media Type", status=415)
        try:
            payload = payload.decode(self.payload_encoding)
        except UnicodeDecodeError:
            err_msg_tmpl = 'Could not decode payload with encoding %s'
            logger.error(err_msg_tmpl, self.payload_encoding, extra={'request': request})
            raise SuspiciousOperation(err_msg_tmpl % self.payload_encoding)
        try:
            data = json.loads(payload)
        except ValueError:
            raise SuspiciousOperation("Payload is not valid json")
    try:
        self.check_data_secret(data)
    except APIAuthError as auth_err:
        logger.error("APIAuthError %s", auth_err, extra={'request': request})
        return HttpResponseForbidden(str(auth_err))
    response_data = self.do_post(data)
    return JsonResponse(response_data)
def Query(aSelf, aDomain, aTimeout=10, aRetryCnt=5): if not aDomain: return None elif aDomain in aSelf.m_dictCache.keys(): logging.info("{}: Cache hit".format(aDomain)) return aSelf.m_dictCache[aDomain] else: while aRetryCnt > 0: try: params = urllib.parse.urlencode({"domain": aDomain}) req = urllib.request.Request( "http://www.threatcrowd.org/searchApi/v2/domain/report/?{}" .format(params)) rsp = urllib.request.urlopen(req, None, aTimeout) strEncoding = rsp.info().get("Content-Encoding") if strEncoding and strEncoding.lower() == "gzip": result = GzipFile(fileobj=rsp).read() else: result = rsp.read() result = result.decode("utf-8") if result else "<NULL>" #result = r'{"response_code":"1","resolutions":[{"last_resolved":"2014-11-24","ip_address":"74.125.230.68"},{"last_resolved":"2014-12-13","ip_address":"173.194.67.101"},{"last_resolved":"2015-11-24","ip_address":"74.125.230.68"},{"last_resolved":"2015-01-01","ip_address":"216.58.208.32"},{"last_resolved":"2015-02-02","ip_address":"74.125.235.206"},{"last_resolved":"2015-02-03","ip_address":"216.58.208.46"},{"last_resolved":"2015-02-04","ip_address":"216.58.210.46"},{"last_resolved":"2014-07-15","ip_address":"64.233.183.139"},{"last_resolved":"2014-08-28","ip_address":"64.233.182.102"},{"last_resolved":"2014-07-31","ip_address":"64.233.182.101"},{"last_resolved":"2014-08-12","ip_address":"64.233.181.139"},{"last_resolved":"2013-07-19","ip_address":"173.194.66.100"},{"last_resolved":"2014-12-16","ip_address":"173.194.116.100"},{"last_resolved":"2014-12-23","ip_address":"185.50.69.10"},{"last_resolved":"2013-07-26","ip_address":"173.194.78.113"},{"last_resolved":"2014-10-18","ip_address":"173.194.45.224"},{"last_resolved":"2013-08-16","ip_address":"173.194.40.128"},{"last_resolved":"2013-08-23","ip_address":"173.194.40.130"},{"last_resolved":"2014-10-15","ip_address":"74.125.229.128"},{"last_resolved":"2015-02-18","ip_address":"216.58.217.142"},{"last_resolved":"2015-02-26","ip_address":"74.125.236.36"},{"last_resolved":"2013-10-14","ip_address":"212.140.233.53"},{"last_resolved":"2013-11-28","ip_address":"62.253.3.93"},{"last_resolved":"2013-07-30","ip_address":"173.194.41.163"},{"last_resolved":"2014-06-20","ip_address":"206.111.1.122"},{"last_resolved":"2013-08-10","ip_address":"173.194.34.101"},{"last_resolved":"2015-02-06","ip_address":"216.58.219.142"},{"last_resolved":"2014-09-10","ip_address":"173.194.121.34"},{"last_resolved":"2014-10-14","ip_address":"74.125.229.226"},{"last_resolved":"2014-11-07","ip_address":"74.125.225.3"},{"last_resolved":"2013-08-02","ip_address":"173.194.34.66"},{"last_resolved":"2013-08-01","ip_address":"173.194.34.161"},{"last_resolved":"2014-10-04","ip_address":"74.125.225.104"},{"last_resolved":"2013-10-03","ip_address":"173.194.34.102"},{"last_resolved":"2014-03-03","ip_address":"62.253.3.113"},{"last_resolved":"2014-11-09","ip_address":"173.194.46.103"},{"last_resolved":"2014-11-06","ip_address":"173.194.125.41"},{"last_resolved":"2014-10-26","ip_address":"173.194.37.5"},{"last_resolved":"2014-10-07","ip_address":"74.125.229.232"},{"last_resolved":"2014-09-11","ip_address":"173.194.121.41"},{"last_resolved":"2014-09-09","ip_address":"173.194.121.36"},{"last_resolved":"2014-08-22","ip_address":"173.194.121.37"},{"last_resolved":"2014-01-17","ip_address":"173.194.34.129"},{"last_resolved":"2013-11-03","ip_address":"173.194.41.72"},{"last_resolved":"2013-11-02","ip_address":"173.194.41.64"},{"last_resolved":"2013-09-28","ip_address":"173.194.34.97"},{"last_resolved":"2013-09-27","ip_address":"173.194.34.100
"},{"last_resolved":"2014-12-22","ip_address":"74.125.137.100"},{"last_resolved":"2014-12-03","ip_address":"173.194.125.9"},{"last_resolved":"2014-11-15","ip_address":"173.194.125.66"},{"last_resolved":"2015-01-30","ip_address":"216.58.216.206"}],"hashes":["000269bab6833d37ca78b7445c9a3373","00032b34bc1b54e0fc807d868356bd29","0005dc85113a714bb13741ffd0cc0a09","0005f36601ca5acf335c2291aae77cc6","0006d38b765eea58c3ce7a3aedf77095","000ab25117792150a13a087a265d46c8","000ac527f5f9b223f093a74fc9e28bff","000b6f4e0c4ed2f3a48dcf2c1b01cecc","000bfa648b2d26acfc3ab12a903b749a","000de2e9973823f7613b3bbb4c3b6abe","0013d79a7550b053894335f0ecd253ef","001f7e981e87b887db67a60f297253b2","0024c7149b256a6b678810189cc5914c","00294f530951831b3ed394cb06115969","002b60c52d7570a40807d23bf4bd059d","002d5e98f2c6b3bd3b010a7a9e45dc6c","002ee2db250940a1a3ec6f772d6697ae","002f9189ff9fc0cff0acf6b7115d9ce8","003095222bfa1e91a0adf62c941edbc1","0032a9625396ec0eb1b604f82d71b087","00334d4def120132663f58f3589b6087","003638d7d2b9f6f6f0ab48c8d7cb71ea","0036e101d1fe85681e1139b8cc654525","003fc92bf9c8c933b6f32e708f0a1d2c","0043e39e24590149441fba4d8091caa4","004a95dfc236054fac217e0a00db6eb7","004e0b19513b76d70a1408ffd441b960","00573225d6c2f6f0df925f1ad5b840ee","005854a029473ee91bf612927bf641bb","0067868109c1280e63b3c95ed14194f5","006d0ffd3b1d3d76ec75608d51653f9c","00709b7c5c91f5bb043bfb61eab8b31d","00729a127bc2ca8cd5439fa1c4ef3225","0072de37e1b15943d6289a63b19bef1f","00732f18727e5a406d8e6308d37beef6","00742faf994a8410dc4712ce9a62a523","00747b8b4434328a14f2b01076401547","0074f5fe7a38106d9ab66f188a8c30ea","00758e0782742b9e7654b8334e6a65fc","00785d3ed44c5242394b335d08bcb622","007ab2359d4cc46b0c57c8d6c222f18f","007c2bc54302446e8b413cd93e4137f5","007de67e18c4a12aa39f1a66c33df377","007e2f45ffe5a446393fce2790c7da1d","007f17a835a8c33a87f7aa4db0fef224","00806d510017d099f18ed0e5ebf1fa4f","00820ff07976943ebe6604f6dc7fc90c","0082f0dd6f5440f253d44f36fb06a487","00831e473b1816a19fbd42f2c41ba6f6","0084747bb4ec11b5a27ba7fe3db53c87"],"emails":["*****@*****.**"],"references":[],"permalink":"https:\/\/www.threatcrowd.org\/domain.php?domain=google.com"}' aSelf.m_strRawResult = result return aSelf.Parse(aDomain, result) except (urllib.error.HTTPError, urllib.error.URLError, http.client.HTTPException) as err: logging.warning(err) aRetryCnt -= 1 except Exception as err: print(traceback.format_exc()) logging.exception(err) break return None
def send_request(url):
    '''Send a GET request with urllib and return the decoded body.'''
    q = Request(url)
    # Support websites that force TLSv1.2
    sslcontext = ssl.SSLContext(ssl.PROTOCOL_TLSv1_2)
    q.add_header(
        'User-Agent',
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3029.110 Safari/537.36')
    q.add_header(
        'Accept',
        'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8')
    q.add_header('Accept-Language', 'en-US,en;q=0.8')
    q.add_header('Accept-Encoding', 'gzip')
    q.add_header('Cookie', args.cookies)
    response = urlopen(q, context=sslcontext)
    if response.info().get('Content-Encoding') == 'gzip':
        data = GzipFile(fileobj=readBytesCustom(response.read())).read()
    elif response.info().get('Content-Encoding') == 'deflate':
        # raw DEFLATE stream (assumes the zlib module is imported)
        data = zlib.decompress(response.read(), -zlib.MAX_WBITS)
    else:
        data = response.read()
    return data.decode('utf-8', 'replace')
def read(self, url):
    """Read an object from S3."""
    parts = self.urlparse(url)
    response = self.get_object(parts['bucket'], parts['key'])
    body = response['Body'].read()
    if op.splitext(parts['key'])[1] == '.gz':
        body = GzipFile(None, 'rb', fileobj=BytesIO(body)).read()
    return body.decode('utf-8')
def read_from_s3(bucket, key):
    """Download an object from S3 and return it as decoded text."""
    s3 = boto3.client('s3')
    response = s3.get_object(Bucket=bucket, Key=key)
    body = response['Body'].read()
    if op.splitext(key)[1] == '.gz':
        body = GzipFile(None, 'rb', fileobj=BytesIO(body)).read()
    return body.decode('utf-8')
def default_unzip_s3_object_handler_function(response):
    """Gunzip an S3 object body and parse it as JSON."""
    bytestream = BytesIO(response["Body"].read())
    raw_object = GzipFile(None, "rb", fileobj=bytestream).read()
    try:
        # Decode if possible; the payload may already be a str.
        return_object = raw_object.decode("utf-8")
    except AttributeError:
        return_object = raw_object
    return json.loads(return_object)
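# A minimal sketch (assumptions: boto3 is available and the object is gzip-compressed): the
# StreamingBody returned by get_object exposes read(), so GzipFile can wrap it directly and
# decompress incrementally instead of buffering the whole compressed body first.
import boto3
from gzip import GzipFile

def iter_gzipped_s3_lines(bucket, key):
    body = boto3.client('s3').get_object(Bucket=bucket, Key=key)['Body']
    with GzipFile(fileobj=body) as gz:
        for line in gz:
            yield line.decode('utf-8')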
class OAGDClient(OGDClient):
    def __init__(self):
        super().__init__()
        self._json = None
        self.data = b""

    def build_url(self, path, **kwargs):
        url = "{0}{1}".format(self.url_prefix(), path)
        if kwargs:
            url += "?" + urlencode(kwargs)
        return url

    def handle_response(self, response):
        self._json = None
        self.data = response.read()
        try:
            getheader = response.headers.getheader
        except AttributeError:
            getheader = response.getheader
        content_encoding = getheader("content-encoding", "").lower()
        if content_encoding == "gzip":
            # NOTE: response.read() returns bytes, so on Python 3 this wrapper should be io.BytesIO.
            fake_stream = StringIO(self.data)
            self.data = GzipFile(fileobj=fake_stream).read()

    def json_response(self):
        if self._json is None:
            self._json = json.loads(self.data.decode("UTF-8"))
        return self._json

    def get(self, key, default):
        doc = self.json_response()
        return doc.get(key, default)

    def get_request(self, url):
        request = Request(url)
        print("get_request:", url)
        request.add_header("Accept-Encoding", "gzip")
        response = self.opener().open(request)
        return self.handle_response(response)

    def rate_variant(self, variant_uuid, like=None, work=None):
        params = {
            "game": variant_uuid,
        }
        if like is not None:
            params["like"] = like
        if work is not None:
            params["work"] = work
        url = self.build_url("/api/1/rate_game", **params)
        self.get_request(url)
def try_decompress(data):
    """
    Try various decompression algorithms on `data`, return the decompressed output.
    """
    for encoding in "zlib zip bz2 gzip".split():
        try:
            if encoding == "gzip" and GzipFile is not None:
                data = GzipFile(fileobj=StringIO(data)).read()
                continue
            # Python 2 codec-based decompression for the remaining encodings.
            data = data.decode(encoding)
        except (zliberror, IOError, LookupError):
            pass
    return data
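# A rough sketch (my own helper, not from the function above): rather than trying codecs in a
# fixed order, the container format can be sniffed from its magic bytes before decompressing.
import bz2
import zlib

def sniff_and_decompress(data):
    if data[:2] == b'\x1f\x8b':
        return zlib.decompress(data, 16 + zlib.MAX_WBITS)  # gzip wrapper
    if data[:3] == b'BZh':
        return bz2.decompress(data)                        # bzip2
    if data[:1] == b'\x78':
        return zlib.decompress(data)                       # zlib wrapper
    return data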
def open(self, url, data=''):
    data = urlencode(data)
    if not IS_PY2:
        data = data.encode()
    request = Request('http://f.10086.cn/%s' % url, data=data)
    htm = self.opener.open(request).read()
    try:
        htm = GzipFile(fileobj=StringIO(htm)).read()
    finally:
        # The return inside finally swallows any decompression error and falls back to the raw body.
        if IS_PY2:
            return htm
        else:
            return htm.decode()
def GET(url):
    req = urllib2.Request(url)
    req.add_header("Accept-Encoding", "gzip, deflate")
    res = urllib2.urlopen(req)
    if res.headers.get("content-encoding") == "gzip":
        # .read() is needed here, otherwise a GzipFile object (not the body) is returned.
        data = GzipFile(fileobj=StringIO(res.read()), mode="r").read()
    elif res.headers.get("content-encoding") == "deflate":
        data = StringIO(deflate(res.read())).getvalue()
    else:
        data = res.read()
    try:
        encoding = res.headers.get('Content-Type').split("charset=")[1]
    except IndexError:
        encoding = 'ISO-8859-1'
    return data.decode(encoding).encode('utf-8')
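# A hedged sketch of the `deflate` helper assumed above: HTTP "deflate" bodies are sometimes a
# zlib-wrapped stream and sometimes a raw DEFLATE stream, so a common approach is to try the
# wrapped form first and fall back to raw.
import zlib

def deflate(data):
    try:
        return zlib.decompress(data)                   # zlib-wrapped deflate
    except zlib.error:
        return zlib.decompress(data, -zlib.MAX_WBITS)  # raw deflate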
def fetch_json_attempt(self, url):
    request = Request(url)
    request.add_header("Accept-Encoding", "gzip")
    response = self.opener().open(request)
    data = response.read()
    try:
        getheader = response.headers.getheader
    except AttributeError:
        getheader = response.getheader
    content_encoding = getheader("content-encoding", "").lower()
    if content_encoding == "gzip":
        # NOTE: data is bytes, so on Python 3 this wrapper should be io.BytesIO.
        fake_stream = StringIO(data)
        data = GzipFile(fileobj=fake_stream).read()
    return data, json.loads(data.decode("UTF-8"))
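# A compact Python 3 sketch of the same pattern: wrap the raw bytes in io.BytesIO (not StringIO)
# before handing them to GzipFile.
import json
from gzip import GzipFile
from io import BytesIO

def gunzip_json(raw_bytes):
    payload = GzipFile(fileobj=BytesIO(raw_bytes)).read()
    return json.loads(payload.decode("UTF-8"))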
def Query(aSelf, aHash, aTimeout=10, aRetryCnt=5):
    if not aHash:
        return None
    elif aHash in aSelf.m_dictCache.keys():
        logging.info("{}: Cache hit".format(aHash))
        return aSelf.m_dictCache[aHash]
    else:
        strHashType = ""
        if aSelf.reSha256.match(aHash):
            strHashType = "sha256"
        elif aSelf.reSha1.match(aHash):
            strHashType = "sha1"
        elif aSelf.reMd5.match(aHash):
            strHashType = "md5"
        else:
            logging.error("Hash type is invalid")
            return None
        while aRetryCnt > 0:
            try:
                params = urllib.parse.urlencode({
                    "api_key": aSelf.m_strApiKey,
                    strHashType: aHash
                })
                req = urllib.request.Request(
                    "https://detux.org/api/report.php",
                    headers=aSelf.m_strHttpHeaders)
                rsp = urllib.request.urlopen(req, params.encode("utf-8"), aTimeout)
                strEncoding = rsp.info().get("Content-Encoding")
                if strEncoding and strEncoding.lower() == "gzip":
                    result = GzipFile(fileobj=rsp).read()
                else:
                    result = rsp.read()
                result = result.decode("utf-8") if result else "<NULL>"
                aSelf.m_strRawResult = result
                return aSelf.Parse(aHash, result)
            except (urllib.error.HTTPError, urllib.error.URLError,
                    http.client.HTTPException) as err:
                logging.warning(err)
                aRetryCnt -= 1
            except Exception as err:
                print(traceback.format_exc())
                logging.exception(err)
                break
        return None
def send_request(url):
    '''Send a GET request with urllib, negotiating a usable TLS version, and return the decoded body.'''
    q = Request(url)
    q.add_header(
        'User-Agent',
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36')
    q.add_header(
        'Accept',
        'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8')
    q.add_header('Accept-Language', 'en-US,en;q=0.8')
    q.add_header('Accept-Encoding', 'gzip')
    q.add_header('Cookie', args.cookies)
    try:
        # First try TLSv1.2, then TLSv1, then an unverified default context.
        sslcontext = ssl.SSLContext(ssl.PROTOCOL_TLSv1_2)
        response = urlopen(q, timeout=args.timeout, context=sslcontext)
    except Exception:
        try:
            sslcontext = ssl.SSLContext(ssl.PROTOCOL_TLSv1)
            response = urlopen(q, timeout=args.timeout, context=sslcontext)
        except Exception:
            try:
                ctx = ssl.create_default_context()
                ctx.check_hostname = False
                ctx.verify_mode = ssl.CERT_NONE
                response = urlopen(q, timeout=args.timeout, context=ctx)
            except Exception:
                traceback.print_exc()
    if response.info().get('Content-Encoding') == 'gzip':
        data = GzipFile(fileobj=readBytesCustom(response.read())).read()
    elif response.info().get('Content-Encoding') == 'deflate':
        # raw DEFLATE stream (assumes the zlib module is imported)
        data = zlib.decompress(response.read(), -zlib.MAX_WBITS)
    else:
        data = response.read()
    return data.decode('utf-8', 'replace')
def fetch(url, data=None):
    urls = urlparse.urlparse(url)
    if data is not None:
        data = urllib.urlencode(data)
    http = urllib2.urlopen(urllib2.Request(
        url,
        data=data,
        headers={
            'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.9; rv:25.0) Gecko/20100101 Firefox/25.0',
            'Accept-Encoding': 'gzip, deflate',
            'Referer': 'http://%s' % (urls.hostname)
        }), timeout=10)
    content = ''
    if http.code == 200:
        content_encoding = http.headers.get('Content-Encoding')
        content_type = http.headers.get('Content-Type')
        if content_encoding == 'gzip':
            content = GzipFile(fileobj=StringIO(http.read()), mode='r').read()
        elif content_encoding == 'deflate':
            content = StringIO(deflate(http.read())).getvalue()
        else:
            content = http.read()
        charset = 'utf-8'
        re_charset = re.compile(r'''charset=([^$]+)''', re.I).findall(content_type)
        if re_charset is not None and len(re_charset) > 0:
            charset = re_charset[0]
        elif len(content) > 0:
            # Fall back to a <meta> charset declaration in the document body.
            re_charset = re.compile(r'''<meta[^>]+charset=([^'"]+)''', re.I).findall(content)
            if re_charset is not None and len(re_charset) > 0:
                charset = re_charset[0]
        if charset != 'utf-8':
            content = content.decode(charset, 'ignore').encode('utf-8')
    return content
def http_response(self, req, resp):
    urllib2.HTTPCookieProcessor.http_response(self, req, resp)
    data = resp.read()
    if 200 <= resp.code < 300:
        # gzip
        if resp.headers.get('Content-Encoding') == 'gzip':
            log('un-gzip')
            data = GzipFile(fileobj=StringIO(data), mode='r').read()
    if 200 <= resp.code < 300 and resp.info().maintype == 'text':
        # <meta> redirect
        if resp.info().type in MIMETYPE['html']:
            match = re.search(r'(?i)<meta http-equiv=.refresh[^>]*?url=(http.*?)["\']', data)
            if match:
                new_url = match.groups()[0]
                log('redirect: %s' % new_url)
                new_headers = dict((k, v) for k, v in req.headers.items()
                                   if k.lower() not in ('content-length', 'content-type'))
                new = urllib2.Request(new_url,
                                      headers=new_headers,
                                      origin_req_host=req.get_origin_req_host(),
                                      unverifiable=True)
                return self.parent.open(new, timeout=req.timeout)
        # encoding
        enc = detect_encoding(data, resp)
        if enc:
            data = data.decode(enc, 'replace')
            if not self.decode:
                data = data.encode(enc)
    fp = StringIO(data)
    old_resp = resp
    resp = urllib2.addinfourl(fp, old_resp.headers, old_resp.url, old_resp.code)
    resp.msg = old_resp.msg
    return resp
def view_execute_import(self, request):
    request.assert_valid_csrf_token()
    cache = request.browser_session['account-statement']
    binary = BytesIO(b64decode(cache['data']))
    xml = GzipFile(filename='', mode='r', fileobj=binary).read()
    xml = xml.decode('utf-8')
    invoice = cache['invoice']
    transactions = list(
        match_iso_20022_to_usernames(xml, request.session, period_id=invoice))
    users = dict(
        request.session.query(User).with_entities(User.username, User.id))
    payments = {
        users[t.username]: t for t in transactions if t.state == 'success'
    }
    if payments:
        invoices = request.app.invoice_collection(period_id=invoice)
        invoices = invoices.query()
        invoices = invoices.filter(Invoice.user_id.in_(payments.keys()))
        for invoice in invoices.options(joinedload(Invoice.items)):
            for item in invoice.items:
                item.tid = payments[invoice.user_id].tid
                item.source = 'xml'
                item.paid = True
        request.success(
            _("Imported ${count} payments", mapping={'count': len(payments)}))
    else:
        request.alert(_("No payments could be imported"))
    del request.browser_session['account-statement']

    @request.after
    def redirect_intercooler(response):
        response.headers.add('X-IC-Redirect', request.link(self))
def Query(aSelf, aHash, aTimeout=10, aRetryCnt=5):
    if not aHash:
        return None
    elif aHash in aSelf.m_dictCache.keys():
        logging.info("{}: Cache hit".format(aHash))
        return aSelf.m_dictCache[aHash]
    else:
        while aRetryCnt > 0:
            try:
                params = urllib.parse.urlencode({
                    "apikey": aSelf.m_strApiKey,
                    "resource": aHash
                })
                req = urllib.request.Request(
                    "https://www.virustotal.com/vtapi/v2/file/report",
                    headers=aSelf.m_strHttpHeaders)
                rsp = urllib.request.urlopen(req, params.encode("utf-8"), aTimeout)
                strEncoding = rsp.info().get("Content-Encoding")
                if strEncoding and strEncoding.lower() == "gzip":
                    result = GzipFile(fileobj=rsp).read()
                else:
                    result = rsp.read()
                if result:
                    result = json.loads(result.decode("utf-8"))
                else:
                    print("Quota seems to be reached... sleeping 30 seconds")
                    time.sleep(30)
                    continue
                aSelf.m_strRawResult = result
                return aSelf.Parse(aHash, result)
            except (urllib.error.HTTPError, urllib.error.URLError,
                    http.client.HTTPException) as err:
                logging.warning(err)
                aRetryCnt -= 1
            except Exception as err:
                print(traceback.format_exc())
                logging.exception(err)
                break
        return None
def main():
    baseurl = ''

    p = optparse.OptionParser('%prog [(filename|url) [encoding]]',
                              version='%prog ' + __version__)
    p.add_option("--ignore-emphasis", dest="ignore_emphasis", action="store_true",
                 default=IGNORE_EMPHASIS,
                 help="don't include any formatting for emphasis")
    p.add_option("--ignore-links", dest="ignore_links", action="store_true",
                 default=IGNORE_ANCHORS,
                 help="don't include any formatting for links")
    p.add_option("--ignore-images", dest="ignore_images", action="store_true",
                 default=IGNORE_IMAGES,
                 help="don't include any formatting for images")
    p.add_option("-g", "--google-doc", action="store_true", dest="google_doc",
                 default=False,
                 help="convert an html-exported Google Document")
    p.add_option("-d", "--dash-unordered-list", action="store_true", dest="ul_style_dash",
                 default=False,
                 help="use a dash rather than a star for unordered list items")
    p.add_option("-e", "--asterisk-emphasis", action="store_true", dest="em_style_asterisk",
                 default=False,
                 help="use an asterisk rather than an underscore for emphasized text")
    p.add_option("-b", "--body-width", dest="body_width", action="store", type="int",
                 default=BODY_WIDTH,
                 help="number of characters per output line, 0 for no wrap")
    p.add_option("-i", "--google-list-indent", dest="list_indent", action="store", type="int",
                 default=GOOGLE_LIST_INDENT,
                 help="number of pixels Google indents nested lists")
    p.add_option("-s", "--hide-strikethrough", action="store_true", dest="hide_strikethrough",
                 default=False,
                 help="hide strike-through text. only relevant when -g is specified as well")
    p.add_option("--escape-all", action="store_true", dest="escape_snob", default=False,
                 help="Escape all special characters. Output is less readable, but avoids "
                      "corner case formatting issues.")
    (options, args) = p.parse_args()

    # process input
    encoding = "utf-8"
    if len(args) > 0:
        file_ = args[0]
        if len(args) == 2:
            encoding = args[1]
        if len(args) > 2:
            p.error('Too many arguments')

        if file_.startswith('http://') or file_.startswith('https://'):
            import urllib2
            from gzip import GzipFile
            from urlparse import urlparse
            from StringIO import StringIO

            baseurl = file_
            urls = urlparse(baseurl)
            j = urllib2.urlopen(urllib2.Request(baseurl, headers={
                'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.9; rv:25.0) Gecko/20100101 Firefox/25.0',
                'Accept-Encoding': 'gzip, deflate',
                'Referer': 'http://%s' % (urls.hostname)
            }), timeout=30)
            if j.code == 200:
                content_encoding = j.headers.get('Content-Encoding')
                if content_encoding == 'gzip':
                    data = GzipFile(fileobj=StringIO(j.read()), mode='r').read()
                elif content_encoding == 'deflate':
                    data = StringIO(deflate(j.read())).getvalue()
                else:
                    data = j.read()
            if encoding is None:
                try:
                    from feedparser import _getCharacterEncoding as enc
                except ImportError:
                    enc = lambda x, y: ('utf-8', 1)
                encoding = enc(j.headers, data)[0]
                if encoding == 'us-ascii':
                    encoding = 'utf-8'
        else:
            data = open(file_, 'rb').read()
            if encoding is None:
                try:
                    from chardet import detect
                except ImportError:
                    detect = lambda x: {'encoding': 'utf-8'}
                encoding = detect(data)['encoding']
    else:
        data = sys.stdin.read()

    data = data.decode(encoding, 'ignore')
    h = HTML2Text(baseurl=baseurl)

    # handle options
    if options.ul_style_dash:
        h.ul_item_mark = '-'
    if options.em_style_asterisk:
        h.emphasis_mark = '*'
        h.strong_mark = '__'
    h.body_width = options.body_width
    h.list_indent = options.list_indent
    h.ignore_emphasis = options.ignore_emphasis
    h.ignore_links = options.ignore_links
    h.ignore_images = options.ignore_images
    h.google_doc = options.google_doc
    h.hide_strikethrough = options.hide_strikethrough
    h.escape_snob = options.escape_snob

    wrapwrite(h.handle(data))
def get(
    self,
    url,
    post=None,
    caching=None,
    is_ref_url=True,
    md5_file_cache=None,
    time_out=None,
    headers=None,
    detect_charsets=True,
    content_type=None,
    files=None,
):
    prefix = "local-file:"
    if url.startswith(prefix):
        with open(url[len(prefix):], "r") as fo:
            page = fo.read().decode("utf8")
        self.last_page = page
        return page

    if not url.startswith('http') and self.last_url:
        url = urllib.parse.urljoin(self.last_url, url)
    if caching is None:
        caching = self.caching
    url = url.replace('&amp;', '&')
    url = url.replace(' ', '%20')

    makedirs(self.dir_cache, mode=0o777, exist_ok=True)

    files = files or isinstance(post, dict) and post.pop('files__', None)

    post_urlencoded = urllib.parse.urlencode(post).encode('utf-8') if post and isinstance(post, dict) else post

    try:
        file_cache = ''.join((
            self.dir_cache,
            md5((md5_file_cache or url + (post_urlencoded or "")).encode()).hexdigest(),
            ("/" + url[url.find("//") + 2:].split("?", 2)[0]).replace("/", "_"),
            ".html",
        ))
    except Exception:
        file_cache = None

    caching = file_cache and caching and self.cache_timeout > 0

    from_cache = caching
    if caching:
        if not path.isfile(file_cache):
            from_cache = False
        else:
            diff_time = datetime.now() - datetime.fromtimestamp(path.getctime(file_cache))
            from_cache = diff_time.seconds < self.cache_timeout
    self.print("[cache]" if from_cache else "", url)

    self.error = None
    self.response = None
    if from_cache:
        with open(file_cache, "r") as f:
            page = f.read().encode('utf8')
    else:
        if self.time_sleep:
            v_time_sleep = min(1, abs(gauss(0, 1)) * self.time_sleep)
            sleep(v_time_sleep)
        if not headers:
            headers = {}
        if self.ref_url and 'Referer' not in headers:
            headers.update({"Referer": self.ref_url})
        if not self.last_url or urllib.parse.urlparse(self.last_url).netloc != urllib.parse.urlparse(url).netloc:
            self.opener.addheaders = self._init_opener_headers
        if headers:
            h = dict(self.opener.addheaders)
            h.update(headers)
            self.opener.addheaders = list(h.items())
        if content_type == 'multipart/form-data' and post or files:
            post_urlencoded, multipart_headers = encode_multipart(fields=post, files=files)
            headers.update(multipart_headers)
        try:
            if headers:
                request = urllib.request.Request(url, headers=headers)
            else:
                request = url
            time_start = datetime.utcnow()
            response = self.opener.open(
                request,
                post_urlencoded if post else None,
                timeout=time_out or self.time_out,
            )
            if response.info().get("Content-Encoding", None) == "gzip":
                buf = BytesIO(response.read())
                page = GzipFile(fileobj=buf).read()
            else:
                page = response.read()
            self.response = response
            self.time_response = datetime.utcnow() - time_start
            if self.verify_word and self.verify_word not in page:
                raise NoVerifyWord("No verify word '%s', size page = %d" % (self.verify_word, len(page)))
        except Exception as err:
            self.error = err
            if self.assert_on_fail:
                if self.proxer:
                    self.proxer.fail()
                raise FailOnGetResponse(err)
            else:
                traceback.print_exc()
            return

        try:
            if file_cache and caching:
                cookie_write = True
                if self.response.info().get("Content-Type").startswith("application/json"):
                    page = dumps(loads(page), indent=4)
                    cookie_write = False
                if self.response.info().get("Content-Type").startswith("image/"):
                    cookie_write = False
                with open(file_cache, "w") as f:
                    f.write(page.decode('utf8'))
                    if cookie_write:
                        f.write("\n\n" + dumps(self.get_cookies(), indent=4))
        except Exception:
            traceback.print_exc()
            self.print("[cache] ERROR: write to", file_cache)

    if self.proxer:
        if not self.error:
            self.proxer.ok(self.time_response)
        else:
            self.proxer.fail()

    if detect_charsets:
        matches = re.findall(r'charset=["\']?(?P<charset>[^"\'\s\.>;]{3,}\b)', str(page), re.IGNORECASE)
        if matches:
            charsets = [c.lower() for c in matches]
            if len(charsets) > 1 and len(set(charsets)) > 1:
                self.print(f'[WARNING] set multi charset values: {charsets}')
            charset = charsets[-1].lower()
        else:
            charset = 'utf-8'
        try:
            charset_detect = chardet.detect(page)
            if charset_detect and charset_detect['confidence'] > 0.98:
                charset = charset_detect['encoding']
        except Exception as e:
            self.print('exception on charset detect:', str(e))
        if charset in ('utf-8', 'utf8'):
            page = page.decode('utf-8', 'replace')
        elif charset in ('windows-1251', 'cp1251'):
            page = page.decode('cp1251', 'replace')
        else:
            page = page.decode(charset, 'replace')

    self.last_page = page
    self.last_url = self.response.geturl() if self.response else url
    if is_ref_url:
        self.ref_url = self.last_url
    self.file_cache_clear()
    return page
class OGDClient(object):
    HTTPError = HTTPError
    BadRequestError = BadRequestError
    UnauthorizedError = UnauthorizedError
    ForbiddenError = ForbiddenError
    NotFoundError = NotFoundError
    NonRetryableHTTPError = NonRetryableHTTPError

    def __init__(self):
        self._json = None
        self.data = b""

    @staticmethod
    def is_logged_in():
        # Non-empty ogd_auth means we are logged in (probably; the
        # authorization can in theory have been invalidated on the server).
        return bool(Settings.instance()["database_auth"])

    def login_task(self, username, password):
        return LoginTask(self, username, password)

    def logout_task(self, auth_token):
        return LogoutTask(self, auth_token)

    @retry
    def auth(self, username, password, device_id, device_name):
        result = self.post("/api/auth", {
            "username": username,
            "password": password,
            "device_id": device_id,
            "device_name": device_name
        }, auth=False)
        return result

    @retry
    def deauth(self, auth_token):
        result = self.post("/api/deauth", {"auth_token": auth_token}, auth=False)
        return result

    @staticmethod
    def url_prefix():
        return openretro_url_prefix()

    def opener(self):
        username, password = self.credentials()
        # FIXME: use cache dict?
        return opener_for_url_prefix(self.url_prefix(), username, password)

    @staticmethod
    def credentials():
        auth_token = Settings.instance()["database_auth"]
        return "auth_token", auth_token

    def post(self, path, params=None, data=None, auth=True):
        headers = {}
        if auth:
            credentials = self.credentials()
            headers[str("Authorization")] = str(
                "Basic " + base64.b64encode("{0}:{1}".format(
                    *credentials).encode("UTF-8")).decode("UTF-8"))
        connection = openretro_http_connection()
        url = "{0}{1}".format(openretro_url_prefix(), path)
        if not data and params:
            data = urlencode(params)
            headers[str("Content-Type")] = str("application/x-www-form-urlencoded")
        print(url, headers)
        if isinstance(data, dict):
            data = json.dumps(data)
        connection.request(str("POST"), str(url), data, headers=headers)
        response = connection.getresponse()
        if response.status not in [200]:
            print(response.status, response.reason)
            if response.status == 400:
                class_ = BadRequestError
            elif response.status == 401:
                class_ = UnauthorizedError
            elif response.status == 403:
                class_ = ForbiddenError
            elif response.status == 404:
                class_ = NotFoundError
            else:
                class_ = HTTPError
            raise class_(url, response.status, response.reason,
                         response.getheaders(), None)
        data = response.read()
        if len(data) > 0 and data[0:1] == b"{":
            doc = json.loads(data.decode("UTF-8"))
            return doc
        return data

    def build_url(self, path, **kwargs):
        url = "{0}{1}".format(self.url_prefix(), path)
        if kwargs:
            url += "?" + urlencode(kwargs)
        return url

    def get_request(self, url):
        request = Request(url)
        print("get_request:", url)
        request.add_header("Accept-Encoding", "gzip")
        response = self.opener().open(request)
        return self.handle_response(response)

    def handle_response(self, response):
        self._json = None
        self.data = response.read()
        try:
            getheader = response.headers.getheader
        except AttributeError:
            getheader = response.getheader
        content_encoding = getheader("content-encoding", "").lower()
        if content_encoding == "gzip":
            fake_stream = StringIO(self.data)
            self.data = GzipFile(fileobj=fake_stream).read()

    def json_response(self):
        if self._json is None:
            self._json = json.loads(self.data.decode("UTF-8"))
        return self._json

    def rate_variant(self, variant_uuid, like=None, work=None):
        params = {
            "game": variant_uuid,
        }
        if like is not None:
            params["like"] = like
        if work is not None:
            params["work"] = work
        url = self.build_url("/api/1/rate_game", **params)
        self.get_request(url)
        return self.json_response()
class OGDClient(object):
    HTTPError = HTTPError
    BadRequestError = BadRequestError
    UnauthorizedError = UnauthorizedError
    ForbiddenError = ForbiddenError
    NotFoundError = NotFoundError
    NonRetryableHTTPError = NonRetryableHTTPError

    def __init__(self):
        self._json = None
        self.data = b""

    @staticmethod
    def is_logged_in():
        # Non-empty ogd_auth means we are logged in (probably; the
        # authorization can in theory have been invalidated on the server).
        return bool(app.settings["database_auth"])

    def login_task(self, username, password):
        return LoginTask(self, username, password)

    def logout_task(self, auth_token):
        return LogoutTask(self, auth_token)

    @retry
    def auth(self, username, password, device_id, device_name):
        result = self.post(
            "/api/auth",
            {"username": username, "password": password,
             "device_id": device_id, "device_name": device_name},
            auth=False)
        return result

    @retry
    def deauth(self, auth_token):
        result = self.post("/api/deauth", {"auth_token": auth_token}, auth=False)
        return result

    @staticmethod
    def url_prefix():
        return openretro_url_prefix()

    def opener(self):
        username, password = self.credentials()
        # FIXME: use cache dict?
        return opener_for_url_prefix(self.url_prefix(), username, password)

    @staticmethod
    def credentials():
        auth_token = app.settings["database_auth"]
        return "auth_token", auth_token

    def post(self, path, params=None, data=None, auth=True):
        headers = {}
        if auth:
            credentials = self.credentials()
            headers[str("Authorization")] = str("Basic " + base64.b64encode(
                "{0}:{1}".format(*credentials).encode("UTF-8")).decode("UTF-8"))
        connection = openretro_http_connection()
        url = "{0}{1}".format(openretro_url_prefix(), path)
        if not data and params:
            data = urlencode(params)
            headers[str("Content-Type")] = str("application/x-www-form-urlencoded")
        print(url, headers)
        if isinstance(data, dict):
            data = json.dumps(data)
        connection.request(str("POST"), str(url), data, headers=headers)
        response = connection.getresponse()
        if response.status not in [200]:
            print(response.status, response.reason)
            if response.status == 400:
                class_ = BadRequestError
            elif response.status == 401:
                class_ = UnauthorizedError
            elif response.status == 403:
                class_ = ForbiddenError
            elif response.status == 404:
                class_ = NotFoundError
            else:
                class_ = HTTPError
            raise class_(url, response.status, response.reason,
                         response.getheaders(), None)
        data = response.read()
        if len(data) > 0 and data[0:1] == b"{":
            doc = json.loads(data.decode("UTF-8"))
            return doc
        return data

    def build_url(self, path, **kwargs):
        url = "{0}{1}".format(self.url_prefix(), path)
        if kwargs:
            url += "?" + urlencode(kwargs)
        return url

    def get_request(self, url):
        request = Request(url)
        print("get_request:", url)
        request.add_header("Accept-Encoding", "gzip")
        response = self.opener().open(request)
        return self.handle_response(response)

    def handle_response(self, response):
        self._json = None
        self.data = response.read()
        try:
            getheader = response.headers.getheader
        except AttributeError:
            getheader = response.getheader
        content_encoding = getheader("content-encoding", "").lower()
        if content_encoding == "gzip":
            fake_stream = StringIO(self.data)
            self.data = GzipFile(fileobj=fake_stream).read()

    def json_response(self):
        if self._json is None:
            self._json = json.loads(self.data.decode("UTF-8"))
        return self._json

    def rate_variant(self, variant_uuid, like=None, work=None):
        params = {
            "game": variant_uuid,
        }
        if like is not None:
            params["like"] = like
        if work is not None:
            params["work"] = work
        url = self.build_url("/api/1/rate_game", **params)
        self.get_request(url)
        return self.json_response()
def view_billing_import(self, request, form):
    uploaded = 'account-statement' in request.browser_session

    if form.submitted(request):
        request.browser_session['account-statement'] = {
            'invoice': form.period.data,
            'data': form.xml.data['data']
        }
        uploaded = True
    elif not request.POST and uploaded:
        del request.browser_session['account-statement']
        uploaded = False

    if uploaded:
        cache = request.browser_session['account-statement']
        binary = BytesIO(b64decode(cache['data']))
        xml = GzipFile(filename='', mode='r', fileobj=binary).read()
        xml = xml.decode('utf-8')
        transactions = list(
            match_iso_20022_to_usernames(xml, request.session,
                                         period_id=cache['invoice']))
        if not transactions:
            del request.browser_session['account-statement']
            request.alert(_("No transactions were found in the given file"))
            uploaded = False
            form.xml.data = None
        else:
            transactions.sort(key=lambda t: t.order)
    else:
        transactions = None

    users = UserCollection(request.session)
    users = {
        u.username: (u.realname or u.username)
        for u in users.query().with_entities(User.username, User.realname)
    }

    layout = BillingCollectionImportLayout(self, request)

    return {
        'layout': layout,
        'title': _("Import Bank Statement"),
        'form': form if not uploaded else None,
        'button_text': _("Preview"),
        'transactions': transactions,
        'uploaded': uploaded,
        'users': users,
        'user_link': lambda u: request.class_link(InvoiceCollection, {'username': u}),
        'success_count': transactions and sum(
            1 for t in transactions if t.state == 'success'),
        'model': self,
        'post_url': layout.csrf_protected_url(request.link(self, 'execute-import'))
    }
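# A small companion sketch (helper name is mine, not from the views above): the inverse of the
# import path, i.e. gzip-compressing an uploaded XML document and base64-encoding it so it can
# sit in the browser session until the user confirms the import.
from base64 import b64encode
from gzip import compress

def pack_statement(xml_text):
    return b64encode(compress(xml_text.encode('utf-8'))).decode('ascii')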