def get_client_id(): response = http_get("https://soundcloud.com") rx_namespace = {"re": "http://exslt.org/regular-expressions"} if response.ok: tree = etree.parse(StringIO(response.content), etree.HTMLParser()) script_tags = tree.xpath("//script[re:match(@src, '(.*app.*js)')]", namespaces=rx_namespace) app_js_urls = [ script_tag.get('src') for script_tag in script_tags if script_tag is not None ] # extracts valid app_js urls from soundcloud.com content for app_js_url in app_js_urls: # gets app_js and searches for the clientid response = http_get(app_js_url) if response.ok: cids = re.search(r'client_id:"([^"]*)"', response.content, re.M | re.I) if cids is not None and len(cids.groups()): return cids.groups()[0] logger.warning( "Unable to fetch guest client_id from SoundCloud, check parser!") return ""
def init(engine_settings=None):
    global cookies

    # initial cookies
    resp = http_get(url, allow_redirects=False)
    if resp.ok:
        for r in resp.history:
            cookies.update(r.cookies)
        cookies.update(resp.cookies)
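# A minimal sketch of the module-level state init() above relies on: it mutates a
# global `cookies` mapping and fetches a module-level `url`. The names come from the
# function body; the concrete values below are assumptions for illustration, not the
# engine's actual configuration.
url = 'https://example.org/'  # assumed base URL fetched by init()
cookies = {}                  # assumed cookie jar, reused by later request functions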
def obtain_token():
    # timestamp rounded down to the start of the current hour; stored as the
    # token's last_updated marker
    update_time = time() - (time() % 3600)
    try:
        token_response = http_get('https://www.wolframalpha.com/input/api/v1/code?ts=9999999999999999999',
                                  timeout=2.0)
        token['value'] = loads(token_response.text)['code']
        token['last_updated'] = update_time
    except:
        # on any network or parsing failure, keep the previously cached token
        pass
    return token
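# obtain_token() above updates a module-level `token` dict and uses time(), loads()
# and http_get() from its module. The setup below is a plausible sketch of that
# context; everything beyond the two keys written by the function is an assumption.
from json import loads
from time import time

token = {
    'value': '',          # guest API code returned by wolframalpha.com
    'last_updated': None  # start of the hour in which the code was last fetched
}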
def get_client_id(): response = http_get("https://soundcloud.com") if response.ok: tree = html.fromstring(response.content) script_tags = tree.xpath("//script[contains(@src, '/assets/app')]") app_js_urls = [script_tag.get('src') for script_tag in script_tags if script_tag is not None] # extracts valid app_js urls from soundcloud.com content for app_js_url in app_js_urls: # gets app_js and searches for the clientid response = http_get(app_js_url) if response.ok: cids = cid_re.search(response.text) if cids is not None and len(cids.groups()): return cids.groups()[0] logger.warning("Unable to fetch guest client_id from SoundCloud, check parser!") return ""
def get_client_id(): response = http_get("https://soundcloud.com") rx_namespace = {"re": "http://exslt.org/regular-expressions"} if response.ok: tree = etree.parse(StringIO(response.content), etree.HTMLParser()) script_tags = tree.xpath("//script[re:match(@src, '(.*app.*js)')]", namespaces=rx_namespace) app_js_urls = [script_tag.get('src') for script_tag in script_tags if script_tag is not None] # extracts valid app_js urls from soundcloud.com content for app_js_url in app_js_urls: # gets app_js and searches for the clientid response = http_get(app_js_url) if response.ok: cids = re.search(r'client_id:"([^"]*)"', response.content, re.M | re.I) if cids is not None and len(cids.groups()): return cids.groups()[0] logger.warning("Unable to fetch guest client_id from SoundCloud, check parser!") return ""
def get_client_id(): response = http_get("https://soundcloud.com") if response.ok: tree = html.fromstring(response.content) # script_tags has been moved from /assets/app/ to /assets/ path. I # found client_id in https://a-v2.sndcdn.com/assets/49-a0c01933-3.js script_tags = tree.xpath("//script[contains(@src, '/assets/')]") app_js_urls = [ script_tag.get('src') for script_tag in script_tags if script_tag is not None ] # extracts valid app_js urls from soundcloud.com content for app_js_url in app_js_urls: # gets app_js and searches for the clientid response = http_get(app_js_url) if response.ok: cids = cid_re.search(response.content.decode("utf-8")) if cids is not None and len(cids.groups()): return cids.groups()[0] logger.warning( "Unable to fetch guest client_id from SoundCloud, check parser!") return ""
def image_proxy():
    url = request.args.get('url')
    if not url:
        return '', 400

    h = hashlib.sha256(url + settings['server']['secret_key']).hexdigest()
    if h != request.args.get('h'):
        return '', 400

    headers = dict_subset(request.headers, {'If-Modified-Since', 'If-None-Match'})
    headers['User-Agent'] = gen_useragent()

    resp = http_get(url,
                    stream=True,
                    timeout=settings['server'].get('request_timeout', 2),
                    headers=headers)

    if resp.status_code == 304:
        return '', resp.status_code

    if resp.status_code != 200:
        logger.debug('image-proxy: wrong response code: {0}'.format(resp.status_code))
        if resp.status_code >= 400:
            return '', resp.status_code
        return '', 400

    if not resp.headers.get('content-type', '').startswith('image/'):
        logger.debug('image-proxy: wrong content-type: {0}'.format(resp.headers.get('content-type')))
        return '', 400

    img = ''
    chunk_counter = 0

    for chunk in resp.iter_content(1024 * 1024):
        chunk_counter += 1
        if chunk_counter > 5:
            return '', 502  # Bad gateway - file is too big (>5M)
        img += chunk

    headers = dict_subset(resp.headers, {'Content-Length', 'Length', 'Date', 'Last-Modified', 'Expires', 'Etag'})

    return Response(img, mimetype=resp.headers['content-type'], headers=headers)
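# image_proxy() above only serves a URL whose `h` query parameter equals the SHA-256
# hex digest of the URL concatenated with the server's secret key. The helper below
# is a minimal sketch of producing a matching link; the function name, the
# /image_proxy path and the explicit UTF-8 encoding (needed on Python 3, where
# hashlib wants bytes) are assumptions, not the application's actual helper.
import hashlib
from urllib.parse import urlencode


def proxify_url(target_url, secret_key):
    # same concatenation order as the check performed in image_proxy()
    h = hashlib.sha256((target_url + secret_key).encode('utf-8')).hexdigest()
    return '/image_proxy?' + urlencode({'url': target_url, 'h': h})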
def get(*args, **kwargs):
    if 'timeout' not in kwargs:
        kwargs['timeout'] = settings['outgoing']['request_timeout']
    return http_get(*args, **kwargs)
def get(*args, **kwargs):
    if 'timeout' not in kwargs:
        kwargs['timeout'] = settings['outgoing']['request_timeout']
    kwargs['raise_for_httperror'] = True
    return http_get(*args, **kwargs)
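# Both get() wrappers above read the default timeout from a module-level `settings`
# mapping, and the second one additionally forces raise_for_httperror. The structure
# below is a sketch of what they assume; the concrete timeout value is illustrative.
settings = {
    'outgoing': {
        'request_timeout': 2.0  # assumed default timeout in seconds
    }
}

# example call: uses the default above unless a timeout is passed explicitly
# resp = get('https://example.org/')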