def _find_client_id(self):
    """Scrape a 32-character ``client_id`` from the SoundCloud home page scripts.

    The home page is fetched, every ``<script src="...">`` URL is collected,
    and the scripts are downloaded from the last one to the first until one
    of them contains a ``client_id: "<32 alphanumerics>"`` assignment.

    Returns:
        The first matching client id found.

    Raises:
        SoundCloudException: if no script yields a client id.
    """
    homepage = get_req(url='https://soundcloud.com/', headers=self._headers)
    script_urls = re.findall(r'<script[^>]+src="([^"]+)"', homepage.text)

    # Scripts are probed in reverse document order.
    for script_url in script_urls[::-1]:
        script = get_req(url=script_url, headers=self._headers)
        if not script:
            # Falsy response: nothing to scan, try the next script.
            continue
        found = re.search(r'client_id\s*:\s*"([0-9a-zA-Z]{32})"', script.text)
        if found is not None:
            return found.group(1)

    raise SoundCloudException('Unable to extract client id')
def _extract_info_dict_for_sets(self, *args, **kwargs):
    """Resolve a set (playlist) and download each of its tracks in order.

    Keyword Args:
        query: optional dict of request parameters; it is copied, so the
            caller's dict is not modified.
        client_id: value sent as the ``client_id`` request parameter when
            fetching the set itself.
        url_info: URL requested to obtain the set's JSON description.

    The JSON reply is read for ``title`` and ``tracks``. A one-line summary
    is written to stdout, then for every track ``self._extract_info_dict``
    is called followed by ``self._download_track``. Tracks are numbered
    from 1 in the ``ele`` argument passed to both calls.

    Returns:
        None.
    """
    query = kwargs.get('query', {}).copy()
    query['client_id'] = kwargs.get('client_id')
    url_info = kwargs.get('url_info')

    response = get_req(url=url_info, headers=self._headers, params=query)
    res_json = response.json()

    _title = res_json.get('title')
    # A reply without a 'tracks' entry (or with null) is treated as an empty
    # playlist; calling len() on None used to raise TypeError here.
    _tracks = res_json.get('tracks') or []

    sys.stdout.write(fg + '[' + fc + '*' + fg + '] : Playlist %s found %s track.\n' % (_title, len(_tracks)))

    for ele, track in enumerate(_tracks, start=1):
        _id_track = track.get('id')
        # Each track is re-resolved through the v2 "resolve" endpoint using
        # the instance's own client id (not the one passed in kwargs).
        dict_info_track = self._extract_info_dict(
            query=query,
            url_info=self._API_V2_BASE + 'resolve?url=' + self._API_BASE + 'tracks/' + str(_id_track),
            client_id=self._cliend_id,
            ele=ele)
        self._download_track(
            dict_info_track=dict_info_track,
            _show_all_info=self._show_all_info,
            ele=ele)
def _extract_playlist(self, url, query):
    """Request ``url`` with paging parameters and return the decoded JSON dict.

    Args:
        url: endpoint to request.
        query: dict of request parameters. NOTE: it is updated in place with
            ``client_id`` (from ``self._cliend_id``), ``limit`` and
            ``linked_partitioning``; this side effect is kept because
            callers may rely on it.

    Returns:
        The decoded JSON body when it is a dict, otherwise ``None``.
    """
    query['client_id'] = self._cliend_id
    query.update({
        'limit': 2000000000,
        'linked_partitioning': '1',
    })
    res = get_req(url=url, headers=self._headers, params=query)

    # Decode the body once and reuse it, instead of calling json() once for
    # the type check and again for the return value.
    payload = res.json()
    if not isinstance(payload, dict):
        return None
    return payload
def _get_info_user(self, url, query):
    """Request ``url`` and return the decoded JSON dict.

    Args:
        url: endpoint to request.
        query: dict of request parameters. NOTE: ``client_id`` is set on it
            in place from ``self._cliend_id``; this side effect is kept
            because callers may rely on it.

    Returns:
        The decoded JSON body when it is a dict, otherwise ``None``.
    """
    query['client_id'] = self._cliend_id
    res = get_req(url=url, headers=self._headers, params=query)

    # Decode the body once and reuse it, instead of calling json() once for
    # the type check and again for the return value.
    payload = res.json()
    if not isinstance(payload, dict):
        return None
    return payload
def _extract_url_transcodings(self, url_trans, query):
    """Request ``url_trans`` with ``query`` as parameters and return its JSON.

    The request is sent with the instance's ``_headers``; the decoded JSON
    body is returned as-is, with no type check.
    """
    return get_req(
        url=url_trans,
        headers=self._headers,
        params=query,
    ).json()
def _extract_info_dict(self, *args, **kwargs):
    """Fetch a track's JSON description and build its list of stream formats.

    Keyword Args:
        query: optional dict of request parameters; it is copied, so the
            caller's dict is not modified.
        client_id: value sent as the ``client_id`` request parameter.
        url_info: URL requested to obtain the track's JSON description.
        ele: optional track number, used only in the progress messages.

    Returns:
        A list whose first element is a metadata dict with the keys
        ``title``, ``id``, ``artwork_url`` and ``description``. It is
        followed by one dict per usable transcoding with the keys ``url``,
        ``ext``, ``format_id``, ``protocol`` and, when it could be parsed
        from the stream URL, ``abr`` (as an int).
    """
    ele = kwargs.get('ele') or ''
    text = fg + '\r[' + fc + '*' + fg + '] : Extracting info of track %s ... ' % (ele)
    spinner(text=text)

    query = kwargs.get('query', {}).copy()
    query['client_id'] = kwargs.get('client_id')
    url_info = kwargs.get('url_info')

    response = get_req(url=url_info, headers=self._headers, params=query)
    res_json = response.json()

    format_urls = set()  # stream URLs already emitted, used for de-duplication
    formats = [{
        'title': res_json.get('title'),
        'id': res_json.get('id'),
        'artwork_url': res_json.get('artwork_url'),
        'description': res_json.get('description'),
    }]

    def invalid_url(url):
        # Reject empty URLs, duplicates, and URLs containing
        # /preview/0/30/ or /playlist/0/30/ (presumably 30-second
        # previews -- TODO confirm).
        return not url or url in format_urls or re.search(
            r'/(?:preview|playlist)/0/30/', url)

    def add_format(f, protocol):
        # Fill in 'abr' and 'ext' from a ".<digits>.<ext>" fragment of the
        # stream URL, but only where the caller did not already set them.
        mobj = re.search(
            r'\.(?P<abr>\d+)\.(?P<ext>[0-9a-z]{3,4})(?=[/?])', f['url'])
        if mobj:
            for k, v in mobj.groupdict().items():
                if not f.get(k):
                    f[k] = v

        # format_id is "<protocol>_<ext>_<abr>", skipping missing parts.
        format_id_list = []
        if protocol:
            format_id_list.append(protocol)
        for k in ('ext', 'abr'):
            v = f.get(k)
            if v:
                format_id_list.append(v)

        # Convert abr only after it has been used as a string above.
        abr = f.get('abr')
        if abr:
            f['abr'] = int(abr)

        f.update({
            'format_id': '_'.join(format_id_list),
            'protocol': 'm3u8_native' if protocol == 'hls' else 'http',
        })
        formats.append(f)

    transcodings = try_get(
        res_json, lambda x: x['media']['transcodings'], list) or []
    for trans in transcodings:
        if not isinstance(trans, dict):
            continue

        format_url = trans.get('url')
        if not format_url:
            # Without a URL there is nothing to request, and the '/hls'
            # substring test below would fail on None; skip the entry like
            # the other malformed-transcoding cases.
            continue

        stream = self._extract_url_transcodings(format_url, query)
        if not isinstance(stream, dict):
            continue

        stream_url = stream.get('url')
        spinner(text=text)
        if invalid_url(stream_url):
            continue
        format_urls.add(stream_url)

        stream_format = trans.get('format') or {}
        protocol = stream_format.get('protocol')
        # The transcoding URL itself can reveal HLS even when the declared
        # protocol does not say so.
        if protocol != 'hls' and '/hls' in format_url:
            protocol = 'hls'

        # Prefer the extension encoded in the preset name; fall back to the
        # MIME type when the preset is missing or not a known extension.
        ext = None
        preset = trans.get('preset')
        if preset:
            ext = preset.split('_')[0]
        if ext not in KNOWN_EXTENSIONS:
            ext = mimetype2ext(stream_format.get('mime_type'))

        add_format({
            'url': stream_url,
            'ext': ext,
        }, 'http' if protocol == 'progressive' else protocol)

    sys.stdout.write(fg + '\r[' + fc + '*' + fg + '] : Extracting info of track %s. (done)\n' % (ele))
    return formats