def youtube_channel_search_gdocs(client, query, cfg):
    """Search for YouTube channels matching *query* and hash their avatars.

    Runs a ``site:youtube.com/channel`` search through ``gdoc_search``,
    de-duplicates the hits by channel URL and, for at most the first five
    channels, downloads the channel page, parses the JSON assigned to
    ``window["ytInitialData"]`` and hashes the avatar image with
    ``image_hash``.

    Args:
        client: HTTP client; only ``client.get(url)`` is used, and the
            response must expose ``.text`` and ``.content``.
        query: Text to search for; it is embedded in the search query
            between escaped double quotes.
        cfg: Passed through unchanged to ``gdoc_search``.

    Returns:
        ``False`` when the search yields no channel, or when a channel page
        still cannot be parsed after the second attempt. Otherwise a dict
        with ``"channels"`` (a list of dicts holding ``"profil_url"``,
        ``"name"`` and ``"hash"``) and ``"length"`` (the number of distinct
        channels found *before* the five-channel cap is applied).
    """
    max_attempts = 2

    search_query = f"site:youtube.com/channel \\\"{query}\\\""
    search_results = gdoc_search(search_query, cfg)

    channels = []
    for result in search_results:
        # Keep only items 3 and 4 of the '/'-split link (for an absolute
        # http(s) URL these are the first two path segments), so several
        # pages of one channel collapse to a single URL.
        sanitized = "https://youtube.com/" + "/".join(
            result["link"].split("/")[3:5])
        if sanitized not in channels:
            channels.append(sanitized)

    if not channels:
        return False

    # "length" deliberately reports the count before truncation.
    results = {"channels": [], "length": len(channels)}

    for profil_url in channels[:5]:
        data = None
        avatar_link = None
        for _ in range(max_attempts):
            source = client.get(profil_url).text
            try:
                raw_json = (
                    source.split('window["ytInitialData"] = ')[1]
                    .split('window["ytInitialPlayerResponse"]')[0]
                    .split(';\n')[0]
                )
                data = json.loads(raw_json)
                # Drop everything from the first '=' onward (presumably the
                # size suffix of the thumbnail URL -- TODO confirm).
                avatar_link = data["metadata"]["channelMetadataRenderer"][
                    "avatar"]["thumbnails"][0]["url"].split("=")[0]
            except (KeyError, IndexError, json.JSONDecodeError):
                # The page did not contain the expected blob; fetch it again.
                continue
            break
        else:
            # Every attempt failed: report "nothing usable" instead of
            # crashing with a parsing exception.
            return False

        avatar_response = client.get(avatar_link)
        img = Image.open(BytesIO(avatar_response.content))
        avatar_hash = image_hash(img)
        title = data["metadata"]["channelMetadataRenderer"]["title"]
        results["channels"].append({
            "profil_url": profil_url,
            "name": title,
            "hash": avatar_hash,
        })

    return results
def youtube_channel_search_gdocs(client, query, data_path, gdocs_public_doc):
    """Search for YouTube channels matching *query* and hash their avatars.

    Runs a ``site:youtube.com/channel`` search through ``gdoc_search``,
    de-duplicates the hits by channel URL and, for at most the first five
    channels, downloads the channel page, parses the JSON assigned to
    ``window["ytInitialData"]`` and hashes the avatar image with
    ``image_hash``.

    Args:
        client: HTTP client; only ``client.get(url)`` is used, and the
            response must expose ``.text`` and ``.content``.
        query: Text to search for; it is embedded in the search query
            between escaped double quotes.
        data_path: Passed through unchanged to ``gdoc_search``.
        gdocs_public_doc: Passed through unchanged to ``gdoc_search``.

    Returns:
        ``False`` when the search yields no channel, or when a channel page
        still cannot be parsed after the second attempt. Otherwise a dict
        with ``"channels"`` (a list of dicts holding ``"profile_url"``,
        ``"name"`` and ``"hash"``) and ``"length"`` (the number of distinct
        channels found *before* the five-channel cap is applied).
    """
    max_attempts = 2

    search_query = f"site:youtube.com/channel \\\"{query}\\\""
    search_results = gdoc_search(search_query, data_path, gdocs_public_doc)

    channels = []
    for result in search_results:
        # Keep only items 3 and 4 of the '/'-split link (for an absolute
        # http(s) URL these are the first two path segments), so several
        # pages of one channel collapse to a single URL.
        sanitized = "https://youtube.com/" + "/".join(
            result["link"].split("/")[3:5])
        if sanitized not in channels:
            channels.append(sanitized)

    if not channels:
        return False

    # "length" deliberately reports the count before truncation.
    results = {"channels": [], "length": len(channels)}

    for profile_url in channels[:5]:
        data = None
        avatar_link = None
        for _ in range(max_attempts):
            source = client.get(profile_url).text
            try:
                raw_json = (
                    source.split('window["ytInitialData"] = ')[1]
                    .split('window["ytInitialPlayerResponse"]')[0]
                    .split(';\n')[0]
                )
                data = json.loads(raw_json)
                # Drop everything from the first '=' onward (presumably the
                # size suffix of the thumbnail URL -- TODO confirm).
                avatar_link = data["metadata"]["channelMetadataRenderer"][
                    "avatar"]["thumbnails"][0]["url"].split("=")[0]
            except (KeyError, IndexError, json.JSONDecodeError):
                # The page did not contain the expected blob; fetch it again.
                continue
            break
        else:
            # Every attempt failed: give up on the whole search, as before.
            return False

        avatar_response = client.get(avatar_link)
        img = Image.open(BytesIO(avatar_response.content))
        avatar_hash = image_hash(img)
        title = data["metadata"]["channelMetadataRenderer"]["title"]
        results["channels"].append({
            "profile_url": profile_url,
            "name": title,
            "hash": avatar_hash,
        })

    return results