Ejemplo n.º 1
0
def youtube_channel_search_gdocs(client, query, cfg):
    """Look up YouTube channel pages for *query* and hash their avatars.

    The search itself is delegated to ``gdoc_search``; each result is
    expected to expose a ``"link"`` entry, which is reduced to a
    ``https://youtube.com/<segment>/<segment>`` URL.

    Args:
        client: Object whose ``get(url)`` returns a response exposing
            ``.text`` and ``.content``.
        query: Text embedded (between escaped quotes) in the search string.
        cfg: Forwarded untouched to ``gdoc_search``.

    Returns:
        ``False`` when the search yields no channel URL. Otherwise a dict
        with ``"channels"`` (at most five entries, each holding
        ``"profil_url"``, ``"name"`` and ``"hash"``) and ``"length"``, the
        number of distinct channel URLs found before truncating to five.
    """
    hits = gdoc_search(f"site:youtube.com/channel \\\"{query}\\\"", cfg)

    # dict keys drop duplicates while keeping first-seen order.
    unique_urls = list(dict.fromkeys(
        "https://youtube.com/" + "/".join(hit["link"].split("/")[3:5])
        for hit in hits
    ))
    if not unique_urls:
        return False

    # "length" reports every distinct URL, not just the five inspected below.
    found = {"channels": [], "length": len(unique_urls)}

    for profil_url in unique_urls[:5]:
        page = client.get(profil_url).text

        # Cut the JSON blob assigned to window["ytInitialData"] out of the page.
        after_marker = page.split('window["ytInitialData"] = ')[1]
        before_player = after_marker.split('window["ytInitialPlayerResponse"]')[0]
        data = json.loads(before_player.split(';\n')[0])

        metadata = data["metadata"]["channelMetadataRenderer"]
        # Keep only the part of the thumbnail URL that precedes the first "=".
        avatar_link = metadata["avatar"]["thumbnails"][0]["url"].split("=")[0]

        avatar = Image.open(BytesIO(client.get(avatar_link).content))
        avatar_hash = image_hash(avatar)
        channel_name = metadata["title"]

        found["channels"].append(
            {"profil_url": profil_url, "name": channel_name, "hash": avatar_hash}
        )

    return found
Ejemplo n.º 2
0
def youtube_channel_search_gdocs(client, query, data_path, gdocs_public_doc):
    """Look up YouTube channel pages for *query* and hash their avatars.

    The search is delegated to ``gdoc_search``; each result is expected to
    expose a ``"link"`` entry, which is reduced to a
    ``https://youtube.com/<segment>/<segment>`` URL. Up to five distinct
    channel pages are then fetched and parsed, each page being requested at
    most twice before giving up.

    Args:
        client: Object whose ``get(url)`` returns a response exposing
            ``.text`` and ``.content``.
        query: Text embedded (between escaped quotes) in the search string.
        data_path: Forwarded untouched to ``gdoc_search``.
        gdocs_public_doc: Forwarded untouched to ``gdoc_search``.

    Returns:
        ``False`` when the search yields no channel URL, or when any
        inspected channel page still cannot be parsed after the last
        attempt. Otherwise a dict with ``"channels"`` (at most five entries,
        each holding ``"profile_url"``, ``"name"`` and ``"hash"``) and
        ``"length"``, the number of distinct channel URLs found before
        truncating to five.
    """
    search_query = f"site:youtube.com/channel \\\"{query}\\\""
    search_results = gdoc_search(search_query, data_path, gdocs_public_doc)

    # dict keys drop duplicates while keeping first-seen order.
    channels = list(dict.fromkeys(
        "https://youtube.com/" + "/".join(result["link"].split("/")[3:5])
        for result in search_results
    ))

    if not channels:
        return False

    # "length" reports every distinct URL, not just the five inspected below.
    results = {"channels": [], "length": len(channels)}
    attempts = 2

    for profile_url in channels[:5]:
        for _ in range(attempts):
            source = client.get(profile_url).text
            try:
                # Cut the JSON blob assigned to window["ytInitialData"] out
                # of the page source.
                payload = source.split('window["ytInitialData"] = ')[1]
                payload = payload.split('window["ytInitialPlayerResponse"]')[0]
                data = json.loads(payload.split(';\n')[0])

                renderer = data["metadata"]["channelMetadataRenderer"]
                # Keep only the part of the URL that precedes the first "=".
                avatar_link = renderer["avatar"]["thumbnails"][0]["url"].split(
                    "=")[0]
                title = renderer["title"]
            except (KeyError, IndexError, ValueError):
                # ValueError covers json.JSONDecodeError: a truncated or
                # malformed payload is retried like a missing marker/key
                # instead of escaping the retry loop.
                continue
            break
        else:
            # Every attempt failed for this channel: keep the original
            # contract of signalling failure for the whole lookup.
            return False

        avatar = Image.open(BytesIO(client.get(avatar_link).content))
        avatar_hash = image_hash(avatar)
        results["channels"].append({
            "profile_url": profile_url,
            "name": title,
            "hash": avatar_hash,
        })

    return results