Esempio n. 1
0
def is_age_restricted(watch_html):
    """Check if content is age restricted.

    :param str watch_html:
        The html contents of the watch page.
    :rtype: bool
    :returns:
        Whether or not the content is age restricted.
    """
    try:
        regex_search(r'og:restrictions:age', watch_html, group=0)
    except RegexMatchError:
        return False
    return True
Esempio n. 2
0
def get_initial_function_name(js):
    """Extract the name of the function responsible for computing the signature.
    :param str js:
        The contents of the base.js asset file.
    """
    # c&&d.set("signature", EE(c));
    pattern = [
        r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>['
        r'a-zA-Z0-9$]+)\(',
        r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\('
        r'\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\('
        r'\s*""\s*\)', r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
        r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,'
        r'\s*(?:encodeURIComponent\s*\()?\s*(?P<si$',
        r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>['
        r'a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>['
        r'a-zA-Z0-9$]+)\('
    ]
    logger.debug('finding initial function name')
    return regex_search(pattern, js, group=1)
Esempio n. 3
0
def parse_function(js_func):
    """Parse the Javascript transform function.
    Break a JavaScript transform function down into a two element ``tuple``
    containing the function name and some integer-based argument.
    :param str js_func:
        The JavaScript version of the transform function.
    :rtype: tuple
    :returns:
        two element tuple containing the function name and an argument.
    **Example**:
    >>> parse_function('DE.AJ(a,15)')
    ('AJ', 15)
    """
    logger.debug('parsing transform function')
    return regex_search(r'\w+\.(\w+)\(\w,(\d+)\)', js_func, groups=True)
Esempio n. 4
0
def video_id(url):
    """Extract the ``video_id`` from a YouTube url.

    This function supports the following patterns:

    - :samp:`https://youtube.com/watch?v={video_id}`
    - :samp:`https://youtube.com/embed/{video_id}`
    - :samp:`https://youtu.be/{video_id}`

    :param str url:
        A YouTube url containing a video id.
    :rtype: str
    :returns:
        YouTube video id.
    """
    return regex_search(r'(?:v=|\/)([0-9A-Za-z_-]{11}).*', url, group=1)
Esempio n. 5
0
def video_info_url(
    video_id,
    watch_url,
    watch_html,
    embed_html,
    age_restricted,
):
    """Construct the video_info url.

    :param str video_id:
        A YouTube video identifier.
    :param str watch_url:
        A YouTube watch url.
    :param str watch_html:
        The html contents of the watch page.
    :param str embed_html:
        The html contents of the embed page (for age restricted videos).
    :param bool age_restricted:
        Is video age restricted.
    :rtype: str
    :returns:
        :samp:`https://youtube.com/get_video_info` with necessary GET
        parameters.
    """
    if age_restricted:
        sts = regex_search(r'"sts"\s*:\s*(\d+)', embed_html, group=1)
        # Here we use ``OrderedDict`` so that the output is consistent between
        # Python 2.7+.
        params = OrderedDict([
            ('video_id', video_id),
            ('eurl', eurl(video_id)),
            ('sts', sts),
        ])
    else:
        params = OrderedDict([
            ('video_id', video_id),
            ('el', '$el'),
            ('ps', 'default'),
            ('eurl', quote(watch_url)),
            ('hl', 'en_US'),
        ])
    return 'https://youtube.com/get_video_info?' + urlencode(params)
Esempio n. 6
0
def get_transform_object(js, var):
    """Extract the "transform object".
    The "transform object" contains the function definitions referenced in the
    "transform plan". The ``var`` argument is the obfuscated variable name
    which contains these functions, for example, given the function call
    ``DE.AJ(a,15)`` returned by the transform plan, "DE" would be the var.
    :param str js:
        The contents of the base.js asset file.
    :param str var:
        The obfuscated variable name that stores an object with all functions
        that descrambles the signature.
    **Example**:
    >>> get_transform_object(js, 'DE')
    ['AJ:function(a){a.reverse()}',
    'VR:function(a,b){a.splice(0,b)}',
    'kT:function(a,b){var c=a[0];a[0]=a[b%a.length];a[b]=c}']
    """
    pattern = r'var %s={(.*?)};' % re.escape(var)
    logger.debug('getting transform object')
    return (regex_search(pattern, js, group=1,
                         flags=re.DOTALL).replace('\n', ' ').split(', '))
Esempio n. 7
0
def get_transform_plan(js):
    """Extract the "transform plan".
    The "transform plan" is the functions that the ciphered signature is
    cycled through to obtain the actual signature.
    :param str js:
        The contents of the base.js asset file.
    **Example**:
    >>> get_transform_plan(js)
    ['DE.AJ(a,15)',
    'DE.VR(a,3)',
    'DE.AJ(a,51)',
    'DE.VR(a,3)',
    'DE.kT(a,51)',
    'DE.kT(a,8)',
    'DE.VR(a,3)',
    'DE.kT(a,21)']
    """
    name = re.escape(get_initial_function_name(js))
    pattern = r'%s=function\(\w\){[a-z=\.\(\"\)]*;(.*);(?:.+)}' % name
    logger.debug('getting transform plan')
    return regex_search(pattern, js, group=1).split(';')
Esempio n. 8
0
def get_ytplayer_config(html, age_restricted=False):
    """Get the YouTube player configuration data from the watch html.

    Extract the ``ytplayer_config``, which is json data embedded within the
    watch html and serves as the primary source of obtaining the stream
    manifest data.

    :param str watch_html:
        The html contents of the watch page.
    :param bool age_restricted:
        Is video age restricted.
    :rtype: str
    :returns:
        Substring of the html containing the encoded manifest data.
    """
    if age_restricted:
        pattern = r";yt\.setConfig\(\{'PLAYER_CONFIG':\s*({.*})(,'EXPERIMENT_FLAGS'|;)"  # noqa: E501
    else:
        pattern = r';ytplayer\.config\s*=\s*({.*?});'
    yt_player_config = regex_search(pattern, html, group=1)
    return json.loads(yt_player_config)
Esempio n. 9
0
def mime_type_codec(mime_type_codec):
    """Parse the type data.

    Breaks up the data in the ``type`` key of the manifest, which contains the
    mime type and codecs serialized together, and splits them into separate
    elements.

    **Example**:

    >>> mime_type_codec('audio/webm; codecs="opus"')
    ('audio/webm', ['opus'])

    :param str mime_type_codec:
        String containing mime type and codecs.
    :rtype: tuple
    :returns:
        The mime type and a list of codecs.

    """
    pattern = r'(\w+\/\w+)\;\scodecs=\"([a-zA-Z-0-9.,\s]*)\"'
    mime_type, codecs = regex_search(pattern, mime_type_codec, groups=True)
    return mime_type, [c.strip() for c in codecs.split(',')]