Beispiel #1
0
def initial_data(watch_html: str) -> dict:
    """Extract the ytInitialData json from the watch_html page.

    This mostly contains metadata necessary for rendering the page on-load,
    such as video information, copyright notices, etc.

    Two assignment forms are tried in order: the ``window["ytInitialData"] =``
    form first, then the bare ``ytInitialData =`` form. The result of the
    first ``parse_for_object`` call that does not raise ``HTMLParseError`` is
    returned unchanged.

    @param watch_html: Html of the watch page
    @return: The parsed ytInitialData object
    @raise RegexMatchError: If no pattern yields a parsable object; the last
        ``HTMLParseError`` is attached as the exception's cause.
    """
    patterns = [
        r"window\[['\"]ytInitialData['\"]]\s*=\s*",
        r"ytInitialData\s*=\s*"
    ]
    last_error = None
    for pattern in patterns:
        try:
            return parse_for_object(watch_html, pattern)
        except HTMLParseError as exc:
            # Copy to an outer name: ``exc`` is unbound once the except
            # block ends, and the failure is needed for chaining below.
            last_error = exc

    # Chain the final parse failure so the traceback shows why it failed.
    raise RegexMatchError(
        caller='initial_data', pattern='initial_data_pattern'
    ) from last_error
Beispiel #2
0
def test_invalid_start():
    """A start pattern that is absent from the input raises HTMLParseError."""
    html = 'test = {}'
    start_pattern = r'invalid_regex'
    with pytest.raises(HTMLParseError):
        parse_for_object(html, start_pattern)
Beispiel #3
0
def test_parse_context_closer_in_string_value():
    """A ``};`` inside a string value must not end the object early."""
    expected = {'foo': '};'}
    parsed = parse_for_object('test = {"foo": "};"};', r'test\s*=\s*')
    assert parsed == expected
Beispiel #4
0
def test_parse_simple_object():
    """An object holding an empty list and an empty object is parsed whole."""
    expected = {'foo': [], 'bar': {}}
    parsed = parse_for_object('test = {"foo": [], "bar": {}};', r'test\s*=\s*')
    assert parsed == expected
Beispiel #5
0
def test_parse_empty_object_with_trailing_characters():
    """Text after the closing brace (here ``;``) is not part of the result."""
    start_pattern = r'test\s*=\s*'
    parsed = parse_for_object('test = {};', start_pattern)
    assert parsed == {}
Beispiel #6
0
def test_parse_simple_empty_object():
    """An empty object literal with nothing after it parses to ``{}``."""
    html = 'test = {}'
    start_pattern = r'test\s*=\s*'
    assert parse_for_object(html, start_pattern) == {}