Example #1
def quoted_val(state, text, i, formats, user_data):
    ' A quoted attribute value '
    quote = '"' if state.parse is DQ_VAL else "'"
    add_attr_data(user_data, ATTR_VALUE, ATTR_START, i)
    pos = text.find(quote, i)
    if pos == -1:
        num = len(text) - i
        is_link = is_class = False
    else:
        num = pos - i + 1
        state.parse = IN_OPENING_TAG
        if state.tag_being_defined is not None and state.attribute_name in ('lang', 'xml:lang'):
            try:
                state.tag_being_defined.lang = parse_lang_code(text[i:pos])
            except ValueError:
                pass
        add_attr_data(user_data, ATTR_VALUE, ATTR_END, i + num)
        is_link = state.attribute_name in LINK_ATTRS
        is_class = not is_link and state.attribute_name == 'class'

    if is_link:
        if verify_link(text[i:i + num - 1], user_data.doc_name) is False:
            return [(num - 1, formats['bad_link']), (1, formats['string'])]
        return [(num - 1, formats['link']), (1, formats['string'])]
    elif is_class:
        return [(num - 1, formats['class_attr']), (1, formats['string'])]
    return [(num, formats['string'])]
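
The handler returns a list of (length, format) run-length pairs covering the quoted value: for link and class attributes it emits two pairs, num - 1 characters for the value itself and one character for the closing quote, while every other value comes back as a single string run. Below is a self-contained sketch of that segment arithmetic, with plain strings standing in for the format objects and sample markup chosen only for illustration:

# Sketch of the segment arithmetic in quoted_val above; the names and
# sample markup here are assumptions, not part of the original module.
text = '<a href="chapter1.html">'
i = text.index('"') + 1    # first character inside the quotes
pos = text.find('"', i)    # position of the closing quote
num = pos - i + 1          # value length plus the closing quote

# The split quoted_val would return for a link attribute:
segments = [(num - 1, 'link'), (1, 'string')]
assert segments == [(len('chapter1.html'), 'link'), (1, 'string')]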
Example #2
def quoted_val(state, text, i, formats, user_data):
    ' A quoted attribute value '
    quote = '"' if state.parse is DQ_VAL else "'"
    add_attr_data(user_data, ATTR_VALUE, ATTR_START, i)
    pos = text.find(quote, i)
    if pos == -1:
        num = len(text) - i
        is_link = is_class = False
    else:
        num = pos - i + 1
        state.parse = IN_OPENING_TAG
        if state.tag_being_defined is not None and state.attribute_name in (
                'lang', 'xml:lang'):
            try:
                state.tag_being_defined.lang = parse_lang_code(text[i:pos])
            except ValueError:
                pass
        add_attr_data(user_data, ATTR_VALUE, ATTR_END, i + num)
        is_link = state.attribute_name in LINK_ATTRS
        is_class = not is_link and state.attribute_name == 'class'

    if is_link:
        if verify_link(text[i:i + num - 1], user_data.doc_name) is False:
            return [(num - 1, formats['bad_link']), (1, formats['string'])]
        return [(num - 1, formats['link']), (1, formats['string'])]
    elif is_class:
        return [(num - 1, formats['class_attr']), (1, formats['string'])]
    return [(num, formats['string'])]
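
When a tag is currently being defined and the attribute being closed is lang or xml:lang, the parsed language code is recorded on that tag, and values that fail to parse are silently ignored via the ValueError branch. A hedged sketch of that pattern follows, with a stand-in tag class and a deliberately simplified parse_lang_code; both are assumptions, not the module's real implementations:

class Tag:
    # Stand-in for the object referenced as state.tag_being_defined.
    lang = None

def parse_lang_code(raw):
    # Stand-in parser: accept any non-empty, whitespace-free value.
    raw = raw.strip()
    if not raw or any(c.isspace() for c in raw):
        raise ValueError(raw)
    return raw

tag = Tag()
try:
    tag.lang = parse_lang_code('en-GB')   # an invalid code would simply be skipped
except ValueError:
    pass
assert tag.lang == 'en-GB'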
Example #3
def content(state, text, i, formats, user_data):
    ' Inside content blocks '
    m = space_pat.match(text, i)
    if m is not None:
        return [(len(m.group()), None)]
    cdo = cdo_pat.match(text, i)
    if cdo is not None:
        state.parse = IN_COMMENT_CONTENT
        return [(len(cdo.group()), formats['comment'])]
    if text[i] == '"':
        state.parse = IN_DQS
        return [(1, formats['string'])]
    if text[i] == "'":
        state.parse = IN_SQS
        return [(1, formats['string'])]
    if text[i] == '}':
        state.blocks -= 1
        state.parse = NORMAL if state.blocks < 1 else IN_CONTENT
        return [(1, formats['bracket'])]
    if text[i] == '{':
        state.blocks += 1
        return [(1, formats['bracket'])]
    for token, fmt, name in content_tokens:
        m = token.match(text, i)
        if m is not None:
            if name is URL_TOKEN:
                h = 'link'
                url = m.group()
                prefix, main, suffix = url[:4], url[4:-1], url[-1]
                if len(main) > 1 and main[0] in ('"',
                                                 "'") and main[0] == main[-1]:
                    prefix += main[0]
                    suffix = main[-1] + suffix
                    main = main[1:-1]
                    h = 'bad_link' if verify_link(
                        main, user_data.doc_name) is False else 'link'
                return [(len(prefix), formats[fmt]), (len(main), formats[h]),
                        (len(suffix), formats[fmt])]
            return [(len(m.group()), formats[fmt])]

    return [(len(text) - i, formats['unknown-normal'])]
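
The '{' and '}' branches above keep a running block depth: every opening brace increments state.blocks, every closing brace decrements it, and the parser only returns to NORMAL once the outermost block has closed. Here is a minimal sketch of that bookkeeping with stand-in state values; the constant names mimic the module's but are defined only for this example:

NORMAL, IN_CONTENT = 'normal', 'in_content'
blocks, parse = 1, IN_CONTENT    # already inside one open block
for ch in '{}}':                 # a nested block opens, then both blocks close
    if ch == '{':
        blocks += 1
    elif ch == '}':
        blocks -= 1
        parse = NORMAL if blocks < 1 else IN_CONTENT
assert (blocks, parse) == (0, NORMAL)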
Example #4
def content(state, text, i, formats, user_data):
    ' Inside content blocks '
    m = space_pat.match(text, i)
    if m is not None:
        return [(len(m.group()), None)]
    cdo = cdo_pat.match(text, i)
    if cdo is not None:
        state.parse = IN_COMMENT_CONTENT
        return [(len(cdo.group()), formats['comment'])]
    if text[i] == '"':
        state.parse = IN_DQS
        return [(1, formats['string'])]
    if text[i] == "'":
        state.parse = IN_SQS
        return [(1, formats['string'])]
    if text[i] == '}':
        state.blocks -= 1
        state.parse = NORMAL if state.blocks < 1 else IN_CONTENT
        return [(1, formats['bracket'])]
    if text[i] == '{':
        state.blocks += 1
        return [(1, formats['bracket'])]
    for token, fmt, name in content_tokens:
        m = token.match(text, i)
        if m is not None:
            if name is URL_TOKEN:
                h = 'link'
                url = m.group()
                prefix, main, suffix = url[:4], url[4:-1], url[-1]
                if len(main) > 1 and main[0] in ('"', "'") and main[0] == main[-1]:
                    prefix += main[0]
                    suffix = main[-1] + suffix
                    main = main[1:-1]
                    h = 'bad_link' if verify_link(main, user_data.doc_name) is False else 'link'
                return [(len(prefix), formats[fmt]), (len(main), formats[h]), (len(suffix), formats[fmt])]
            return [(len(m.group()), formats[fmt])]

    return [(len(text) - i, formats['unknown-normal'])]
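
For a URL token the match is sliced into a url( prefix, the inner value and the closing ) suffix; when the inner value is itself quoted, the quotes are folded into the prefix and suffix so that only the bare URL is passed to verify_link and highlighted as either link or bad_link. A standalone sketch of that slicing, using a sample value that is an assumption for illustration only:

url = 'url("images/cover.jpg")'
prefix, main, suffix = url[:4], url[4:-1], url[-1]
if len(main) > 1 and main[0] in ('"', "'") and main[0] == main[-1]:
    prefix += main[0]            # the opening quote joins the url( prefix
    suffix = main[-1] + suffix   # the closing quote joins the ) suffix
    main = main[1:-1]            # the bare URL that would go to verify_link
assert (prefix, main, suffix) == ('url("', 'images/cover.jpg', '")')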