Example #1
0
def __parse_property_expression(stream, PropertyLetter, EqualConditionPossibleF=True):
    r"""Parses an expression of the form '\? { X [ = Y] }' where
       ? = PropertyLetter.

       stream                  -- input stream; its next two characters must be
                                  the backslash followed by PropertyLetter.
       PropertyLetter          -- single character expected after the backslash.
       EqualConditionPossibleF -- if False, an expression that contains an '='
                                  is rejected.

       RETURNS: A list of whitespace-stripped strings. If the '=' operator is
                present, two fields are returned: first = left hand side,
                second = right hand side. Otherwise, a list with a single
                element is returned.

       RAISES:  RegularExpressionException if the '\?' prefix or the opening
                '{' is missing, if the content between the braces is empty,
                if it contains more than one '=', or if it contains an '='
                while EqualConditionPossibleF is False.
    """
    assert isinstance(PropertyLetter, str)
    assert len(PropertyLetter) == 1
    assert isinstance(EqualConditionPossibleF, bool)

    # verify '\?'
    x = stream.read(2)
    if x != "\\" + PropertyLetter:
        # Both placeholders need a value: the expected letter and what was read.
        raise RegularExpressionException("Unicode property letter '\\%s' expected, received '%s'."
                                         % (PropertyLetter, x))

    skip_whitespace(stream)

    x = stream.read(1)
    if x != "{":
        raise RegularExpressionException("Unicode property '\\%s' not followed by '{'." % PropertyLetter)

    content = __snap_until(stream, "}")

    # 'split()' always delivers at least one field, so emptiness has to be
    # checked on the content itself rather than on the number of fields.
    if not content.strip():
        raise RegularExpressionException("Unicode property expression '\\%s{}' cannot have no content."
                                         % PropertyLetter)

    fields = content.split("=")

    if len(fields) > 2:
        raise RegularExpressionException("Unicode property expression '\\%s' can have at maximum one '='."
                                         % PropertyLetter)

    if not EqualConditionPossibleF and len(fields) == 2:
        raise RegularExpressionException("Unicode property expression '\\%s' does not allow '=' conditions"
                                         % PropertyLetter)

    # A real list (not a lazy 'map' object) so that callers can index it and
    # take its length.
    return [field.strip() for field in fields]
Example #2
0
def __snap_repetition_range(the_state_machine, stream):
    """Snaps a string that represents a repetition range. The following
       syntaxes are supported:
           '?'      one or none repetition
           '+'      one or arbitrary repetition
           '*'      arbitrary repetition (even zero)
           '{n}'    exactly 'n' repetitions
           '{m,n}'  from 'm' to 'n' repetitions
           '{n,}'   arbitrary, but at least 'n' repetitions

       the_state_machine -- object of class 'StateMachine' to be repeated.
       stream            -- input stream positioned at the possible
                            repetition operator.

       RETURNS: The result of 'repeat.do()' if a repetition operator was
                found. Otherwise, the stream is set back to where it was
                and 'the_state_machine' is returned unchanged. The same
                happens if the text after '{' does not start with a digit.

       RAISES:  RegularExpressionException if the text between '{' and '}'
                cannot be interpreted as one of the range syntaxes above,
                or if applying the repetition fails.
    """
    assert the_state_machine.__class__.__name__ == "StateMachine", \
           "received object of type '%s'" % the_state_machine.__class__.__name__ + "\n" + \
           repr(the_state_machine)

    position_0 = stream.tell()
    x = stream.read(1)
    if x == "+":
        return repeat.do(the_state_machine, 1)
    if x == "*":
        return repeat.do(the_state_machine)
    if x == "?":
        return repeat.do(the_state_machine, 0, 1)
    if x != "{":
        # no repetition range, so everything remains as it is
        stream.seek(position_0)
        return the_state_machine

    repetition_range_str = __snap_until(stream, "}")
    if repetition_range_str and not repetition_range_str[0].isdigit():
        # no repetition range, so everything remains as it is
        stream.seek(position_0)
        return the_state_machine

    try:
        if "," not in repetition_range_str:
            # no ',' thus "match exactly a certain number":
            # e.g. {4} = match exactly four repetitions
            number = int(repetition_range_str)
            return repeat.do(the_state_machine, number, number)

        # a range of numbers is given; build a real list, since a lazy 'map'
        # object cannot be indexed.
        fields = [field.strip() for field in repetition_range_str.split(",")]
        if len(fields) != 2:
            # e.g. {1,2,3} is not a valid range
            raise ValueError("more than one ',' in repetition range")

        number_1 = int(fields[0])
        if fields[1] == "":
            number_2 = -1              # e.g. {2,}
        else:
            number_2 = int(fields[1])  # e.g. {2,5}
        # produce repeated state machine
        return repeat.do(the_state_machine, number_1, number_2)
    except Exception:
        # 'Exception' rather than a bare 'except', so that KeyboardInterrupt
        # and SystemExit are not converted into a parse error.
        raise RegularExpressionException("error while parsing repetition range expression '%s'" \
                                         % repetition_range_str)
Example #3
0
def __snap_repetition_range(the_state_machine, stream):
    """Snaps a string that represents a repetition range. The following
       syntaxes are supported:
           '?'      one or none repetition
           '+'      one or arbitrary repetition
           '*'      arbitrary repetition (even zero)
           '{n}'    exactly 'n' repetitions
           '{m,n}'  from 'm' to 'n' repetitions
           '{n,}'   arbitrary, but at least 'n' repetitions

       the_state_machine -- object of class 'StateMachine' to be repeated.
       stream            -- input stream positioned at the possible
                            repetition operator.

       RETURNS: The result of 'repeat.do()' if a repetition operator was
                found. If there is no operator, or if the text after '{'
                does not start with a digit, then the stream position is
                restored and 'the_state_machine' is returned as it is.

       RAISES:  RegularExpressionException if the text between '{' and '}'
                does not follow one of the range syntaxes above, or if
                applying the repetition fails.
    """
    assert the_state_machine.__class__.__name__ == "StateMachine", \
           "received object of type '%s'" % the_state_machine.__class__.__name__ + "\n" + \
           repr(the_state_machine)

    position_0 = stream.tell()
    x = stream.read(1)
    if   x == "+": return repeat.do(the_state_machine, 1)
    elif x == "*": return repeat.do(the_state_machine)
    elif x == "?": return repeat.do(the_state_machine, 0, 1)
    elif x != "{":
        # no repetition range, so everything remains as it is
        stream.seek(position_0)
        return the_state_machine

    repetition_range_str = __snap_until(stream, "}")
    if repetition_range_str and not repetition_range_str[0].isdigit():
        # no repetition range, so everything remains as it is
        stream.seek(position_0)
        return the_state_machine

    try:
        if "," not in repetition_range_str:
            # no ',' thus "match exactly a certain number":
            # e.g. {4} = match exactly four repetitions
            number = int(repetition_range_str)
            return repeat.do(the_state_machine, number, number)

        # a range of numbers is given; a list comprehension is used because
        # the result is indexed below, which a lazy 'map' object does not
        # support.
        fields = [field.strip() for field in repetition_range_str.split(",")]
        if len(fields) != 2:
            # e.g. {1,2,3} is not a valid range
            raise ValueError("more than one ',' in repetition range")

        number_1 = int(fields[0])
        if fields[1] == "": number_2 = -1              # e.g. {2,}
        else:               number_2 = int(fields[1])  # e.g. {2,5}
        # produce repeated state machine
        return repeat.do(the_state_machine, number_1, number_2)
    except Exception:
        # Not a bare 'except': KeyboardInterrupt and SystemExit must pass
        # through instead of being reported as a parse error.
        raise RegularExpressionException("error while parsing repetition range expression '%s'" \
                                         % repetition_range_str)
def snap_property_set(stream):
    """Tries to snap a property set expression from the stream. The
       expression is identified by its first two characters:
           '\\P'  handled by 'property.do()'
           '\\N'  handled by 'property.do_shortcut()' with "N", "na"
           '\\G'  handled by 'property.do_shortcut()' with "G", "gc"
           '\\E'  followed by '{ name }'; the stripped name is passed to
                  'codec_db.get_supported_unicode_character_set()'

       RETURNS: What the responsible handler returns. If none of the
                prefixes matches, the stream is set back to where it was
                and None is returned.
    """
    start = stream.tell()
    prefix = stream.read(2)

    if prefix == "\\E":
        # Only in this case the two characters of the prefix stay consumed.
        skip_whitespace(stream)
        if check(stream, "{") == False:
            error_msg("Missing '{' after '\\E'.", stream)
        codec_name = __snap_until(stream, "}")
        return codec_db.get_supported_unicode_character_set(codec_name.strip(), stream)

    # All remaining cases continue from the original position.
    stream.seek(start)

    if prefix == "\\P":
        return property.do(stream)
    if prefix == "\\N":
        return property.do_shortcut(stream, "N", "na") # UCS Property: Name
    if prefix == "\\G":
        return property.do_shortcut(stream, "G", "gc") # UCS Property: General_Category
    return None
Example #5
0
def snap_property_set(stream):
    """Snaps a property set expression, if the stream starts with one.

       The two characters at the current position select the handler:
           '\\P'  'property.do()'
           '\\N'  'property.do_shortcut()' with "N", "na"
           '\\G'  'property.do_shortcut()' with "G", "gc"
           '\\E'  expects '{ name }' to follow and passes the stripped name
                  to 'codec_db.get_supported_unicode_character_set()'

       RETURNS: The handler's result, or None if no prefix matched. In the
                latter case the stream position is restored.
    """
    # prefix --> (property letter, property alias) for 'do_shortcut()'
    shortcut_db = {
        "\\N": ("N", "na"),  # UCS Property: Name
        "\\G": ("G", "gc"),  # UCS Property: General_Category
    }

    origin = stream.tell()
    token = stream.read(2)

    if token == "\\E":
        # The '\E' prefix remains consumed; the '{ name }' part follows.
        skip_whitespace(stream)
        if check(stream, "{") == False:
            error_msg("Missing '{' after '\\E'.", stream)
        encoding_name = __snap_until(stream, "}").strip()
        return codec_db.get_supported_unicode_character_set(
            encoding_name, stream)

    # Every other case starts over from the original position.
    stream.seek(origin)

    if token == "\\P":
        return property.do(stream)

    if token in shortcut_db:
        letter, alias = shortcut_db[token]
        return property.do_shortcut(stream, letter, alias)

    return None