Ejemplo n.º 1
0
def snap_set_expression(stream, PatternDict):
    """Snap a character set expression at the current position of 'stream'.

    The alternatives are tried in the following order:

       (1) a property set, as detected by 'snap_property_set()',
       (2) '\\C'  => case fold expression, see 'case_fold_expression.do()',
       (3) '[:'   => set term which must be closed by ':]',
       (4) '['    => traditional character set,
       (5) '{'    => replacement, snapped with 'StateMachineF=False'.

    RETURNS: The object delivered by the parser in charge. 'None', if none
             of the alternatives applies; the characters read while probing
             for (2)-(5) are not put back in that case.

             The results of (1) and (2) are returned directly, the others
             pass through '__debug_exit()'.

    RAISES:  RegularExpressionException, if ':]' is missing after a set term.
    """
    assert     stream.__class__.__name__ == "StringIO" \
            or stream.__class__.__name__ == "file"

    __debug_entry("set_expression", stream)

    result = snap_property_set(stream)
    if result is not None: return result

    x = stream.read(2)
    if x == "\\C":
        # The case fold parser calls back into this function for the
        # set which is enclosed in its curly brackets.
        return case_fold_expression.do(stream,
                                       PatternDict,
                                       snap_set_expression=snap_set_expression)

    elif x == "[:":
        result = snap_set_term(stream, PatternDict)
        skip_whitespace(stream)
        x = stream.read(2)
        if x != ":]":
            raise RegularExpressionException("Missing closing ':]' for character set expression.\n" + \
                                             "found: '%s'" % x)

    # NOTE: 'startswith()' instead of 'x[0]'. At the end of the stream
    #       'read(2)' delivers '' and an index access raises IndexError.
    elif x.startswith("["):
        stream.seek(-1, 1)   # step back over the last character read
        result = traditional_character_set.do(stream)

    elif x.startswith("{"):
        stream.seek(-1, 1)   # step back over the last character read
        result = snap_replacement(stream, PatternDict, StateMachineF=False)

    else:
        result = None

    return __debug_exit(result, stream)
Ejemplo n.º 2
0
def __parse_property_expression(stream, PropertyLetter, EqualConditionPossibleF=True):
    r"""Parses an expression of the form '\? { X [ = Y] }' where
       ? = PropertyLetter.

       PropertyLetter          -- the single letter expected behind the
                                  backslash.
       EqualConditionPossibleF -- if False, an '=' inside the curly
                                  brackets is treated as an error.

       RETURNS: A list of the fields with surrounding whitespace removed.
                If the '=' operator is present, the list contains two
                elements: first = left hand side, second = right hand
                side. Otherwise, it contains a single element.

       RAISES:  RegularExpressionException, if the stream does not start
                with '\?', if no '{' follows, if the brackets contain
                nothing but whitespace, if there is more than one '=', or
                if '=' appears while EqualConditionPossibleF is False.
    """
    assert isinstance(PropertyLetter, str)
    assert len(PropertyLetter) == 1
    assert isinstance(EqualConditionPossibleF, bool)

    # verify '\?'
    x = stream.read(2)
    if x != "\\" + PropertyLetter:
        # Two placeholders require a tuple of two arguments.
        raise RegularExpressionException("Unicode property letter '\\%s' expected, received '%s'." \
                                         % (PropertyLetter, x))

    skip_whitespace(stream)

    x = stream.read(1)
    if x != "{":
        raise RegularExpressionException("Unicode property '\\%s' not followed by '{'." % PropertyLetter)

    content = __snap_until(stream, "}")

    # 'split()' delivers at least one element, even for an empty string.
    # Thus, the 'no content' case must be detected on the content itself.
    if content.strip() == "":
        raise RegularExpressionException("Unicode property expression '\\%s{}' cannot have no content." \
                                         % PropertyLetter)

    fields = [ field.strip() for field in content.split("=") ]

    if len(fields) > 2:
        raise RegularExpressionException("Unicode property expression '\\%s' can have at maximum one '='." \
                                         % PropertyLetter)

    if not EqualConditionPossibleF and len(fields) == 2:
        raise RegularExpressionException("Unicode property expression '\\%s' does not allow '=' conditions" \
                                         % PropertyLetter)

    return fields
Ejemplo n.º 3
0
def snap_set_expression(stream):
    """Snap a character set expression at the current position of 'stream'.

    The alternatives are tried in the following order:

       (1) a property set, as detected by 'snap_property_set()',
       (2) '[:'   => set term which must be closed by ':]',
       (3) '['    => traditional character set,
       (4) '\\P'  => property expression, see 'property.do()',
       (5) '\\N'  => shortcut for the UCS property 'Name',
       (6) '\\G'  => shortcut for the UCS property 'General_Category'.

    RETURNS: The object delivered by the parser in charge. 'None', if none
             of the alternatives applies; the characters read while probing
             for (2)-(6) are not put back in that case.

             The result of (1) is returned directly, the others pass
             through '__debug_exit()'.

    RAISES:  RegularExpressionException, if ':]' is missing after a set term.
    """
    assert     stream.__class__.__name__ == "StringIO" \
            or stream.__class__.__name__ == "file"

    __debug_entry("set_expression", stream)

    result = snap_property_set(stream)
    if result is not None: return result

    x = stream.read(2)
    if x == "[:":
        result = snap_set_term(stream)
        skip_whitespace(stream)
        x = stream.read(2)
        if x != ":]":
            raise RegularExpressionException("Missing closing ':]' for character set expression.\n" + \
                                             "found: '%s'" % x)

    # NOTE: 'startswith()' instead of 'x[0]'. At the end of the stream
    #       'read(2)' delivers '' and an index access raises IndexError.
    elif x.startswith("["):
        stream.seek(-1, 1)   # step back over the last character read
        result = traditional_character_set.do(stream)

    # NOTE(review): the following three look redundant with what
    #               'snap_property_set()' probes for above -- confirm.
    elif x == "\\P":
        stream.seek(-2, 1)   # the property parser wants to see the '\P'
        result = property.do(stream)
    elif x == "\\N":
        stream.seek(-2, 1)
        result = property.do_shortcut(stream, "N", "na")  # UCS Property: Name
    elif x == "\\G":
        stream.seek(-2, 1)
        result = property.do_shortcut(stream, "G",
                                      "gc")  # UCS Property: General_Category
    else:
        result = None

    return __debug_exit(result, stream)
Ejemplo n.º 4
0
def snap_set_expression(stream, PatternDict):
    """Snap a character set expression at the current position of 'stream'.

    The alternatives are tried in the following order:

       (1) a property set, as detected by 'snap_property_set()',
       (2) '\\C'  => case fold expression, see 'case_fold_expression.do()',
       (3) '[:'   => set term which must be closed by ':]',
       (4) '['    => traditional character set,
       (5) '{'    => replacement, snapped with 'StateMachineF=False'.

    RETURNS: The object delivered by the parser in charge. 'None', if none
             of the alternatives applies; the characters read while probing
             for (2)-(5) are not put back in that case.

             The results of (1) and (2) are returned directly, the others
             pass through '__debug_exit()'.

    RAISES:  RegularExpressionException, if ':]' is missing after a set term.
    """
    assert     stream.__class__.__name__ == "StringIO" \
            or stream.__class__.__name__ == "file"

    __debug_entry("set_expression", stream)

    result = snap_property_set(stream)
    if result is not None: return result

    x = stream.read(2)
    if   x == "\\C":
        # The case fold parser calls back into this function for the
        # set which is enclosed in its curly brackets.
        return case_fold_expression.do(stream, PatternDict, snap_set_expression=snap_set_expression)

    elif x == "[:":
        result = snap_set_term(stream, PatternDict)
        skip_whitespace(stream)
        x = stream.read(2)
        if x != ":]":
            raise RegularExpressionException("Missing closing ':]' for character set expression.\n" + \
                                             "found: '%s'" % x)

    # NOTE: 'startswith()' instead of 'x[0]'. At the end of the stream
    #       'read(2)' delivers '' and an index access raises IndexError.
    elif x.startswith("["):
        stream.seek(-1, 1)   # step back over the last character read
        result = traditional_character_set.do(stream)

    elif x.startswith("{"):
        stream.seek(-1, 1)   # step back over the last character read
        result = snap_replacement(stream, PatternDict, StateMachineF=False)

    else:
        result = None

    return __debug_exit(result, stream)
def snap_set_expression(stream):
    """Snap a character set expression at the current position of 'stream'.

    The alternatives are tried in the following order:

       (1) a property set, as detected by 'snap_property_set()',
       (2) '[:'   => set term which must be closed by ':]',
       (3) '['    => traditional character set,
       (4) '\\P'  => property expression, see 'property.do()',
       (5) '\\N'  => shortcut for the UCS property 'Name',
       (6) '\\G'  => shortcut for the UCS property 'General_Category'.

    RETURNS: The object delivered by the parser in charge. 'None', if none
             of the alternatives applies; the characters read while probing
             for (2)-(6) are not put back in that case.

             The result of (1) is returned directly, the others pass
             through '__debug_exit()'.

    RAISES:  RegularExpressionException, if ':]' is missing after a set term.
    """
    assert     stream.__class__.__name__ == "StringIO" \
            or stream.__class__.__name__ == "file"

    __debug_entry("set_expression", stream)

    result = snap_property_set(stream)
    if result is not None: return result

    x = stream.read(2)
    if   x == "[:":
        result = snap_set_term(stream)
        skip_whitespace(stream)
        x = stream.read(2)
        if x != ":]":
            raise RegularExpressionException("Missing closing ':]' for character set expression.\n" + \
                                             "found: '%s'" % x)

    # NOTE: 'startswith()' instead of 'x[0]'. At the end of the stream
    #       'read(2)' delivers '' and an index access raises IndexError.
    elif x.startswith("["):
        stream.seek(-1, 1)   # step back over the last character read
        result = traditional_character_set.do(stream)

    # NOTE(review): the following three look redundant with what
    #               'snap_property_set()' probes for above -- confirm.
    elif x == "\\P":
        stream.seek(-2, 1)   # the property parser wants to see the '\P'
        result = property.do(stream)
    elif x == "\\N":
        stream.seek(-2, 1)
        result = property.do_shortcut(stream, "N", "na") # UCS Property: Name
    elif x == "\\G":
        stream.seek(-2, 1)
        result = property.do_shortcut(stream, "G", "gc") # UCS Property: General_Category
    else:
        result = None

    return __debug_exit(result, stream)
Ejemplo n.º 6
0
def snap_set_term(stream, PatternDict):
    """Snap a set term at the current position of 'stream' (leading
    whitespace is skipped). A set term is one of

       -- an operation 'union', 'intersection', 'difference' or 'inverse'
          followed by a list of sets, see 'snap_set_list()',
       -- the name of an entry in 'special_character_set_db()',
       -- anything 'snap_set_expression()' is able to snap.

    'inverse' accepts a single set. The other operations require at least
    two sets. The operations work on the first set of the list, i.e. the
    first set is modified and becomes the result.

    An identifier which is neither an operation nor a set name is reported
    through 'verify_word_in_list()'.

    RETURNS: The resulting set; 'None' if nothing could be snapped.

    RAISES:  RegularExpressionException, if a binary operation receives
             less than two sets.
    """
    __debug_entry("set_term", stream)

    operation_list = ["union", "intersection", "difference", "inverse"]
    character_set_db = special_character_set_db()
    # 'list()' => concatenation below does not depend on the type which
    #             'keys()' delivers.
    character_set_list = list(character_set_db.keys())

    skip_whitespace(stream)
    position = stream.tell()

    # Remains 'None', if the unknown keyword report below returns.
    result = None

    # if there is no following '(', then enter the 'snap_expression' block below
    word = read_identifier(stream)

    if word in operation_list:
        set_list = snap_set_list(stream, word, PatternDict)
        # if an error occurs during set_list parsing, an exception is thrown about syntax error

        result = set_list[0]
        remainder = set_list[1:]

        if word == "inverse":
            # The inverse of multiple sets, is to be the inverse of the union of these sets.
            for character_set in remainder:
                result.unite_with(character_set)
            result = result.inverse()
            # '-1' is treated as 'no limit'; otherwise cut the inverse
            # down to the range from 0 to the limit.
            limit = Setup.get_character_value_limit()
            if limit != -1:
                result.intersect_with(Interval(0, limit))
            return __debug_exit(result, stream)

        if len(set_list) < 2:
            raise RegularExpressionException("Regular Expression: A %s operation needs at least\n" % word + \
                                             "two sets to operate on them.")

        # NOTE: The loop variable is not called 'set', since this would
        #       hide the builtin of the same name.
        if word == "union":
            for character_set in remainder:
                result.unite_with(character_set)
        elif word == "intersection":
            for character_set in remainder:
                result.intersect_with(character_set)
        elif word == "difference":
            for character_set in remainder:
                result.subtract(character_set)

    elif word in character_set_list:
        # NOTE(review): if the database hands out shared objects, then the
        #               in-place operations above modify them -- confirm.
        result = character_set_db[word]

    elif word != "":
        verify_word_in_list(word, character_set_list + operation_list,
                            "Unknown keyword '%s'." % word, stream)
    else:
        # No identifier at all => back to start, try a plain set expression.
        stream.seek(position)
        result = snap_set_expression(stream, PatternDict)

    return __debug_exit(result, stream)
Ejemplo n.º 7
0
def snap_set_term(stream, PatternDict):
    """Snap a set term at the current position of 'stream' (leading
    whitespace is skipped). A set term is one of

       -- an operation 'union', 'intersection', 'difference' or 'inverse'
          followed by a list of sets, see 'snap_set_list()',
       -- the name of an entry in 'special_character_set_db()',
       -- anything 'snap_set_expression()' is able to snap.

    'inverse' accepts a single set. The other operations require at least
    two sets. The operations work on the first set of the list, i.e. the
    first set is modified and becomes the result.

    An identifier which is neither an operation nor a set name is reported
    through 'verify_word_in_list()'.

    RETURNS: The resulting set; 'None' if nothing could be snapped.

    RAISES:  RegularExpressionException, if a binary operation receives
             less than two sets.
    """
    __debug_entry("set_term", stream)

    operation_list     = [ "union", "intersection", "difference", "inverse"]
    character_set_db   = special_character_set_db()
    # 'list()' => concatenation below does not depend on the type which
    #             'keys()' delivers.
    character_set_list = list(character_set_db.keys())

    skip_whitespace(stream)
    position = stream.tell()

    # Remains 'None', if the unknown keyword report below returns.
    result = None

    # if there is no following '(', then enter the 'snap_expression' block below
    word = read_identifier(stream)

    if word in operation_list:
        set_list = snap_set_list(stream, word, PatternDict)
        # if an error occurs during set_list parsing, an exception is thrown about syntax error

        result    = set_list[0]
        remainder = set_list[1:]

        if word == "inverse":
            # The inverse of multiple sets, is to be the inverse of the union of these sets.
            for character_set in remainder:
                result.unite_with(character_set)
            result = result.inverse()
            # '-1' is treated as 'no limit'; otherwise cut the inverse
            # down to the range from 0 to the limit.
            limit = Setup.get_character_value_limit()
            if limit != -1:
                result.intersect_with(Interval(0, limit))
            return __debug_exit(result, stream)

        if len(set_list) < 2:
            raise RegularExpressionException("Regular Expression: A %s operation needs at least\n" % word + \
                                             "two sets to operate on them.")

        # NOTE: The loop variable is not called 'set', since this would
        #       hide the builtin of the same name.
        if   word == "union":
            for character_set in remainder:
                result.unite_with(character_set)
        elif word == "intersection":
            for character_set in remainder:
                result.intersect_with(character_set)
        elif word == "difference":
            for character_set in remainder:
                result.subtract(character_set)

    elif word in character_set_list:
        # NOTE(review): if the database hands out shared objects, then the
        #               in-place operations above modify them -- confirm.
        result = character_set_db[word]

    elif word != "":
        verify_word_in_list(word, character_set_list + operation_list,
                            "Unknown keyword '%s'." % word, stream)
    else:
        # No identifier at all => back to start, try a plain set expression.
        stream.seek(position)
        result = snap_set_expression(stream, PatternDict)

    return __debug_exit(result, stream)
Ejemplo n.º 8
0
def snap_set_term(stream):
    """Snap a set term at the current position of 'stream' (leading
    whitespace is skipped). A set term is one of

       -- an operation 'union', 'intersection', 'difference' or 'inverse'
          followed by a list of sets, see 'snap_set_list()',
       -- the name of an entry in 'special_character_set_db',
       -- anything 'snap_set_expression()' is able to snap.

    'inverse' accepts a single set. The other operations require at least
    two sets. The operations work on the first set of the list, i.e. the
    first set is modified and becomes the result.

    RETURNS: The resulting set, or whatever 'snap_set_expression()'
             delivers if the word read is no known keyword.

    RAISES:  RegularExpressionException, if a binary operation receives
             less than two sets.
    """
    __debug_entry("set_term", stream)

    skip_whitespace(stream)
    position = stream.tell()

    # if there is no following '(', then enter the 'snap_expression' block below
    try:
        word = read_until_non_letter(stream)
        stream.seek(-1, 1)  # putback the non-letter
    except Exception:
        # Best effort: any failure to read a word leads to the last branch
        # below. 'except Exception' lets KeyboardInterrupt and SystemExit
        # pass, which a bare 'except' would swallow.
        word = "not a valid word"

    word = word.strip()

    if word in ["union", "intersection", "difference", "inverse"]:
        set_list = snap_set_list(stream, word)
        # if an error occurs during set_list parsing, an exception is thrown about syntax error

        result = set_list[0]
        remainder = set_list[1:]

        if word == "inverse":
            # The inverse of multiple sets, is to be the inverse of the union of these sets.
            for character_set in remainder:
                result.unite_with(character_set)
            result = result.inverse()
            return __debug_exit(result, stream)

        if len(set_list) < 2:
            raise RegularExpressionException("Regular Expression: A %s operation needs at least\n" % word + \
                                             "two sets to operate on them.")

        # NOTE: The loop variable is not called 'set', since this would
        #       hide the builtin of the same name.
        if word == "union":
            for character_set in remainder:
                result.unite_with(character_set)
        elif word == "intersection":
            for character_set in remainder:
                result.intersect_with(character_set)
        elif word == "difference":
            for character_set in remainder:
                result.subtract(character_set)

    elif word in special_character_set_db:
        result = special_character_set_db[word]

    else:
        # try to snap an expression out of it
        stream.seek(position)
        result = snap_set_expression(stream)

    return __debug_exit(result, stream)
def snap_set_term(stream):
    """Snap a set term at the current position of 'stream' (leading
    whitespace is skipped). A set term is one of

       -- an operation 'union', 'intersection', 'difference' or 'inverse'
          followed by a list of sets, see 'snap_set_list()',
       -- the name of an entry in 'special_character_set_db',
       -- anything 'snap_set_expression()' is able to snap.

    'inverse' accepts a single set. The other operations require at least
    two sets. The operations work on the first set of the list, i.e. the
    first set is modified and becomes the result.

    RETURNS: The resulting set, or whatever 'snap_set_expression()'
             delivers if the word read is no known keyword.

    RAISES:  RegularExpressionException, if a binary operation receives
             less than two sets.
    """
    __debug_entry("set_term", stream)

    skip_whitespace(stream)
    position = stream.tell()

    # if there is no following '(', then enter the 'snap_expression' block below
    try:
        word = read_until_non_letter(stream)
        stream.seek(-1, 1)  # putback the non-letter
    except Exception:
        # Best effort: any failure to read a word leads to the last branch
        # below. 'except Exception' lets KeyboardInterrupt and SystemExit
        # pass, which a bare 'except' would swallow.
        word = "not a valid word"

    word = word.strip()

    if word in [ "union", "intersection", "difference", "inverse"]:
        set_list = snap_set_list(stream, word)
        # if an error occurs during set_list parsing, an exception is thrown about syntax error

        result    = set_list[0]
        remainder = set_list[1:]

        if word == "inverse":
            # The inverse of multiple sets, is to be the inverse of the union of these sets.
            for character_set in remainder:
                result.unite_with(character_set)
            result = result.inverse()
            return __debug_exit(result, stream)

        if len(set_list) < 2:
            raise RegularExpressionException("Regular Expression: A %s operation needs at least\n" % word + \
                                             "two sets to operate on them.")

        # NOTE: The loop variable is not called 'set', since this would
        #       hide the builtin of the same name.
        if   word == "union":
            for character_set in remainder:
                result.unite_with(character_set)
        elif word == "intersection":
            for character_set in remainder:
                result.intersect_with(character_set)
        elif word == "difference":
            for character_set in remainder:
                result.subtract(character_set)

    elif word in special_character_set_db:
        result = special_character_set_db[word]

    else:
        # try to snap an expression out of it
        stream.seek(position)
        result = snap_set_expression(stream)

    return __debug_exit(result, stream)
Ejemplo n.º 10
0
def snap_property_set(stream):
    """Probe the next two characters of 'stream' for the begin of a
    property set.

       '\\P' => the stream is rewound, 'property.do()' parses.
       '\\N' => the stream is rewound, shortcut for the UCS property 'na'.
       '\\G' => the stream is rewound, shortcut for the UCS property 'gc'.
       '\\E' => an encoding name in curly brackets is expected; the set is
                requested from 'codec_db'.

    RETURNS: What the parser in charge delivers. 'None', if none of the
             above is found; the stream is rewound in that case.
    """
    start  = stream.tell()
    prefix = stream.read(2)

    if prefix == "\\E":
        # The only case where parsing continues behind the two characters.
        skip_whitespace(stream)
        if check(stream, "{") == False:
            error_msg("Missing '{' after '\\E'.", stream)
        encoding_name = __snap_until(stream, "}").strip()
        return codec_db.get_supported_unicode_character_set(encoding_name, stream)

    # All remaining cases start over from the initial position.
    stream.seek(start)

    if prefix == "\\P":
        return property.do(stream)

    shortcut_db = {
        "\\N": ("N", "na"),  # UCS Property: Name
        "\\G": ("G", "gc"),  # UCS Property: General_Category
    }
    if prefix in shortcut_db:
        letter, property_alias = shortcut_db[prefix]
        return property.do_shortcut(stream, letter, property_alias)

    return None
Ejemplo n.º 11
0
def snap_set_list(stream, set_operation_name):
    """Snap a list of set terms of the form '( term, term, ... )'.

    set_operation_name -- name of the operation to which the list belongs;
                          only used in error messages.

    RETURNS: List of the results of 'snap_set_term()', passed through
             '__debug_exit()'. The list contains at least one element.

    RAISES:  RegularExpressionException, if the opening bracket is missing,
             if a term cannot be snapped, or if a term is followed by
             something different from ',' or ')'.
    """
    __debug_entry("set_list", stream)

    skip_whitespace(stream)
    if stream.read(1) != "(":
        raise RegularExpressionException(
            "Missing opening bracket '%s' operation." % set_operation_name)

    set_list = []
    while True:
        skip_whitespace(stream)
        result = snap_set_term(stream)
        if result is None:
            raise RegularExpressionException(
                "Missing set expression list after '%s' operation." %
                set_operation_name)
        set_list.append(result)

        skip_whitespace(stream)
        tmp = stream.read(1)
        if tmp == ",":
            continue    # another term follows
        if tmp == ")":
            return __debug_exit(set_list, stream)

        # Put back what has been read. At the end of the stream nothing
        # has been read ('tmp' is ''), so nothing is to be put back.
        stream.seek(-len(tmp), 1)
        raise RegularExpressionException(
            "Missing closing ')' after after '%s' operation." %
            set_operation_name)
Ejemplo n.º 12
0
def snap_property_set(stream):
    """Probe the next two characters of 'stream' for the begin of a
    property set.

       '\\P' => the stream is rewound, 'property.do()' parses.
       '\\N' => the stream is rewound, shortcut for the UCS property 'na'.
       '\\G' => the stream is rewound, shortcut for the UCS property 'gc'.
       '\\E' => an encoding name in curly brackets is expected; the set is
                requested from 'codec_db'.

    RETURNS: What the parser in charge delivers. 'None', if none of the
             above is found; the stream is rewound in that case.
    """
    start = stream.tell()
    prefix = stream.read(2)

    if prefix == "\\E":
        # The only case where parsing continues behind the two characters.
        skip_whitespace(stream)
        if check(stream, "{") == False:
            error_msg("Missing '{' after '\\E'.", stream)
        encoding_name = __snap_until(stream, "}").strip()
        return codec_db.get_supported_unicode_character_set(
            encoding_name, stream)

    # All remaining cases start over from the initial position.
    stream.seek(start)

    if prefix == "\\P":
        return property.do(stream)

    shortcut_db = {
        "\\N": ("N", "na"),  # UCS Property: Name
        "\\G": ("G", "gc"),  # UCS Property: General_Category
    }
    if prefix in shortcut_db:
        letter, property_alias = shortcut_db[prefix]
        return property.do_shortcut(stream, letter, property_alias)

    return None
Ejemplo n.º 13
0
def snap_set_list(stream, set_operation_name, PatternDict):
    """Snap a list of set terms of the form '( term, term, ... )'.

    set_operation_name -- name of the operation to which the list belongs;
                          only used in error messages.
    PatternDict        -- passed on to 'snap_set_term()'.

    RETURNS: List of the results of 'snap_set_term()', passed through
             '__debug_exit()'. The list contains at least one element.

    RAISES:  RegularExpressionException, if the opening bracket is missing,
             if a term cannot be snapped, or if a term is followed by
             something different from ',' or ')'.
    """
    __debug_entry("set_list", stream)

    skip_whitespace(stream)
    if stream.read(1) != "(":
        raise RegularExpressionException("Missing opening bracket '%s' operation." % set_operation_name)

    set_list = []
    while True:
        skip_whitespace(stream)
        result = snap_set_term(stream, PatternDict)
        if result is None:
            raise RegularExpressionException("Missing set expression list after '%s' operation." % set_operation_name)
        set_list.append(result)

        skip_whitespace(stream)
        tmp = stream.read(1)
        if tmp == ",":
            continue    # another term follows
        if tmp == ")":
            return __debug_exit(set_list, stream)

        # Put back what has been read. At the end of the stream nothing
        # has been read ('tmp' is ''), so nothing is to be put back.
        stream.seek(-len(tmp), 1)
        raise RegularExpressionException("Missing closing ')' after after '%s' operation." % set_operation_name)
Ejemplo n.º 14
0
def do(sh, PatternDict, snap_expression=None, snap_set_expression=None):
    r"""Parse a case fold expression of the form \C(..){ R } or \C{ R }.
       Assume that '\C' has been snapped already from the stream.

       See function ucs_case_fold_parser.get_fold_set() for details
       about case folding.

       snap_expression != None, then snap_expression is the function 
                                to parse a RE and the caller
                                expects a state machine.

       snap_set_expression != None, then snap_set_expression is the
                                    function to parse a character 
                                    set and caller expects a 
                                    NumberSet object.

       The optional brackets '(..)' contain the option letters 's', 'm'
       and 't'. Without brackets the options are 's' for set expressions
       and 'sm' otherwise. 'm' is refused for set expressions.

       RETURNS: The case folded object, i.e. what 'snap_set_expression'
                delivered extended by the fold sets of its elements, or
                the state machine delivered by 'snap_expression' after
                '__add_case_fold()' was applied to its transitions.

       Syntax errors are reported through 'error_msg()'. Before reporting
       a structural error the stream is set back to where it stood at
       function entry.
    """

    pos = sh.tell()
    skip_whitespace(sh)
    # -- parse the optional options in '(' ')' brackets
    if not check(sh, "("):
        # Defaults: sets can only take single character folds ('s');
        # expressions take single and multi character folds ('sm').
        if snap_set_expression is not None: flag_txt = "s"
        else:                               flag_txt = "sm"

    else:
        flag_txt = read_until_character(sh, ")")

        if flag_txt == "":
            sh.seek(pos)
            error_msg("Missing closing ')' in case fold expression.", sh)

        flag_txt = flag_txt.replace(" ", "").replace("\t", "").replace("\n", "")

        for letter in flag_txt:
            if letter not in "smt":
                sh.seek(pos)
                error_msg("Letter '%s' not permitted as case fold option.\n" % letter + \
                          "Options are:  's' for simple case fold.\n" + \
                          "              'm' for multi character sequence case fold.\n" + \
                          "              't' for special turkish case fold rules.", sh)

            if snap_set_expression is not None and letter == "m":
                sh.seek(pos)
                error_msg("Option 'm' not permitted as case fold option in set expression.\n" + \
                          "Set expressions cannot absorb multi character sequences.", sh)

        skip_whitespace(sh)

    # -- parse the expression in '{' '}' which is subject to case folding
    if not check(sh, "{"):
        sh.seek(pos)
        error_msg("Missing '{' for case fold expression.", sh)

    skip_whitespace(sh)
    if snap_set_expression is not None:
        trigger_set = snap_set_expression(sh, PatternDict)
        if trigger_set is None:
            error_msg("Missing character set for case fold in set expression.\n" + 
                      "The content in '\\C{content}' should start with '[' or '[:'.", 
                      sh)

        # -- perform the case fold for Sets!
        #    The borders are copied before anything is added. The original
        #    iterated over the intervals while 'add_interval()' extended
        #    the very same set.
        #    NOTE(review): 'PromiseToTreatWellF=True' presumably hands out
        #                  the set's internal list -- confirm. The copy of
        #                  the borders is safe in either case.
        border_list = [ (interval.begin, interval.end)
                        for interval in trigger_set.get_intervals(PromiseToTreatWellF=True) ]
        for begin, end in border_list:
            for i in range(begin, end):
                fold = ucs_case_fold.get_fold_set(i, flag_txt)
                for x in fold:
                    assert type(x) != list
                    trigger_set.add_interval(Interval(x, x+1))

        result = trigger_set

    else:
        sm = snap_expression(sh, PatternDict)
        if sm is None:
            error_msg("Missing expression for case fold '\\C'.\n" + 
                      "The content in '\\C{content}' should start with '[' or '[:'.", 
                      sh)

        # -- perform the case fold for State Machines!
        #    'list()' => iteration over snapshots, in case that
        #                '__add_case_fold()' extends the containers.
        for state_idx, state in list(sm.states.items()):
            transitions = state.transitions()
            for target_state_idx, trigger_set in list(transitions.get_map().items()):
                __add_case_fold(sm, flag_txt, trigger_set, state_idx, target_state_idx)

        result = sm

    if not check(sh, "}"):
        sh.seek(pos)
        error_msg("Missing '}' for case fold expression.", sh)

    return result
Ejemplo n.º 15
0
def do(sh, PatternDict, snap_expression=None, snap_set_expression=None):
    r"""Parse a case fold expression of the form \C(..){ R } or \C{ R }.
       Assume that '\C' has been snapped already from the stream.

       See function ucs_case_fold_parser.get_fold_set() for details
       about case folding.

       snap_expression != None, then snap_expression is the function 
                                to parse a RE and the caller
                                expects a state machine.

       snap_set_expression != None, then snap_set_expression is the
                                    function to parse a character 
                                    set and caller expects a 
                                    NumberSet object.

       The optional brackets '(..)' contain the option letters 's', 'm'
       and 't'. Without brackets the options are 's' for set expressions
       and 'sm' otherwise. 'm' is refused for set expressions.

       RETURNS: The case folded object, i.e. what 'snap_set_expression'
                delivered extended by the fold sets of its elements, or
                the state machine delivered by 'snap_expression' after
                '__add_case_fold()' was applied to its transitions.

       Syntax errors are reported through 'error_msg()'. Before reporting
       a structural error the stream is set back to where it stood at
       function entry.
    """

    pos = sh.tell()
    skip_whitespace(sh)
    # -- parse the optional options in '(' ')' brackets
    if not check(sh, "("):
        # Defaults: sets can only take single character folds ('s');
        # expressions take single and multi character folds ('sm').
        if snap_set_expression is not None: flag_txt = "s"
        else: flag_txt = "sm"

    else:
        flag_txt = read_until_character(sh, ")")

        if flag_txt == "":
            sh.seek(pos)
            error_msg("Missing closing ')' in case fold expression.", sh)

        flag_txt = flag_txt.replace(" ", "").replace("\t",
                                                     "").replace("\n", "")

        for letter in flag_txt:
            if letter not in "smt":
                sh.seek(pos)
                error_msg("Letter '%s' not permitted as case fold option.\n" % letter + \
                          "Options are:  's' for simple case fold.\n" + \
                          "              'm' for multi character sequence case fold.\n" + \
                          "              't' for special turkish case fold rules.", sh)

            if snap_set_expression is not None and letter == "m":
                sh.seek(pos)
                error_msg("Option 'm' not permitted as case fold option in set expression.\n" + \
                          "Set expressions cannot absorb multi character sequences.", sh)

        skip_whitespace(sh)

    # -- parse the expression in '{' '}' which is subject to case folding
    if not check(sh, "{"):
        sh.seek(pos)
        error_msg("Missing '{' for case fold expression.", sh)

    skip_whitespace(sh)
    if snap_set_expression is not None:
        trigger_set = snap_set_expression(sh, PatternDict)
        if trigger_set is None:
            error_msg(
                "Missing character set for case fold in set expression.\n" +
                "The content in '\\C{content}' should start with '[' or '[:'.",
                sh)

        # -- perform the case fold for Sets!
        #    The borders are copied before anything is added. The original
        #    iterated over the intervals while 'add_interval()' extended
        #    the very same set.
        #    NOTE(review): 'PromiseToTreatWellF=True' presumably hands out
        #                  the set's internal list -- confirm. The copy of
        #                  the borders is safe in either case.
        border_list = [
            (interval.begin, interval.end)
            for interval in trigger_set.get_intervals(PromiseToTreatWellF=True)
        ]
        for begin, end in border_list:
            for i in range(begin, end):
                fold = ucs_case_fold.get_fold_set(i, flag_txt)
                for x in fold:
                    assert type(x) != list
                    trigger_set.add_interval(Interval(x, x + 1))

        result = trigger_set

    else:
        sm = snap_expression(sh, PatternDict)
        if sm is None:
            error_msg(
                "Missing expression for case fold '\\C'.\n" +
                "The content in '\\C{content}' should start with '[' or '[:'.",
                sh)

        # -- perform the case fold for State Machines!
        #    'list()' => iteration over snapshots, in case that
        #                '__add_case_fold()' extends the containers.
        for state_idx, state in list(sm.states.items()):
            transitions = state.transitions()
            for target_state_idx, trigger_set in list(
                    transitions.get_map().items()):
                __add_case_fold(sm, flag_txt, trigger_set, state_idx,
                                target_state_idx)

        result = sm

    if not check(sh, "}"):
        sh.seek(pos)
        error_msg("Missing '}' for case fold expression.", sh)

    return result
Ejemplo n.º 16
0
def do(fh):
    r"""Parses indentation element definitions of the form:

          [ \t]                                       => grid 4;
          [:intersection([:alpha:], [\X064-\X066]):]  => space 1;

       The left hand side is a pattern. The right hand side is one of the
       specifiers 'space', 'grid', 'bad', 'newline' or 'suppressor'.

       For 'space', 'grid' and 'bad' the pattern *must* be matchable by a
       single character, i.e. it must be a character set. 'space' and
       'grid' may be followed by an integer or an identifier; if neither
       follows 'space', then 1 is used. 'newline' and 'suppressor' receive
       the pattern's state machine.

       Parsing ends with '>'. Then, the setup is sealed and checked.

       RETURNS: The IndentationSetup object which has been filled.
    """
    indentation_setup = IndentationSetup(fh)

    # NOTE: Catching of EOF happens in caller: parse_section(...)
    #
    skip_whitespace(fh)

    while True:
        skip_whitespace(fh)

        if check(fh, ">"):
            indentation_setup.seal()
            indentation_setup.consistency_check(fh)
            return indentation_setup

        # A regular expression state machine
        pattern_str, state_machine = regular_expression.parse(fh)

        skip_whitespace(fh)
        if not check(fh, "=>"):
            error_msg("Missing '=>' after character set definition.", fh)

        skip_whitespace(fh)
        identifier = read_identifier(fh)
        if identifier == "":
            error_msg("Missing identifier for indentation element definition.",
                      fh)

        verify_word_in_list(
            identifier, ["space", "grid", "bad", "newline", "suppressor"],
            "Unrecognized indentation specifier '%s'." % identifier, fh)

        trigger_set = None
        if identifier in ["space", "bad", "grid"]:
            # Exactly two states are taken as the sign of a pattern which
            # is matched by a single character.
            if len(state_machine.states) != 2:
                error_msg("For indentation '%s' only patterns are addmissible which\n" % identifier + \
                          "can be matched by a single character, e.g. \" \" or [a-z].", fh)
            transition_map = state_machine.get_init_state().transitions(
            ).get_map()
            assert len(transition_map) == 1
            # 'list()' => indexing does not depend on the type which
            #             'values()' delivers.
            trigger_set = list(transition_map.values())[0]

        skip_whitespace(fh)
        if identifier == "space":
            value = read_integer(fh)
            if value is not None:
                indentation_setup.specify_space(pattern_str, trigger_set,
                                                value, fh)
            else:
                # not a number received, is it an identifier?
                variable = read_identifier(fh)
                if variable != "":
                    indentation_setup.specify_space(pattern_str, trigger_set,
                                                    variable, fh)
                else:
                    # neither number nor identifier => default: 1
                    indentation_setup.specify_space(pattern_str, trigger_set,
                                                    1, fh)

        elif identifier == "grid":
            value = read_integer(fh)
            if value is not None:
                indentation_setup.specify_grid(pattern_str, trigger_set, value,
                                               fh)
            else:
                # not a number received, is it an identifier?
                skip_whitespace(fh)
                variable = read_identifier(fh)
                if variable != "":
                    indentation_setup.specify_grid(pattern_str, trigger_set,
                                                   variable, fh)
                else:
                    error_msg(
                        "Missing integer or variable name after keyword 'grid'.",
                        fh)

        elif identifier == "bad":
            indentation_setup.specify_bad(pattern_str, trigger_set, fh)

        elif identifier == "newline":
            indentation_setup.specify_newline(pattern_str, state_machine, fh)

        elif identifier == "suppressor":
            indentation_setup.specify_suppressor(pattern_str, state_machine,
                                                 fh)

        else:
            assert False, "Unreachable code reached."

        if not check(fh, ";"):
            error_msg(
                "Missing ';' after indentation '%s' specification." %
                identifier, fh)
Ejemplo n.º 17
0
def do(fh):
    r"""Parses indentation element definitions of the form:

          [ \t]                                       => grid 4;
          [:intersection([:alpha:], [\X064-\X066]):]  => space 1;

       The left hand side is a pattern. The right hand side is one of the
       specifiers 'space', 'grid', 'bad', 'newline' or 'suppressor'.

       For 'space', 'grid' and 'bad' the pattern *must* be matchable by a
       single character, i.e. it must be a character set. 'space' and
       'grid' may be followed by an integer or an identifier; if neither
       follows 'space', then 1 is used. 'newline' and 'suppressor' receive
       the pattern's state machine.

       Parsing ends with '>'. Then, the setup is sealed and checked.

       RETURNS: The IndentationSetup object which has been filled.
    """
    indentation_setup = IndentationSetup(fh)

    # NOTE: Catching of EOF happens in caller: parse_section(...)
    #
    skip_whitespace(fh)

    while True:
        skip_whitespace(fh)

        if check(fh, ">"):
            indentation_setup.seal()
            indentation_setup.consistency_check(fh)
            return indentation_setup

        # A regular expression state machine
        pattern_str, state_machine = regular_expression.parse(fh)

        skip_whitespace(fh)
        if not check(fh, "=>"):
            error_msg("Missing '=>' after character set definition.", fh)

        skip_whitespace(fh)
        identifier = read_identifier(fh)
        if identifier == "":
            error_msg("Missing identifier for indentation element definition.", fh)

        verify_word_in_list(
            identifier,
            ["space", "grid", "bad", "newline", "suppressor"],
            "Unrecognized indentation specifier '%s'." % identifier,
            fh,
        )

        trigger_set = None
        if identifier in ["space", "bad", "grid"]:
            # Exactly two states are taken as the sign of a pattern which
            # is matched by a single character.
            if len(state_machine.states) != 2:
                error_msg(
                    "For indentation '%s' only patterns are addmissible which\n" % identifier
                    + 'can be matched by a single character, e.g. " " or [a-z].',
                    fh,
                )
            transition_map = state_machine.get_init_state().transitions().get_map()
            assert len(transition_map) == 1
            # 'list()' => indexing does not depend on the type which
            #             'values()' delivers.
            trigger_set = list(transition_map.values())[0]

        skip_whitespace(fh)
        if identifier == "space":
            value = read_integer(fh)
            if value is not None:
                indentation_setup.specify_space(pattern_str, trigger_set, value, fh)
            else:
                # not a number received, is it an identifier?
                variable = read_identifier(fh)
                if variable != "":
                    indentation_setup.specify_space(pattern_str, trigger_set, variable, fh)
                else:
                    # neither number nor identifier => default: 1
                    indentation_setup.specify_space(pattern_str, trigger_set, 1, fh)

        elif identifier == "grid":
            value = read_integer(fh)
            if value is not None:
                indentation_setup.specify_grid(pattern_str, trigger_set, value, fh)
            else:
                # not a number received, is it an identifier?
                skip_whitespace(fh)
                variable = read_identifier(fh)
                if variable != "":
                    indentation_setup.specify_grid(pattern_str, trigger_set, variable, fh)
                else:
                    error_msg("Missing integer or variable name after keyword 'grid'.", fh)

        elif identifier == "bad":
            indentation_setup.specify_bad(pattern_str, trigger_set, fh)

        elif identifier == "newline":
            indentation_setup.specify_newline(pattern_str, state_machine, fh)

        elif identifier == "suppressor":
            indentation_setup.specify_suppressor(pattern_str, state_machine, fh)

        else:
            assert False, "Unreachable code reached."

        if not check(fh, ";"):
            error_msg("Missing ';' after indentation '%s' specification." % identifier, fh)