Beispiel #1
0
    def _checkSymbolFormRuleKey(self, symStr):
        """Validate the characters used in a rule key (the source of a rule).

        Besides plain symbol characters, step separators and expression
        characters are permitted. The empty string is accepted unchanged.
        Any other key must contain at least one symbol character or the
        match-all expression character.

        Raises error.TransitionSyntaxError on the first illegal character,
        or when the key names no source symbol at all.

        >>> g = Grammar()
        >>> g._checkSymbolFormRuleKey('wer')

        """
        if symStr == '':
            return  # an empty key is explicitly allowed

        allowed = self.SYM + self.STEP + ''.join(self.EXPRESS)
        illegal = next((ch for ch in symStr if ch not in allowed), None)
        if illegal is not None:
            raise error.TransitionSyntaxError(
                "rule definition uses illegal characters (%s)" % illegal)

        # the left side of a production rule needs at least one character
        # that is a plain symbol or the match-all expression
        sourceChars = self.SYM + self.EXPRESSALL
        if not any(ch in symStr for ch in sourceChars):
            raise error.TransitionSyntaxError(
                "rule definition does not define source symbol")
Beispiel #2
0
 def _parseWeightValue(self, pairWeight):
     """Rebuild self._weightSrc from a dictionary of raw weight definitions.

     Every key of pairWeight is converted with _parseWeightKey(). Every
     value is split on ASSIGNDELIMIT into assignments, which are stored as
     a list of (symbol, weight) pairs. Entries that carry no assignment
     operator are skipped, so not defining all weights is permitted.

     Raises error.TransitionSyntaxError when an entry holds more than one
     assignment operator, or when a weight is zero or cannot be converted.
     """
     self._weightSrc = {}
     for rawKey, rawValue in pairWeight.items():
         # the key becomes a tuple of symbol strings
         parsedKey = self._parseWeightKey(rawKey)
         pairs = []
         for entry in rawValue.split(self.ASSIGNDELIMIT):
             if self.ASSIGN not in entry:
                 continue
             if entry.count(self.ASSIGN) > 1:  # bad syntax or other error
                 raise error.TransitionSyntaxError("incorrect weight specification: %s" % entry)
             name, rawWeight = entry.split(self.ASSIGN)
             # strToNum hands back None when the text is not a usable number
             number = drawer.strToNum(rawWeight, 'num', 0, None)
             # zero weights are rejected as well: accepting them causes
             # side-effects elsewhere that have not been tested
             if number is None or number == 0:
                 raise error.TransitionSyntaxError("bad weight value given: %s" % entry)
             pairs.append((name, number))
         self._weightSrc[parsedKey] = pairs
Beispiel #3
0
    def _checkRuleReference(self):
        """Verify that every rule only refers to symbols that are defined.

        Rule inputs may additionally use EXPRESSALL for matching; rule
        outputs may not. An empty input string and empty output options are
        accepted. As a side effect, self._maxRuleOutputSize is reset to zero
        and then raised to the length of the longest output option examined.

        This method is called in _parse().

        Raises error.TransitionSyntaxError when a rule input or a rule
        output uses an undefined symbol, or when a rule has no output
        options at all.
        """
        self._maxRuleOutputSize = 0

        definedSyms = list(self._symbols.keys())
        inputSyms = definedSyms + [self.EXPRESSALL]
        for source, options in list(self._rules.items()):
            environment.printDebug(
                ['in rule', repr(source), 'out rule', options])

            # each component of the source must be a defined symbol or the
            # match-all expression; an empty source has nothing to check
            # (empty input is permitted but not yet fully implemented)
            if not all(part in inputSyms for part in source):
                raise error.TransitionSyntaxError(
                    "source rule component (%s) references an undefined symbol"
                    % source)

            # check the output options, of which there must be one or more
            # NOTE: this assumes that no delimiters are used inside an option
            accepted = False
            for option, _weight in options:  # pairs of value, weight
                if len(option) > self._maxRuleOutputSize:
                    self._maxRuleOutputSize = len(option)
                # an empty option is accepted; otherwise every character
                # of the option has to be a defined symbol
                accepted = all(char in definedSyms for char in option)
                if not accepted:
                    break
            if not accepted:
                raise error.TransitionSyntaxError(
                    "destination rule component (%s) references an undefined symbol"
                    % options)
Beispiel #4
0
    def _parse(self, usrStr):
        """Parse a transition string into symbol and weight definitions.

        All elements of the notation are <key>{<value>} pairs, of two types:

        symbol defs: keys are alphabetic, values can be anything (incl lists)
                          name{symbol}
        weight defs: keys are source transition statements w/ step indicator :
                          transition{name=weight|name=weight}

        Weight keys support limited regular expressions:
            t:*:t   match any one in the second place; not the same as
                    zero or more
            t:-t:t  match anything that is not t
            t:w|q:t match either (alternation)

        Note: spaces are removed from all keys and all values.

        On success self._symbols holds the symbol definitions, the weights
        have been passed to _parseWeightValue() and _checkSymbolUsage() has
        been run.

        Raises error.TransitionSyntaxError when delimiters are badly placed,
        when no symbols or no weights are defined, or when one of the
        validation helpers rejects the input.
        """
        self._parseValidate(usrStr)
        usrStr = self._parseClean(usrStr)

        pairSymbol = {}
        pairWeight = {}
        # divide all groups into pairs of key, {}-enclosed values
        for double in usrStr.split(self.CLOSE):
            if self.OPEN not in double:
                continue
            try:
                key, value = double.split(self.OPEN)
            except ValueError:
                # the group did not split into exactly one key and one
                # value, e.g. two opening delimiters before a closing one
                raise error.TransitionSyntaxError("badly placed delimiters")
            # key is always a symbol def: will change case and remove spaces
            key = drawer.strScrub(key, 'lower', [' '])  # rm spaces from key
            # split into 2 dictionaries, one w/ symbol defs, one w/ weights
            # if it is or has a step indicator (:), it is not a def
            if self.STEP in key or self.ASSIGN in value:  # it is a weight
                # store weights values in lower
                self._checkSymbolFormWeightKey(key)
                pairWeight[key] = drawer.strScrub(value, 'lower', [' '])
            else:  # must be a symbol def
                self._checkSymbolFormDef(key)  # will raise exception on bad key
                pairSymbol[key] = drawer.strScrub(value, None, [' '])
        # this initializes symbol table
        if not pairSymbol:
            raise error.TransitionSyntaxError("no symbols defined")
        self._symbols = pairSymbol
        # pass the pair dictionary to weight parser
        if not pairWeight:
            raise error.TransitionSyntaxError("no weights defined")
        self._parseWeightValue(pairWeight)
        # check symbol usage and determine orders
        self._checkSymbolUsage()
Beispiel #5
0
 def _checkSymbolFormWeightKey(self, symStr):
     """Validate the characters used in a weight label key.

     Symbol characters, the step separator and expression characters are
     permitted. Raises error.TransitionSyntaxError on the first character
     outside of that set.
     """
     allowed = self.SYM + self.STEP + ''.join(self.EXPRESS)
     for candidate in symStr:
         if candidate in allowed:
             continue
         raise error.TransitionSyntaxError("symbol definition uses illegal characters (%s)" % candidate)
Beispiel #6
0
    def _parseValidate(self, usrStr):
        """Check that opening and closing braces occur equally often.

        Only the counts are compared. Raises error.TransitionSyntaxError
        when they differ.

        >>> g = Grammar()
        >>> g._parseValidate('sdf{3}')
        >>> g._parseValidate('sdf{3}}')
        Traceback (most recent call last):
        TransitionSyntaxError: all braces not paired
        """
        opened = usrStr.count(self.OPEN)
        closed = usrStr.count(self.CLOSE)
        if opened == closed:
            return
        # replace with exception subclass
        raise error.TransitionSyntaxError("all braces not paired")
Beispiel #7
0
    def _parseWeightKey(self, key):
        """Convert a weight key string into a tuple of key segments.

        The key is split on the step delimiter and empty segments are
        dropped. A plain segment stays a string. A segment that uses an
        expression operator becomes a sub-tuple that leads with the
        operator, followed by its operands. Only one operator is allowed
        per segment. Tuples are used because the result serves as a
        dictionary key. A key consisting only of the step delimiter yields
        the empty tuple.

        Raises error.TransitionSyntaxError when a segment uses more than one
        operator.

        >>> a = Transition()
        >>> a._parseWeightKey('a:b:c')
        ('a', 'b', 'c')
        >>> a._parseWeightKey('a:b:c|d')
        ('a', 'b', ('|', 'c', 'd'))
        >>> a._parseWeightKey('a:b:c|d|e')
        ('a', 'b', ('|', 'c', 'd', 'e'))
        >>> a._parseWeightKey('a:*:c')
        ('a', ('*',), 'c')
        >>> a._parseWeightKey('a:*:-c')
        ('a', ('*',), ('-', 'c'))
        """
        if key == self.STEP:
            return ()

        # always split by the step delimiter; empty pieces are discarded
        # before the remaining ones are stripped
        segments = [part.strip() for part in key.split(self.STEP)
                    if part != '']

        parsed = []
        for segment in segments:
            operators = [op for op in self.EXPRESS if op in segment]
            if not operators:
                # no expression used: a normal weight key segment
                parsed.append(segment)
                continue
            if len(operators) > 1:
                msg = "only one operator may be used per weight key segment"
                raise error.TransitionSyntaxError(msg)

            if self.EXPRESSOR in segment:
                # alternation: operands are split on the or-symbol itself
                parsed.append(
                    tuple([self.EXPRESSOR] + segment.split(self.EXPRESSOR)))
            else:
                # operator leads, then every non-operator character
                operands = [ch for ch in segment if ch not in self.EXPRESS]
                parsed.append(tuple(operators + operands))
        return tuple(parsed)
Beispiel #8
0
 def _parseAxiom(self, axiomSrc=None):
     """Set the axiom and reset the current state to it.

     Call this after all symbols have been found.

     axiomSrc: optional string of symbols. Surrounding whitespace is
         stripped and every remaining character must be a defined symbol.
         When None, one of the defined symbols is picked at random.

     Sets self._axiom and self._state.

     Raises error.TransitionSyntaxError when axiomSrc contains a character
     that is not a defined symbol.
     """
     knownSym = list(self._symbols.keys())
     if axiomSrc is not None:
         # NOTE: assumes no delimiters between symbols
         axiomSrc = axiomSrc.strip()
         for char in axiomSrc:
             if char not in knownSym:
                 raise error.TransitionSyntaxError(
                     "bad axiom value given: %s" % char)
         self._axiom = axiomSrc
     else:  # get a random start
         self._axiom = random.choice(knownSym)
     # always update state to axiom
     self._state = self._axiom
Beispiel #9
0
 def _checkSymbolUsage(self):
     """Check that every weighted symbol is defined and collect the orders.

     The order of a weight key is its length (an empty tuple is order
     zero). Each order found is added once to self._ordersSrc, which is
     sorted at the end.

     Raises error.TransitionSyntaxError when a weight is given for a symbol
     that is not in the symbol list.
     """
     defined = list(self._symbols.keys())
     for key, weights in list(self._weightSrc.items()):
         order = len(key)  # len of the weight label is the order
         if order not in self._ordersSrc:
             self._ordersSrc.append(order)
         # weights are always (symbol, number) pairs
         for pair in weights:
             if pair[0] not in defined:
                 raise error.TransitionSyntaxError("weight specified for undefined symbol: %s" % pair[0])
     self._ordersSrc.sort()
Beispiel #10
0
    def _checkSymbolFormDef(self, symStr):
        """Validate the characters used in a symbol definition key.

        Only characters found in SYM are accepted, so the step separator
        and other special characters are rejected. Raises
        error.TransitionSyntaxError naming the first illegal character.

        >>> g = Grammar()
        >>> g._checkSymbolFormDef('wer')
        >>> g._checkSymbolFormDef('w:er')
        Traceback (most recent call last):
        TransitionSyntaxError: symbol definition uses illegal characters (:)

        >>> g._checkSymbolFormDef('wer@#$')
        Traceback (most recent call last):
        TransitionSyntaxError: symbol definition uses illegal characters (@)

        """
        illegal = next((ch for ch in symStr if ch not in self.SYM), None)
        if illegal is not None:
            raise error.TransitionSyntaxError(
                "symbol definition uses illegal characters (%s)" % illegal)
Beispiel #11
0
    def _parse(self, usrStr):
        '''Parse a grammar string into symbols, rules and an axiom.

        The string holds two or three parts separated by the split
        delimiter: symbol definitions, rule definitions and, optionally, an
        axiom. Symbol and rule definitions are <key>{<value>} pairs.

        On success self._symbols holds the symbol definitions, the rules
        have been passed to _parseRuleValue() and checked by
        _checkRuleReference(), and the axiom has been set by _parseAxiom().

        Raises error.TransitionSyntaxError when the number of split
        delimiters is wrong, when delimiters are badly placed, when no
        symbols or no rules are defined, or when one of the validation
        helpers rejects the input.

        >>> g = Grammar()
        >>> g._parse('a{3}b{4} @ a{b}b{a|b}')
        >>> g._parse('a{3}b{4} @ a{b}b{a|b|c}')
        Traceback (most recent call last):
        TransitionSyntaxError: destination rule component ([('a', 1), ('b', 1), ('c', 1)]) references an undefined symbol

        >>> g._parse('a{3}b{4}c{3} @ a{b}b{a|b|c}')

        >>> g._parse('a{3}b{4}c{3} @ a{b}d{a|b|c}')
        Traceback (most recent call last):
        TransitionSyntaxError: source rule component (d) references an undefined symbol

        >>> g._symbols
        {'a': '3', 'c': '3', 'b': '4'}

        >>> g._parse('a{3}b{4}c{3} @ a{bb}b{aa|b|c}')
        >>> g._symbols
        {'a': '3', 'c': '3', 'b': '4'}
        >>> g._rules
        {'a': [('bb', 1)], 'b': [('aa', 1), ('b', 1), ('c', 1)]}

        >>> g._parse('a{3}b{4} @ a{b}b{a|b} @ b')
        >>> g._axiom
        'b'
        >>> g._parse('a{3}b{4} @ a{b}b{a|b} @ baab')
        >>> g._axiom
        'baab'

        >>> g._parse('a{3}b{4} @ a{b}ac{a|b} @ baab')
        Traceback (most recent call last):
        TransitionSyntaxError: source rule component (ac) references an undefined symbol

        >>> g._parse('a{3}b{4} @ *{b} @ baab') # * can be source, not dst
        >>> g._parse('a{3}b{4} @ a{*} @ baab') # * cannot be destination
        Traceback (most recent call last):
        TransitionSyntaxError: destination rule component ([('*', 1)]) references an undefined symbol

        '''
        self._parseValidate(usrStr)
        usrStr = self._parseClean(usrStr)

        pairSymbol = {}
        pairRule = {}

        # one delimiter gives symbols and rules; two add an axiom
        parts = usrStr.split(self.SPLIT)
        if len(parts) not in (2, 3):
            raise error.TransitionSyntaxError(
                "must include one or two split delimiters (%s)" % self.SPLIT)
        partSymbol, partRule = parts[0], parts[1]
        partAxiom = parts[2] if len(parts) == 3 else None

        # divide all groups into pairs of key, {}-enclosed values
        for subStr, dst in [(partSymbol, 'symbol'), (partRule, 'rule')]:
            for double in subStr.split(self.CLOSE):
                if self.OPEN not in double:
                    continue
                try:
                    key, value = double.split(self.OPEN)
                except ValueError:
                    # the group did not split into exactly one key and one
                    # value, e.g. two opening delimiters in a row
                    raise error.TransitionSyntaxError(
                        "badly placed delimiters")

                # key is always a symbol def: will change case and remove spaces
                key = drawer.strScrub(key, 'lower',
                                      [' '])  # rm spaces from key

                # split into 2 dictionaries, one w/ symbol defs, one w/ rules
                # symbol defs must come before
                if dst == 'symbol':
                    self._checkSymbolFormDef(
                        key)  # will raise exception on bad key
                    pairSymbol[key] = drawer.strScrub(value, None, [' '])
                elif dst == 'rule':
                    self._checkSymbolFormRuleKey(key)
                    pairRule[key] = drawer.strScrub(value, 'lower', [' '])

        # this initializes symbol table
        if not pairSymbol:
            raise error.TransitionSyntaxError("no symbols defined")
        self._symbols = pairSymbol
        # pass the pair dictionary to rule parser
        if not pairRule:
            raise error.TransitionSyntaxError("no rules defined")
        self._parseRuleValue(pairRule)  # assigns to self._rules

        # check that rules only reference defined symbols
        self._checkRuleReference()
        # assigns axiom value
        self._parseAxiom(partAxiom)
Beispiel #12
0
    def _parseRuleValue(self, pairRule):
        """Read a preliminary dictionary of rules, and split each value into a list of (option, weight) pairs.

        self._rules is cleared first and then filled with one list per key.
        A rule with a single option always gets weight 1, and any weight
        written for it is dropped as not significant. With several options,
        an option without an explicit weight gets weight 1.

        Raises error.TransitionSyntaxError when an option of a weighted rule
        holds more than one assignment operator, or when a weight is zero
        or cannot be converted.

        >>> g = Grammar()
        >>> g._parseRuleValue({'a': 'ab'})
        >>> g._rules
        {'a': [('ab', 1)]}

        >>> g._parseRuleValue({'a': 'ab|ac'})
        >>> g._rules
        {'a': [('ab', 1), ('ac', 1)]}

        >>> g._parseRuleValue({'a': 'ab|ac|aa=3'})
        >>> g._rules
        {'a': [('ab', 1), ('ac', 1), ('aa', 3)]}


        >>> g._parseRuleValue({'a': 'ab=3|c=12|aa=3'})
        >>> g._rules
        {'a': [('ab', 3), ('c', 12), ('aa', 3)]}

        >>> g._parseRuleValue({'a': 'ab=3|c=12|aa=3', 'c': 'b=3|c=5|a'})
        >>> g._rules
        {'a': [('ab', 3), ('c', 12), ('aa', 3)], 'c': [('b', 3), ('c', 5), ('a', 1)]}

        >>> g._parseRuleValue({'a': 'ab=3|c=12|aa=0'})
        Traceback (most recent call last):
        TransitionSyntaxError: bad weight value given: aa=0

        >>> g._parseRuleValue({'a': ''})
        >>> g._rules
        {'a': [('', 1)]}

        >>> g._parseRuleValue({'a': 'ab=3|c=12|'})
        >>> g._rules
        {'a': [('ab', 3), ('c', 12), ('', 1)]}

        >>> g._parseRuleValue({'*a': 'ab=3|c=12|a=3'})
        >>> g._rules
        {'*a': [('ab', 3), ('c', 12), ('a', 3)]}

        """
        self._rules = {}  # this always clears the last rules
        for key, value in pairRule.items():
            ruleList = []
            weights = value.split(self.ASSIGNDELIMIT)  # this is the |

            if len(weights) == 1:
                # a single option always weighs 1; a weight written for it
                # is removed, as it is not significant
                ruleList.append((weights[0].split(self.ASSIGN)[0], 1))
            elif self.ASSIGN not in value:
                # several options but no assignments at all
                ruleList.extend((symbol, 1) for symbol in weights)
            else:
                for assign in weights:
                    if self.ASSIGN not in assign:
                        # no assignment: provide a weight of 1 as a string
                        assign += self.ASSIGN + '1'
                    elif assign.count(self.ASSIGN) > 1:
                        # report malformed options as a syntax error
                        # instead of failing on the unpacking below
                        raise error.TransitionSyntaxError(
                            "incorrect weight specification: %s" % assign)

                    symbol, w = assign.split(self.ASSIGN)
                    # convert to float or int, may not be less than zero
                    # will return None on error
                    w = drawer.strToNum(w, 'num', 0, None)
                    if w is None or w == 0:  # no zero weights, or other errors
                        raise error.TransitionSyntaxError(
                            "bad weight value given: %s" % assign)
                    ruleList.append((symbol, w))
            self._rules[key] = ruleList
Beispiel #13
0
 def _checkSymbolFormDef(self, symStr):
     """Validate the characters used in a symbol definition key.

     Only characters found in SYM are accepted. Raises
     error.TransitionSyntaxError naming the first illegal character.
     """
     rejected = [char for char in symStr if char not in self.SYM]
     if rejected:
         raise error.TransitionSyntaxError("symbol definition uses illegal characters (%s)" % rejected[0])
Beispiel #14
0
 def _parseValidate(self, usrStr):
     """Check that opening and closing braces occur equally often.

     Only the counts are compared. Raises error.TransitionSyntaxError when
     they differ.
     """
     balanced = usrStr.count(self.OPEN) == usrStr.count(self.CLOSE)
     if not balanced:
         raise error.TransitionSyntaxError("all braces not paired")