def test_00(self):
    """TestMaximalMunchReplace: Space runs to single space.

    Feeds a string of words separated by spaces through a MaxMunchGen
    built from self.whitespace and self.nonwhitespace and checks the
    (token, kind) pairs that come out.
    """
    # NOTE(review): the spaces in this literal are exactly as seen in the
    # reviewed text; if the fixture originally used multi-space runs,
    # restore them from version control.
    myStrGen = test_BufGen.StrGen(' abc def ghi d ')
    myBg = MaxMunchGen.MaxMunchGen(
        next(myStrGen),
        [
            self.whitespace,
            self.nonwhitespace,
        ],
    )
    myResult = list(myBg.gen())
    myExpResult = [
        ([' '], 'whitespace'),
        (['a', 'b', 'c'], 'nonwhitespace'),
        ([' '], 'whitespace'),
        (['d', 'e', 'f'], 'nonwhitespace'),
        ([' '], 'whitespace'),
        (['g', 'h', 'i'], 'nonwhitespace'),
        ([' '], 'whitespace'),
        (['d'], 'nonwhitespace'),
        ([' '], 'whitespace'),
    ]
    # assertEqual, not the deprecated assertEquals alias (removed in 3.12).
    self.assertEqual(myResult, myExpResult)
def test_04_02(self):
    """TestMaximalMunchText: Test parsing vowels, consonants, whitespace as anyToken(): 'wrae xyz'.

    The space is not claimed by self.vowels or self.consonants, so the
    expected result has it reported by MaxMunchGen.anyToken with a kind
    of None.
    """
    myStrGen = test_BufGen.StrGen('wrae xyz')
    myBg = MaxMunchGen.MaxMunchGen(
        next(myStrGen),
        [
            self.vowels,
            self.consonants,
            MaxMunchGen.anyToken,
        ],
    )
    myResult = list(myBg.gen())
    myExpResult = [
        (['w', 'r'], 'consonants'),
        (['a', 'e'], 'vowels'),
        ([' '], None),
        (['x', 'y', 'z'], 'consonants'),
    ]
    # assertEqual, not the deprecated assertEquals alias (removed in 3.12).
    self.assertEqual(myResult, myExpResult)
def test_00(self):
    """TestMaximalMunchComment: Comment [00].

    Runs a mix of C and C++ comments through a non-exclusive MaxMunchGen
    and compares the concatenated token text with myStrOut.
    """
    # NOTE(review): the line breaks of this input are reconstructed from
    # the number of '\n' in myStrOut (one per input line, alternating with
    # blank lines). Confirm the exact whitespace of both literals against
    # the original fixture.
    myStrIn = (
        '// CXX Comment\n'
        '\n'
        '/ // More comment\n'
        '\n'
        '// /* C inside CXX */\n'
        '\n'
        '/* C comment */\n'
        '\n'
        '/* // CXX in C */\n'
    )
    myStrOut = ' \n\n/ \n\n \n\n \n\n \n'
    myStrGen = TestBufGen.StrGen(myStrIn)
    myBg = MaxMunchGen.MaxMunchGen(
        next(myStrGen),
        [
            self.cComment,
            self.cxxComment,
            MaxMunchGen.anyToken,
        ],
        isExclusive=False,
    )
    myResult = list(myBg.gen())
    # Each result is (token, kind); only the token characters are compared.
    myResultStr = ''.join(''.join(v[0]) for v in myResult)
    # assertEqual, not the deprecated assertEquals alias (removed in 3.12).
    self.assertEqual(myResultStr, myStrOut)
def genPhase3(self):
    """ISO/IEC 14882:1998(E) 2.4 Preprocessing tokens [lex.pptoken]. Phase 3.

    The source file is decomposed into preprocessing tokens and sequences
    of white-space characters (including comments). A source file shall
    not end in a partial preprocessing token or in a partial comment.
    Each comment is replaced by one space character. New-line characters
    are retained. Whether each nonempty sequence of white-space characters
    other than new-line is retained or replaced by one space character is
    implementation-defined.

    NOTE: Whitespace sequences are not merged so " /**/ " will generate
    three tokens each of PpToken.PpToken(' ', 'whitespace').

    This generator consumes the output of genPhase2() and yields each
    munched token as a string.

    NOTE: This does not generate 'header-name' tokens as these are context
    dependent i.e. they are only valid in the context of a #include
    directive. [ISO/IEC 9899:1999 (E) 6.4.7 Header names Para 3 says that:
    "A header name preprocessing token is recognized only within a
    #include preprocessing directive."]. Instead any token consumer can
    avail themselves of TODO ???() that can interpret a token stream as a
    header-name if possible.

    TODO: Update FileLocator correctly (?).
    """
    self.checkIOK()
    phase2Tokens = self.genPhase2()
    munchers = [
        # Whitespace
        self.whitespace,
        # Comments
        self.cComment,
        self.cxxComment,
        # header-name is deliberately absent, see the docstring.
        # The finer-grained slicers are currently disabled:
        #   identifier                  self._sliceLexName
        #   pp-number                   self._sliceLexPpnumber
        #   character-literal           self._sliceCharacterLiteral
        #   string-literal              self._sliceStringLiteral
        #   preprocessing-op-or-punc    self._sliceLexOperators
        #   "each non-white-space character that cannot be one of the
        #   above"                      self._sliceNonWhitespaceSingleChar
        self.singleNonWhitespace,
    ]
    muncher = MaxMunchGen.MaxMunchGen(
        phase2Tokens,
        munchers,
        isExclusive=False,
    )
    #self._fileLocator.startNewPhase()
    for munched in muncher.gen():
        # Element 0 of each result is the sequence of characters munched.
        tokenChars = munched[0]
        yield ''.join(tokenChars)
def test_00(self):
    """TestMaximalMunchText: Test parsing vowels, consonants and digits: ''.

    An empty input string is expected to produce no tokens at all.
    """
    myStrGen = TestBufGen.StrGen('')
    myBg = MaxMunchGen.MaxMunchGen(
        next(myStrGen),
        [
            self.vowels,
            self.numbers,
            self.consonants,
        ],
    )
    myResult = list(myBg.gen())
    myExpResult = []
    # assertEqual, not the deprecated assertEquals alias (removed in 3.12).
    self.assertEqual(myResult, myExpResult)
def test_11(self):
    """TestMaximalMunchText: ambiguous result.

    The same generator function (self.vowels) is registered twice;
    draining the MaxMunchGen is expected to raise
    MaxMunchGen.ExceptionMaxMunchGen.
    """
    myStrGen = TestBufGen.StrGen('aeiou')
    myBg = MaxMunchGen.MaxMunchGen(
        next(myStrGen),
        [
            self.vowels,
            self.vowels,
        ],
    )
    # assertRaises replaces the manual try/fail/except; the generator must
    # be consumed for the exception to surface.
    with self.assertRaises(MaxMunchGen.ExceptionMaxMunchGen):
        list(myBg.gen())
def test_04_00(self):
    """TestMaximalMunchText: Test parsing vowels, consonants, no whitespace: 'wrae xyz'.

    Only self.vowels and self.consonants are registered, and the expected
    result ends before the space.
    """
    myStrGen = TestBufGen.StrGen('wrae xyz')
    myBg = MaxMunchGen.MaxMunchGen(
        next(myStrGen),
        [
            self.vowels,
            self.consonants,
        ],
    )
    myResult = list(myBg.gen())
    myExpResult = [
        (['w', 'r'], 'consonants'),
        (['a', 'e'], 'vowels'),
        # (['x', 'y', 'z'], 'consonants') is deliberately not expected;
        # presumably munching stops at the space as nothing accepts it.
    ]
    # assertEqual, not the deprecated assertEquals alias (removed in 3.12).
    self.assertEqual(myResult, myExpResult)
def genPhase_(self):
    """Dummy phase to check MaxMunch.anyToken.

    Wraps genPhase0() in an exclusive MaxMunchGen whose only generator is
    MaxMunchGen.anyToken and yields each munched token as a string.
    """
    self.checkIOK()
    phase0Tokens = self.genPhase0()
    muncher = MaxMunchGen.MaxMunchGen(
        phase0Tokens,
        [MaxMunchGen.anyToken],
        isExclusive=True,
    )
    #self._fileLocator.startNewPhase()
    for munched in muncher.gen():
        # Element 0 of each result is the sequence of characters munched.
        tokenChars = munched[0]
        yield ''.join(tokenChars)
def test_02(self):
    """TestMaximalMunchText: Test parsing vowels, consonants and digits: 'aeioubc123daaavfr'.

    Checks that the input is split into alternating runs, each tagged with
    the kind reported by the generator that claimed it.
    """
    myStrGen = TestBufGen.StrGen('aeioubc123daaavfr')
    myBg = MaxMunchGen.MaxMunchGen(
        next(myStrGen),
        [
            self.vowels,
            self.numbers,
            self.consonants,
        ],
    )
    myResult = list(myBg.gen())
    myExpResult = [
        (['a', 'e', 'i', 'o', 'u'], 'vowels'),
        (['b', 'c'], 'consonants'),
        (['1', '2', '3'], 'numbers'),
        (['d'], 'consonants'),
        (['a', 'a', 'a'], 'vowels'),
        (['v', 'f', 'r'], 'consonants'),
    ]
    # assertEqual, not the deprecated assertEquals alias (removed in 3.12).
    self.assertEqual(myResult, myExpResult)
def test_02(self):
    """TestMaximalMunchLineContinuation: Line continuation replacement - detection of one at EOF [02].

    The input ends in a backslash-newline pair. The expected tokens are
    'a' (kind None) followed by an empty 'lineContinuation' token, and
    self._cntrAddNewLinesAfterCont is expected to be left at 1.
    """
    myStrIn = 'a\\\n'
    myStrGen = TestBufGen.StrGen(myStrIn)
    myBg = MaxMunchGen.MaxMunchGen(
        next(myStrGen),
        [
            self.lineContinuation,
            MaxMunchGen.anyToken,
        ],
        isExclusive=True,
    )
    myResult = list(myBg.gen())
    # assertEqual, not the deprecated assertEquals alias (removed in 3.12).
    self.assertEqual(myResult, [(['a'], None), ([], 'lineContinuation')])
    self.assertEqual(1, self._cntrAddNewLinesAfterCont)
def test_00(self):
    """TestMaximalMunchProfileNumbers.test_00: dec, oct and hex digit runs * 1024

    Builds one line holding the octal, decimal and hex digit alphabets,
    repeats it 4 * 1024 times and drains the MaxMunchGen over it. Nothing
    is asserted; the token stream is simply consumed.
    """
    digitRuns = (string.octdigits, string.digits, string.hexdigits)
    oneLine = u'%s %s %s\n' % digitRuns
    sourceFile = io.StringIO(oneLine * 4 * 1024)
    charSource = self._gen(sourceFile)
    munchers = [
        self._munchOct,
        self._munchDec,
        self._munchHex,
        MaxMunchGen.anyToken,
    ]
    muncher = MaxMunchGen.MaxMunchGen(charSource, munchers, isExclusive=True)
    # Exhaust the generator; the tokens themselves are discarded.
    for _ in muncher.gen():
        pass
def test_00(self):
    """TestMaximalMunchTrigraph: Trigraph replacement.

    Passes a line written with trigraphs through the generator and checks
    that the joined token text equals the plain-character spelling.
    """
    myPStr = '??=define arraycheck(a,b) a??(b??) ??!??! b??(a??)'
    myLStr = '#define arraycheck(a,b) a[b] || b[a]'
    myStrGen = TestBufGen.StrGen(myPStr)
    myBg = MaxMunchGen.MaxMunchGen(
        next(myStrGen),
        [
            self.universalCharacterName,
            self.trigraph,
            MaxMunchGen.anyToken,
        ],
        isExclusive=True,
    )
    myResult = list(myBg.gen())
    # Each result is (token, kind); only the token characters are compared.
    myResultStr = ''.join(''.join(v[0]) for v in myResult)
    # assertEqual, not the deprecated assertEquals alias (removed in 3.12).
    self.assertEqual(myResultStr, myLStr)
def genPhase2(self):
    r"""Phase 2. Each instance of a backslash character (\) immediately
    followed by a new-line character is deleted, splicing physical source
    lines to form logical source lines. Only the last backslash on any
    physical source line shall be eligible for being part of such a
    splice. A source file that is not empty shall end in a new-line
    character, which shall not be immediately preceded by a backslash
    character before any such splicing takes place.

    This generator consumes the output of genPhase1() and yields each
    munched token as a string.
    """
    # The docstring is a raw string so the literal "\)" above is not an
    # invalid escape sequence.
    self.checkIOK()
    myBg = MaxMunchGen.MaxMunchGen(
        self.genPhase1(),
        [
            self.lineContinuation,
            MaxMunchGen.anyToken,
        ],
        isExclusive=True,
    )
    #self._fileLocator.startNewPhase()
    for aVal in myBg.gen():
        # Element 0 of each result is the sequence of characters munched.
        yield ''.join(aVal[0])
def test_02(self):
    """TestMaximalMunchComment: Unclosed CXX Comment.

    The input is a bare '//' with nothing after it; draining the generator
    is expected to raise ExceptionTestMaximalMunchComment.
    """
    myStrIn = '//'
    myStrGen = TestBufGen.StrGen(myStrIn)
    myBg = MaxMunchGen.MaxMunchGen(
        next(myStrGen),
        [
            self.cComment,
            self.cxxComment,
            MaxMunchGen.anyToken,
        ],
        isExclusive=False,
    )
    # assertRaises replaces the manual try/fail/except; the generator must
    # be consumed for the exception to surface.
    with self.assertRaises(ExceptionTestMaximalMunchComment):
        list(myBg.gen())
def test_01(self):
    """TestMaximalMunchTrigraph: universal-character-name replacement.

    The input contains the character '\\xa9'; the joined token text is
    expected to spell it as the six characters backslash, 'u00A9'.
    """
    myPStr = 'ab\xa9$@xyz'
    myLStr = 'ab\\u00A9$@xyz'
    myStrGen = TestBufGen.StrGen(myPStr)
    myBg = MaxMunchGen.MaxMunchGen(
        next(myStrGen),
        [
            self.trigraph,
            self.universalCharacterName,
            MaxMunchGen.anyToken,
        ],
        isExclusive=True,
    )
    myResult = list(myBg.gen())
    # Each result is (token, kind); only the token characters are compared.
    myResultStr = ''.join(''.join(v[0]) for v in myResult)
    # assertEqual, not the deprecated assertEquals alias (removed in 3.12).
    self.assertEqual(myResultStr, myLStr)
def test_01(self):
    """TestMaximalMunchLineContinuation: Line continuation replacement [01].

    Two backslash-newline pairs are spliced out and the expected output
    carries the newlines at the end ('abc' followed by three newlines).
    self._cntrAddNewLinesAfterCont is expected to be back at 0 afterwards.
    """
    myStrIn = 'a\\\nb\\\nc\n'
    myStrOut = 'abc\n\n\n'
    myStrGen = TestBufGen.StrGen(myStrIn)
    myBg = MaxMunchGen.MaxMunchGen(
        next(myStrGen),
        [
            self.lineContinuation,
            MaxMunchGen.anyToken,
        ],
        isExclusive=True,
    )
    myResult = list(myBg.gen())
    # Each result is (token, kind); only the token characters are compared.
    myResultStr = ''.join(''.join(v[0]) for v in myResult)
    # assertEqual, not the deprecated assertEquals alias (removed in 3.12).
    self.assertEqual(myResultStr, myStrOut)
    self.assertEqual(0, self._cntrAddNewLinesAfterCont)
def test_03_01(self):
    """TestMaximalMunchText: Test parsing vowels, consonants and digits: 'wraexyz' with alternate function.

    Uses self.consonants_2 in place of self.consonants; the expected
    tokens are still tagged 'consonants'.
    """
    myStrGen = TestBufGen.StrGen('wraexyz')
    myBg = MaxMunchGen.MaxMunchGen(
        next(myStrGen),
        [
            self.vowels,
            self.numbers,
            self.consonants_2,
        ],
    )
    myResult = list(myBg.gen())
    myExpResult = [
        (['w', 'r'], 'consonants'),
        (['a', 'e'], 'vowels'),
        (['x', 'y', 'z'], 'consonants'),
    ]
    # assertEqual, not the deprecated assertEquals alias (removed in 3.12).
    self.assertEqual(myResult, myExpResult)
def genPhase1(self):
    """Phase 1. Physical source file multibyte characters are mapped, in
    an implementation defined manner, to the source character set
    (introducing new-line characters for end-of-line indicators) if
    necessary. Trigraph sequences are replaced by corresponding
    single-character internal representations.

    This generator consumes the output of genPhase0() and yields each
    munched token as a string.
    """
    self.checkIOK()
    phase0Tokens = self.genPhase0()
    munchers = [
        self.universalCharacterName,
        self.trigraph,
        MaxMunchGen.anyToken,
    ]
    muncher = MaxMunchGen.MaxMunchGen(
        phase0Tokens,
        munchers,
        isExclusive=True,
    )
    #self._fileLocator.startNewPhase()
    for munched in muncher.gen():
        # Element 0 of each result is the sequence of characters munched.
        tokenChars = munched[0]
        yield ''.join(tokenChars)