def __new__(cls, v):
    """Create a dimension (in scaled points) from a TeX dimen/glue spec.

    *v* may be a Macro (delegated to its ``__dimen__`` method), a string
    such as ``'12pt plus 2pt minus 1pt'``, or a plain number already in
    scaled points.  Raises ValueError for an unrecognized unit.

    Fixes Python 2 remnants: ``basestring`` -> ``str`` and the comma-form
    ``raise`` statement, both invalid under Python 3.
    """
    if isinstance(v, Macro):
        return v.__dimen__()
    elif isinstance(v, str) and v[-1] in encoding.stringletters():
        # Get rid of glue components ("plus ..."/"minus ...")
        v = list(v.split('plus').pop(0).split('minus').pop(0).strip())
        units = []
        # Peel trailing unit letters off the numeric part
        while v and v[-1] in encoding.stringletters():
            units.insert(0, v.pop())
        v = float(''.join(v))
        units = ''.join(units)
        if units == 'pt':
            v *= 65536
        elif units == 'pc':
            v *= 12 * 65536
        elif units == 'in':
            v *= 72.27 * 65536
        elif units == 'bp':
            v *= (72.27 * 65536) / 72
        elif units == 'cm':
            v *= (72.27 * 65536) / 2.54
        elif units == 'mm':
            v *= (72.27 * 65536) / 25.4
        elif units == 'dd':
            v *= (1238.0 * 65536) / 1157
        elif units == 'cc':
            v *= (1238.0 * 12 * 65536) / 1157
        elif units == 'sp':
            pass
        # Encode fil(ll)s by adding 2, 4, and 6 billion
        elif units == 'fil':
            if v < 0:
                v -= 2e9
            else:
                v += 2e9
        elif units == 'fill':
            if v < 0:
                v -= 4e9
            else:
                v += 4e9
        elif units == 'filll':
            if v < 0:
                v -= 6e9
            else:
                v += 6e9
        elif units == 'mu':
            pass
        # Just estimates, since I don't know the actual font size
        elif units == 'ex':
            v *= 5 * 65536
        elif units == 'em':
            v *= 11 * 65536
        else:
            raise ValueError('Unrecognized units: %s' % units)
    return float.__new__(cls, v)
def __new__(cls, v):
    """Coerce *v* into a dimension measured in scaled points (sp).

    Macros delegate to their own ``__dimen__``; strings with a trailing
    unit (e.g. ``'3.5in'``, possibly with glue components) are parsed
    and converted; anything else is taken as an sp value directly.
    """
    if isinstance(v, Macro):
        return v.__dimen__()
    if isinstance(v, str) and v[-1] in encoding.stringletters():
        # Strip any glue components ("plus ..." / "minus ...")
        chars = list(v.split('plus').pop(0).split('minus').pop(0).strip())
        unit_chars = []
        while chars and chars[-1] in encoding.stringletters():
            unit_chars.insert(0, chars.pop())
        value = float(''.join(chars))
        unit = ''.join(unit_chars)
        # Multiplicative conversions to scaled points
        scale = {
            'pt': 65536,
            'pc': 12 * 65536,
            'in': 72.27 * 65536,
            'bp': (72.27 * 65536) / 72,
            'cm': (72.27 * 65536) / 2.54,
            'mm': (72.27 * 65536) / 25.4,
            'dd': (1238.0 * 65536) / 1157,
            'cc': (1238.0 * 12 * 65536) / 1157,
            'sp': 1,
            'mu': 1,
            # Just estimates, since the actual font size is unknown
            'ex': 5 * 65536,
            'em': 11 * 65536,
        }
        # fil(ll) stretches are encoded by offsetting 2, 4, and 6 billion
        infinite = {'fil': 2e9, 'fill': 4e9, 'filll': 6e9}
        if unit in scale:
            value *= scale[unit]
        elif unit in infinite:
            offset = infinite[unit]
            value = value - offset if value < 0 else value + offset
        else:
            raise ValueError('Unrecognized units: %s' % unit)
        v = value
    return float.__new__(cls, v)
def groups(self):
    """Group index entries into batches keyed by the first letter of the sort key."""
    def classify(entry):
        # Return (label, title) describing which batch *entry* belongs to
        try:
            first = entry.sortkey[0].upper()
        except IndexError:
            return 'Symbols', 'Symbols'
        if first in encoding.stringletters():
            return first, first
        if first == '_':
            return first, '_ (Underscore)'
        return 'Symbols', 'Symbols'

    batches = []
    previous = ''
    for entry in self:
        label, title = classify(entry)
        if title != previous:
            group = self.IndexGroup()
            group.title = title
            group.id = label
            batches.append(group)
            previous = title
        batches[-1].append(entry)

    columns = self.ownerDocument.config['document']['index-columns']
    for group in batches:
        group[:] = self.splitColumns(group, columns)
    return batches
def groups(self):
    """
    Group index entries into batches according to the first letter

    The leading character of each sort key is transliterated to ASCII
    with unidecode before being classified.
    """
    batches = []
    prev_title = ''
    for entry in self:
        try:
            label = title = unidecode(entry.sortkey[0]).upper()
            if title not in encoding.stringletters():
                if title == '_':
                    title = '_ (Underscore)'
                else:
                    label = title = 'Symbols'
        except IndexError:
            label = title = 'Symbols'
        # Start a new group whenever the computed title changes
        if title != prev_title:
            group = self.IndexGroup()
            group.title = title
            group.id = label
            batches.append(group)
            prev_title = title
        batches[-1].append(entry)
    ncols = self.ownerDocument.config['document']['index-columns']
    for group in batches:
        group[:] = self.splitColumns(group, ncols)
    return batches
def default(self, node):
    """Render a non-text node.

    Single-character macros whose name is not a letter (\\&, \\$, \\%,
    etc.) are passed through textDefault as literal characters; all
    other nodes are rendered by string conversion.
    """
    name = node.nodeName
    is_symbol_macro = len(name) == 1 and name not in encoding.stringletters()
    if is_symbol_macro:
        return self.textDefault(name)
    # Render child nodes
    return str(node)
def default(self, node):
    """Rendering method for all non-text nodes.

    Handles single-character macros whose name is not a letter
    (\\&, \\$, \\%, etc.) by emitting the character itself; everything
    else is rendered by converting the node (and its children) to text.

    Fix: ``unicode()`` does not exist in Python 3 — ``str()`` is its
    replacement and is already what the sibling implementation uses.
    """
    # Handle characters like \&, \$, \%, etc.
    if len(node.nodeName) == 1 and node.nodeName not in encoding.stringletters():
        return self.textDefault(node.nodeName)
    # Render child nodes
    return str(node)
def source(self):
    """Reconstruct the TeX source for this macro node.

    Handles \\begin/\\end environments and plain control sequences,
    reverting internal names like "active::~" back to their original
    form.

    Fix: membership is tested directly on the attributes mapping
    instead of materializing ``list(self.attributes.keys())``.
    """
    name = self.nodeName

    # Automatically revert internal names like "active::~"
    escape = '\\'
    if '::' in name:
        name = name.split('::').pop()
        escape = ''

    # \begin environment
    # If self.childNodes is not empty, print out the entire environment
    if self.macroMode == Macro.MODE_BEGIN:
        argSource = sourceArguments(self)
        if not argSource:
            argSource = ' '
        s = '%sbegin{%s}%s' % (escape, name, argSource)
        if self.hasChildNodes():
            s += '%s%send{%s}' % (sourceChildren(self), escape, name)
        return s

    # \end environment
    if self.macroMode == Macro.MODE_END:
        return '%send{%s}' % (escape, name)

    argSource = sourceArguments(self)
    if not argSource:
        argSource = ' '
    elif argSource[0] in encoding.stringletters() and \
            not (len(name) == 1 and name[0] not in encoding.stringletters()):
        # Pad with a space so the argument doesn't run into the macro name
        argSource = ' %s' % argSource
    s = '%s%s%s' % (escape, name, argSource)

    # If self.childNodes is not empty, print out the contents
    # (a 'self' attribute means the content was captured as an argument)
    if self.attributes and 'self' in self.attributes:
        pass
    else:
        if self.hasChildNodes():
            s += sourceChildren(self)
    return s
def ref(self):
    """Resolve the reference label, expanding counter placeholders.

    The refLabel template may contain _Alph_, _alph_, _Roman_, _roman_
    and _arabic_ tokens; on any failure the original reference is
    returned unchanged (best-effort).
    """
    if not self.refLabel:
        return self.origref
    try:
        position = int(self.origref.textContent)
        alph = encoding.stringletters()[position - 1]
        substitutions = (
            (r'_Alph_', alph.upper()),
            (r'_alph_', alph.lower()),
            (r'_Roman_', numToRoman(position)),
            (r'_roman_', numToRoman(position).lower()),
            (r'_arabic_', str(position)),
        )
        text = str(self.refLabel)
        for pattern, replacement in substitutions:
            text = re.sub(pattern, replacement, text)
        return text
    except Exception:
        pass
    return self.origref
def term(self, position):
    """Format the item label for position *position* of a list.

    An explicit listType template wins; otherwise the label style is
    derived from the list nesting depth ((a), i., A., 1., ...).
    """
    letter = encoding.stringletters()[position - 1]
    if self.listType:
        # Substitute un-braced counter tokens in the user-supplied type
        result = re.sub(r'(?<!{)I(?!})', numToRoman(position), self.listType)
        result = re.sub(r'(?<!{)i(?!})', numToRoman(position).lower(), result)
        result = re.sub(r'(?<!{)1(?!})', str(position), result)
        result = re.sub(r'(?<!{)A(?!})', letter.upper(), result)
        result = re.sub(r'(?<!{)a(?!})', letter.lower(), result)
        return result
    if self.listDepth == 2:
        return '({})'.format(letter.lower())
    if self.listDepth == 3:
        return '{}.'.format(numToRoman(position).lower())
    if self.listDepth == 4:
        return '{}.'.format(letter.upper())
    return '{}.'.format(position)
def term(self, position):
    """Format the item label for position *position* of a list.

    An explicit listLabel template wins; otherwise the label style is
    derived from the list nesting depth ((a), i., A., 1., ...).

    Fix: the _Roman_/_roman_ substitutions were swapped — _Roman_ now
    yields the uppercase roman numeral and _roman_ the lowercase one,
    matching the _Alph_/_alph_ convention used by the ref property.
    """
    alph = encoding.stringletters()[position - 1]
    if self.listLabel:
        t = re.sub(r'_Alph_', alph.upper(), str(self.listLabel))
        t = re.sub(r'_alph_', alph.lower(), t)
        t = re.sub(r'_Roman_', numToRoman(position), t)
        t = re.sub(r'_roman_', numToRoman(position).lower(), t)
        t = re.sub(r'_arabic_', str(position), t)
    elif self.listDepth == 2:
        t = '({})'.format(alph.lower())
    elif self.listDepth == 3:
        t = '{}.'.format(numToRoman(position).lower())
    elif self.listDepth == 4:
        t = '{}.'.format(alph.upper())
    else:
        t = '{}.'.format(position)
    return t
def source(self):
    """Reconstruct the TeX source for this macro node.

    Handles \\begin/\\end environments and plain control sequences,
    reverting internal names like "active::~" back to their original
    form.

    Fix: ``dict.has_key`` was removed in Python 3 — replaced with the
    ``in`` membership test.
    """
    name = self.nodeName

    # Automatically revert internal names like "active::~"
    escape = '\\'
    if '::' in name:
        name = name.split('::').pop()
        escape = ''

    # \begin environment
    # If self.childNodes is not empty, print out the entire environment
    if self.macroMode == Macro.MODE_BEGIN:
        argSource = sourceArguments(self)
        if not argSource:
            argSource = ' '
        s = '%sbegin{%s}%s' % (escape, name, argSource)
        if self.hasChildNodes():
            s += '%s%send{%s}' % (sourceChildren(self), escape, name)
        return s

    # \end environment
    if self.macroMode == Macro.MODE_END:
        return '%send{%s}' % (escape, name)

    argSource = sourceArguments(self)
    if not argSource:
        argSource = ' '
    elif argSource[0] in encoding.stringletters():
        # Pad with a space so the argument doesn't run into the macro name
        argSource = ' %s' % argSource
    s = '%s%s%s' % (escape, name, argSource)

    # If self.childNodes is not empty, print out the contents
    # (a 'self' attribute means the content was captured as an argument)
    if self.attributes and 'self' in self.attributes:
        pass
    else:
        if self.hasChildNodes():
            s += sourceChildren(self)
    return s
# Fix: the Python 2-only StringIO module was removed in Python 3;
# io.StringIO is the drop-in replacement.
from io import StringIO

# Default TeX categories, indexed by catcode (0-15); each entry lists
# the characters assigned to that catcode by default.
DEFAULT_CATEGORIES = [
    "\\",       # 0  - Escape character
    "{",        # 1  - Beginning of group
    "}",        # 2  - End of group
    "$",        # 3  - Math shift
    "&",        # 4  - Alignment tab
    "\n",       # 5  - End of line
    "#",        # 6  - Parameter
    "^",        # 7  - Superscript
    "_",        # 8  - Subscript
    "\x00",     # 9  - Ignored character
    " \t\r\f",  # 10 - Space
    encoding.stringletters() + "@",  # 11 - Letter
    "",         # 12 - Other character - This isn't explicitly defined.  If it
                #      isn't any of the other categories, then it's an "other"
                #      character.
    "~",        # 13 - Active character
    "%",        # 14 - Comment character
    "",         # 15 - Invalid character
]

# In verbatim mode, only letters keep their catcode; everything else is "other"
VERBATIM_CATEGORIES = [""] * 16
VERBATIM_CATEGORIES[11] = encoding.stringletters()


class Token(Text):
    """ Base class for all TeX tokens """
def arguments(self):
    """
    Compile the argument string into function call arguments

    Returns: arguments as compiled entities

    Fixes Python 2 remnants: two ``dict.has_key`` calls (removed in
    Python 3) and two comma-form ``raise`` statements (a syntax error
    in Python 3).
    """
    tself = type(self)

    # Check for cached version first
    if '@arguments' in vars(tself):
        return vars(tself)['@arguments']

    # If the argument string is empty, short circuit
    if not tself.args:
        setattr(tself, '@arguments', [])
        return getattr(tself, '@arguments')

    # Split the arguments into their primary components
    args = iter([x.strip() for x in
                 re.split(r'(\w+(?::\w+(?:\(\S\))?(?::\w+)?)?|\W|\s+)',
                          tself.args)
                 if x is not None and x.strip()])

    groupings = {'[':'[]','(':'()','<':'<>','{':'{}'}

    macroargs = []
    argdict = {}
    index = 0
    for item in args:

        # Modifier argument
        if item in '*+-':
            if argdict:
                raise ValueError(
                    'Improperly placed "%s" in argument string "%s"' %
                    (item, tself.args))
            argdict.clear()
            macroargs.append(Argument('*modifier*', index, {'spec':item}))
            index += 1

        # Optional equals
        elif item in '=':
            argdict.clear()
            macroargs.append(Argument('*equals*', index, {'spec':item}))
            index += 1

        # Beginning of group
        elif item in '[(<{':
            argdict.clear()
            argdict['spec'] = groupings[item]

        # End of group
        elif item in '])>}':
            pass

        # Argument name (and possibly type)
        elif item[0] in encoding.stringletters():
            parts = item.split(':')
            item = parts.pop(0)
            # Parse for types and subtypes
            if parts:
                # We already have a type, so check for subtypes
                # for list items
                if 'type' in argdict:
                    argdict['subtype'] = parts.pop(0)
                else:
                    # Split type and possible delimiter
                    argdict['type'], argdict['delim'] = \
                        re.search(r'(\w+)(?:\((\W)\))?', parts.pop(0)).groups()
                    if parts:
                        argdict['subtype'] = parts.pop(0)
            # Arguments that are instance variables are always expanded
            if argdict.get('type') in ['cs','nox']:
                argdict['expanded'] = False
            else:
                argdict['expanded'] = True
            macroargs.append(Argument(item, index, argdict))
            index += 1
            argdict.clear()

        else:
            raise ValueError(
                'Could not parse argument string "%s", reached unexpected "%s"'
                % (tself.args, item))

    # Cache the result
    setattr(tself, '@arguments', macroargs)

    return macroargs
def Alph(self):
    """Uppercase alphabetic representation of the counter value (1 -> 'A')."""
    letters = encoding.stringletters()
    return letters[self.value - 1].upper()
from io import BytesIO, StringIO, TextIOWrapper # Default TeX categories DEFAULT_CATEGORIES = [ '\\', # 0 - Escape character '{', # 1 - Beginning of group '}', # 2 - End of group '$', # 3 - Math shift '&', # 4 - Alignment tab '\n', # 5 - End of line '#', # 6 - Parameter '^', # 7 - Superscript '_', # 8 - Subscript '\x00',# 9 - Ignored character ' \t\r\f', # 10 - Space encoding.stringletters() + '@', # - Letter '', # 12 - Other character - This isn't explicitly defined. If it # isn't any of the other categories, then # it's an "other" character. '~', # 13 - Active character '%', # 14 - Comment character '' # 15 - Invalid character ] VERBATIM_CATEGORIES = [''] * 16 VERBATIM_CATEGORIES[11] = encoding.stringletters() class EndInput(Exception): pass class Token(Text):
def arguments(self):
    """
    Compile the argument string into function call arguments

    Parses the class-level ``args`` specification (e.g. ``'* [ opt ] self'``)
    into a list of Argument objects, caching the result on the class
    under the key '@arguments'.

    Returns: arguments as compiled entities

    """
    tself = type(self)

    # Check for cached version first
    if '@arguments' in vars(tself):
        return vars(tself)['@arguments']

    # If the argument string is empty, short circuit
    if not tself.args:
        setattr(tself, '@arguments', [])
        return getattr(tself, '@arguments')

    # Split the arguments into their primary components:
    # names with optional ':type(:delim)(:subtype)' suffixes, or
    # single punctuation characters
    args = iter([x.strip() for x in
                 re.split(r'(\w+(?::\w+(?:\(\S\))?(?::\w+)?)?|\W|\s+)',
                          tself.args)
                 if x is not None and x.strip()])

    # Open bracket -> full grouping spec for the argument
    groupings = {'[':'[]', '(':'()', '<':'<>', '{':'{}'}

    macroargs = []
    argdict = {}
    index = 0
    for item in args:

        # Modifier argument (e.g. the starred form of a macro)
        if item in '*+-':
            if argdict:
                raise ValueError('Improperly placed "%s" in argument string "%s"' % \
                                 (item, tself.args))
            argdict.clear()
            macroargs.append(Argument('*modifier*', index, {'spec':item}))
            index += 1

        # Optional equals
        elif item in '=':
            argdict.clear()
            macroargs.append(Argument('*equals*', index, {'spec':item}))
            index += 1

        # Beginning of group: remember the delimiters for the next
        # named argument
        elif item in '[(<{':
            argdict.clear()
            argdict['spec'] = groupings[item]

        # End of group: nothing to do, the spec was set on open
        elif item in '])>}':
            pass

        # Argument name (and possibly type)
        elif item[0] in encoding.stringletters():
            parts = item.split(':')
            item = parts.pop(0)
            # Parse for types and subtypes
            if parts:
                # We already have a type, so check for subtypes
                # for list items
                if 'type' in list(argdict.keys()):
                    argdict['subtype'] = parts.pop(0)
                else:
                    # Split type and possible delimiter, e.g. 'list(,)'
                    argdict['type'], argdict['delim'] = \
                        re.search(r'(\w+)(?:\((\W)\))?', parts.pop(0)).groups()
                    if parts:
                        argdict['subtype'] = parts.pop(0)
            # Arguments that are instance variables are always expanded
            if argdict.get('type') in ['cs', 'nox']:
                argdict['expanded'] = False
            else:
                argdict['expanded'] = True
            # NOTE(review): argdict is cleared immediately after being
            # handed to Argument — this assumes Argument copies the dict
            # rather than keeping a reference; confirm in Argument.__init__
            macroargs.append(Argument(item, index, argdict))
            index += 1
            argdict.clear()

        else:
            raise ValueError('Could not parse argument string "%s", reached unexpected "%s"' % (tself.args, item))

    # Cache the result
    setattr(tself, '@arguments', macroargs)

    return macroargs
def __iter__(self):
    """
    Iterate over tokens in the input stream

    Returns: generator that iterates through tokens in the stream

    NOTE(review): this is the Python 2 variant — ``charIter.next`` and
    the comma-form ``raise`` below are invalid under Python 3.
    """
    # Cache variables to prevent global lookups during generator;
    # the self-assignments bind the module globals into this frame
    global Space, EscapeSequence
    Space = Space
    EscapeSequence = EscapeSequence
    # NOTE(review): 'buffer' shadows the Python 2 builtin of the same name
    buffer = self._tokBuffer
    charIter = self.iterchars()
    next = charIter.next
    context = self.context
    pushChar = self.pushChar
    STATE_N = self.STATE_N
    STATE_M = self.STATE_M
    STATE_S = self.STATE_S
    ELEMENT_NODE = Node.ELEMENT_NODE
    CC_LETTER = Token.CC_LETTER
    CC_OTHER = Token.CC_OTHER
    CC_SPACE = Token.CC_SPACE
    CC_EOL = Token.CC_EOL
    CC_ESCAPE = Token.CC_ESCAPE
    # NOTE(review): duplicate binding of CC_EOL (harmless)
    CC_EOL = Token.CC_EOL
    CC_COMMENT = Token.CC_COMMENT
    CC_ACTIVE = Token.CC_ACTIVE
    prev = None
    while 1:
        # Purge buffer first: tokens pushed back take priority
        while buffer:
            yield buffer.pop(0)

        # Get the next character
        token = next()

        if token.nodeType == ELEMENT_NODE:
            raise ValueError, "Expanded tokens should never make it here"

        code = token.catcode

        # Short circuit letters and other since they are so common
        if code == CC_LETTER or code == CC_OTHER:
            self.state = STATE_M

        # Whitespace: collapse runs of spaces into a single Space token
        elif code == CC_SPACE:
            if self.state == STATE_S or self.state == STATE_N:
                continue
            self.state = STATE_S
            token = Space(u" ")

        # End of line
        elif code == CC_EOL:
            state = self.state
            if state == STATE_S:
                self.state = STATE_N
                continue
            elif state == STATE_M:
                token = Space(" ")
                code = CC_SPACE
                self.state = STATE_N
            elif state == STATE_N:
                # A second EOL in new-line state means a blank line,
                # i.e. a paragraph break.
                # ord(token) != 10 is the same as saying token != '\n'
                # but it is much faster.
                if ord(token) != 10:
                    self.lineNumber += 1
                    self.readline()
                token = EscapeSequence("par")
                # Prevent adjacent paragraphs
                if prev == token:
                    continue
                code = CC_ESCAPE

        # Escape sequence
        elif code == CC_ESCAPE:

            # Get name of command sequence
            self.state = STATE_M

            # The for loop runs at most once (note the unconditional
            # break); the else clause handles an exhausted iterator.
            for token in charIter:
                if token.catcode == CC_LETTER:
                    word = [token]
                    for t in charIter:
                        if t.catcode == CC_LETTER:
                            word.append(t)
                        else:
                            pushChar(t)
                            break
                    token = EscapeSequence("".join(word))
                elif token.catcode == CC_EOL:
                    # pushChar(token)
                    # token = EscapeSequence()
                    token = Space(" ")
                    self.state = STATE_S
                else:
                    token = EscapeSequence(token)

                #
                # Because we can implement macros both in LaTeX and Python, we don't
                # always want the whitespace to be eaten.  For example, implementing
                # \chardef\%=`% would be \char{`%} in TeX, but in Python it's just
                # another macro class that would eat whitspace incorrectly.  So we
                # have to do this kind of thing in the parse() method of Macro.
                #
                if token.catcode != CC_EOL:
                    # HACK: I couldn't get the parse() thing to work so I'm just not
                    # going to parse whitespace after EscapeSequences that end in
                    # non-letter characters as a half-assed solution.
                    if token[-1] in encoding.stringletters():
                        # Absorb following whitespace
                        self.state = STATE_S
                break
            else:
                token = EscapeSequence()

            # Check for any \let aliases
            token = context.lets.get(token, token)

            # TODO: This action should be generalized so that the
            # tokens are processed recursively
            # NOTE(review): 'token is not token' is always False, so this
            # branch is dead; presumably the pre-\let token was meant
            if token is not token and token.catcode == CC_COMMENT:
                self.readline()
                self.lineNumber += 1
                self.state = STATE_N
                continue

        elif code == CC_COMMENT:
            self.readline()
            self.lineNumber += 1
            self.state = STATE_N
            continue

        elif code == CC_ACTIVE:
            token = EscapeSequence("active::%s" % token)
            token = context.lets.get(token, token)
            self.state = STATE_M

        else:
            self.state = STATE_M

        prev = token

        yield token
def Alph(self):
    """Uppercase alphabetic form of this counter (1 -> 'A', 2 -> 'B', ...)."""
    index = self.value - 1
    return encoding.stringletters()[index].upper()
def __iter__(self):
    """
    Iterate over tokens in the input stream

    Returns: generator that iterates through tokens in the stream

    Fix: the \\let-alias comment check compared ``token is not token``
    (always False), making that branch unreachable; it now compares the
    substituted token against the pre-substitution one.  The redundant
    duplicate ``CC_EOL`` local binding was also dropped.
    """
    # Cache variables to prevent global lookups during generator;
    # the self-assignments bind the module globals into this frame
    global Space, EscapeSequence
    Space = Space
    EscapeSequence = EscapeSequence
    mybuffer = self._tokBuffer
    charIter = self.iterchars()
    context = self.context
    pushChar = self.pushChar
    STATE_N = self.STATE_N
    STATE_M = self.STATE_M
    STATE_S = self.STATE_S
    ELEMENT_NODE = Node.ELEMENT_NODE
    CC_LETTER = Token.CC_LETTER
    CC_OTHER = Token.CC_OTHER
    CC_SPACE = Token.CC_SPACE
    CC_EOL = Token.CC_EOL
    CC_ESCAPE = Token.CC_ESCAPE
    CC_COMMENT = Token.CC_COMMENT
    CC_ACTIVE = Token.CC_ACTIVE
    prev = None
    while 1:
        # Purge mybuffer first: pushed-back tokens take priority
        while mybuffer:
            yield mybuffer.pop(0)

        # Get the next character
        try:
            token = next(charIter)
        except StopIteration:
            raise EndInput

        if token.nodeType == ELEMENT_NODE:
            raise ValueError('Expanded tokens should never make it here')

        code = token.catcode

        # Short circuit letters and other since they are so common
        if code == CC_LETTER or code == CC_OTHER:
            self.state = STATE_M

        # Whitespace: collapse runs of spaces into a single Space token
        elif code == CC_SPACE:
            if self.state == STATE_S or self.state == STATE_N:
                continue
            self.state = STATE_S
            token = Space(' ')

        # End of line
        elif code == CC_EOL:
            state = self.state
            if state == STATE_S:
                self.state = STATE_N
                continue
            elif state == STATE_M:
                token = Space(' ')
                code = CC_SPACE
                self.state = STATE_N
            elif state == STATE_N:
                # A second EOL in new-line state means a blank line,
                # i.e. a paragraph break.
                # ord(token) != 10 is the same as saying token != '\n'
                # but it is much faster.
                if ord(token) != 10:
                    self.lineNumber += 1
                    self.readline()
                token = EscapeSequence('par')
                # Prevent adjacent paragraphs
                if prev == token:
                    continue
                code = CC_ESCAPE

        # Escape sequence
        elif code == CC_ESCAPE:

            # Get name of command sequence
            self.state = STATE_M

            # The for loop runs at most once (note the unconditional
            # break); the else clause handles an exhausted iterator.
            for token in charIter:
                if token.catcode == CC_LETTER:
                    word = [token]
                    for t in charIter:
                        if t.catcode == CC_LETTER:
                            word.append(t)
                        else:
                            pushChar(t)
                            break
                    token = EscapeSequence(''.join(word))
                elif token.catcode == CC_EOL:
                    #pushChar(token)
                    #token = EscapeSequence()
                    token = Space(' ')
                    self.state = STATE_S
                else:
                    token = EscapeSequence(token)

                #
                # Because we can implement macros both in LaTeX and Python, we don't
                # always want the whitespace to be eaten.  For example, implementing
                # \chardef\%=`% would be \char{`%} in TeX, but in Python it's just
                # another macro class that would eat whitspace incorrectly.  So we
                # have to do this kind of thing in the parse() method of Macro.
                #
                if token.catcode != CC_EOL:
                    # HACK: I couldn't get the parse() thing to work so I'm just not
                    # going to parse whitespace after EscapeSequences that end in
                    # non-letter characters as a half-assed solution.
                    if token[-1] in encoding.stringletters():
                        # Absorb following whitespace
                        self.state = STATE_S
                break
            else:
                token = EscapeSequence()

            # Check for any \let aliases
            before_let = token
            token = context.lets.get(token, token)

            # TODO: This action should be generalized so that the
            # tokens are processed recursively
            # If the alias substituted a comment token, skip the rest
            # of the line (previously dead code: 'token is not token')
            if token is not before_let and token.catcode == CC_COMMENT:
                self.readline()
                self.lineNumber += 1
                self.state = STATE_N
                continue

        elif code == CC_COMMENT:
            self.readline()
            self.lineNumber += 1
            self.state = STATE_N
            continue

        elif code == CC_ACTIVE:
            token = EscapeSequence('active::%s' % token)
            token = context.lets.get(token, token)
            self.state = STATE_M

        else:
            self.state = STATE_M

        prev = token

        yield token