def parse(self, s):
    self.tokens = self.lexer.tokenize(s)
    self._current_token = 0
    self._tokens_len = len(self.tokens)
    self._mark_locations = []

    # len() can never be negative; an empty token stream is the error case
    if self._tokens_len == 0:
        raise SyntaxError('could not find any entries')

    self.database = database = Database()

    while True:
        try:
            self._advance()
        except IndexError:
            # ran past the last token without ever seeing EOF
            self.unexpected_token('preamble, string, entry_start, or eof')

        token_type = self.token_type
        if token_type == 'PREAMBLE':
            preamble = self.preamble()
            database.add_preamble(self._handle_value(preamble.contents))
        elif token_type == 'STRING':
            string = self.string()
            database.add_macro(string.key, self._handle_value(string.value))
        elif token_type == 'ENTRY_START':
            entry_node = self.entry()
            entry = Entry(entry_node.entry_type, entry_node.key.value)
            for field in entry_node.fields:
                entry[field.key] = self._handle_value(field.value)
                # normalize name fields to a canonical "A and B" form
                if field.key in Name.NAME_FIELDS:
                    entry[field.key] = ' and '.join(
                        unicode(Name(s))
                        for s in tokenize_list(entry[field.key]))
            database.add_entry(entry)
        elif token_type == 'EOF':
            return database
        else:
            self.unexpected_token('preamble, string, entry_start, or eof')
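# A minimal usage sketch for parse(). The enclosing class and its
# constructor are not shown in this section, so ``Parser`` below is an
# assumed name for whatever class defines this method; adjust to the
# actual class in this module.
#
#     parser = Parser()
#     db = parser.parse(u'@article{knuth1984,\n'
#                       u'  author = {Knuth, Donald E.},\n'
#                       u'  title = {Literate Programming}}')
#     # db is a Database holding one Entry keyed by "knuth1984"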
def get_names(contents):
    u'''
    Work-horse function to extract all the names defined in the current
    bib file.
    '''
    names = []
    in_entry = False
    pos = 0
    contents_length = len(contents)

    while True:
        if not in_entry:
            matcher = re.search(NAME_FIELD_REGEX, contents[pos:])
            # no more `name =` fields
            if not matcher:
                break
            pos += matcher.end()
            in_entry = True
        else:
            # collect the field value up to its matching closing brace
            chars = []
            bracket_depth = 1
            for c in contents[pos:]:
                if c == '}':
                    bracket_depth -= 1
                    if bracket_depth == 0:
                        break
                if c == '{':
                    bracket_depth += 1
                chars.append(c)

            names.extend([
                unicode(Name(s))
                for s in tokenize_list(u''.join(chars))
            ])

            pos += len(chars)
            if pos >= contents_length:
                break
            in_entry = False

    return sorted(set(names))
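# Illustrative call to get_names(). This assumes NAME_FIELD_REGEX
# matches up to and including the opening brace of the field value
# (the bracket_depth scan above starts at 1 on that assumption):
#
#     contents = u'@book{k, author = {Knuth, Donald E. and Lamport, Leslie}}'
#     get_names(contents)
#     # -> sorted, de-duplicated unicode names, e.g.
#     #    [u'Knuth, Donald E.', u'Lamport, Leslie']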
def __getitem__(self, key):
    if not key:
        return u''
    key = key.lower()

    result = None
    short = False
    if key.endswith('_short'):
        short = True
        key = key[:-6]

    if key == 'keyword' or key == 'citekey':
        return self.entry.cite_key

    if key in Name.NAME_FIELDS:
        people = []
        for x in tokenize_list(self.entry[key]):
            if x.strip() == '':
                continue
            try:
                people.append(Name(x))
            # a bare except here would also swallow KeyboardInterrupt;
            # catch Exception instead and report the offending value
            except Exception:
                print(u'Error handling field "{0}" with value "{1}"'.format(
                    key, x))
                traceback.print_exc()

        if len(people) == 0:
            return u''

        if short:
            result = _get_people_short(people)
        else:
            result = _get_people_long(people)

    # non-name fields (or failed name formatting) fall through to the
    # raw field value
    if not result:
        result = self.entry[key]

    return remove_latex_commands(codecs.decode(result, 'latex'))
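# Sketch of the lookup behaviour, assuming this class wraps a parsed
# Entry (``wrapper`` is an illustrative name, not part of the real API):
#
#     wrapper['author']        # full name list, LaTeX commands stripped
#     wrapper['author_short']  # same field through _get_people_short()
#     wrapper['citekey']       # the entry's cite key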