Example #1
    def parse(self, s):
        self.tokens = self.lexer.tokenize(s)
        self._current_token = 0
        self._tokens_len = len(self.tokens)
        self._mark_locations = []

        # len() is never negative; an empty token stream means the lexer
        # found nothing to parse
        if self._tokens_len == 0:
            raise SyntaxError('could not find any entries')

        self.database = database = Database()

        while True:
            try:
                self._advance()
            except IndexError:
                # ran off the end of the token stream without seeing EOF
                self.unexpected_token('preamble, string, entry_start, or eof')

            token_type = self.token_type
            if token_type == 'PREAMBLE':
                preamble = self.preamble()
                database.add_preamble(
                    self._handle_value(preamble.contents)
                )
            elif token_type == 'STRING':
                string = self.string()
                database.add_macro(
                    string.key,
                    self._handle_value(string.value)
                )
            elif token_type == 'ENTRY_START':
                entry_node = self.entry()

                entry = Entry(
                    entry_node.entry_type,
                    entry_node.key.value
                )

                for field in entry_node.fields:
                    entry[field.key] = self._handle_value(field.value)
                    if field.key in Name.NAME_FIELDS:
                        # normalise name fields: split on ' and ', parse
                        # each name, then re-join in canonical form
                        entry[field.key] = ' and '.join(
                            unicode(Name(s)) for s in
                            tokenize_list(entry[field.key]))

                database.add_entry(entry)
            elif token_type == 'EOF':
                return database
            else:
                self.unexpected_token('preamble, string, entry_start, or eof')
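
The loop above is a straightforward token-dispatch parser: it pulls one token at a time and routes it by type until it reaches EOF, at which point the finished Database is returned. A minimal, self-contained sketch of the same dispatch pattern (the Token tuple and the sample stream here are illustrative, not part of the parser above):

from collections import namedtuple

Token = namedtuple('Token', ['type', 'value'])

def dispatch(tokens):
    # route each token by type until EOF, mirroring the parse loop above
    entries = []
    for token in tokens:
        if token.type == 'ENTRY_START':
            entries.append(token.value)
        elif token.type == 'EOF':
            return entries
        else:
            raise SyntaxError('unexpected token: ' + token.type)
    raise SyntaxError('token stream ended without EOF')

tokens = [Token('ENTRY_START', '@article'), Token('EOF', None)]
print(dispatch(tokens))  # ['@article']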
Example #2
def get_names(contents):
    u'''
    Work-horse function to extract all the names defined in the current bib file.
    '''
    names = []

    in_entry = False
    pos = 0
    contents_length = len(contents)

    while True:
        if not in_entry:
            matcher = re.search(NAME_FIELD_REGEX, contents[pos:])
            # no more `name =` fields
            if not matcher:
                break

            pos += matcher.end()
            in_entry = True
        else:
            chars = []

            # consume the brace-balanced field value; the search above
            # leaves pos just past the opening brace, so depth starts at 1
            bracket_depth = 1
            for c in contents[pos:]:
                if c == '}':
                    bracket_depth -= 1

                # stop at the brace that closes the field value
                if bracket_depth == 0:
                    break

                if c == '{':
                    bracket_depth += 1

                chars.append(c)

            names.extend([
                unicode(Name(s)) for s in tokenize_list(u''.join(chars))
            ])

            pos += len(chars)
            if pos >= contents_length:
                break
            in_entry = False

    return sorted(set(names))
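
get_names alternates between two states: searching for the next name field with NAME_FIELD_REGEX, then consuming the brace-balanced value that follows it, tracking nested braces with a depth counter. A standalone sketch of that scan (the sample field value is made up for illustration):

def read_braced_value(s):
    # s starts just after the opening '{'; collect characters until the
    # matching closing brace, tracking nested braces by depth
    chars = []
    depth = 1
    for c in s:
        if c == '}':
            depth -= 1
        if depth == 0:
            break
        if c == '{':
            depth += 1
        chars.append(c)
    return ''.join(chars)

print(read_braced_value('Doe, John and {van Gogh, Vincent}} trailing'))
# Doe, John and {van Gogh, Vincent}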
Example #5
    def __getitem__(self, key):
        if not key:
            return u''

        key = key.lower()
        result = None

        # a trailing '_short' requests the abbreviated form of the field
        short = False
        if key.endswith('_short'):
            short = True
            key = key[:-6]  # drop the 6-character '_short' suffix

        if key == 'keyword' or key == 'citekey':
            return self.entry.cite_key

        if key in Name.NAME_FIELDS:
            people = []
            for x in tokenize_list(self.entry[key]):
                if x.strip() == '':
                    continue

                try:
                    people.append(Name(x))
                except Exception:
                    # report the malformed name, but keep processing the
                    # remaining names in the field
                    print(
                        u'Error handling field "{0}" with value "{1}"'.format(
                            key, x))
                    traceback.print_exc()

            if len(people) == 0:
                return u''

            if short:
                result = _get_people_short(people)
            else:
                result = _get_people_long(people)

        if not result:
            result = self.entry[key]

        return remove_latex_commands(codecs.decode(result, 'latex'))
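
The '_short' suffix lets a caller request the abbreviated form of any name field: 'author_short' falls through to the same lookup as 'author' but formats the parsed names with _get_people_short instead of _get_people_long. A minimal sketch of that suffix-stripping dispatch in isolation (the field data and the stand-in formatting are placeholders, not the real Name machinery):

def lookup(fields, key):
    # normalise the key and peel off an optional '_short' suffix,
    # mirroring the dispatch in __getitem__ above
    key = key.lower()
    short = key.endswith('_short')
    if short:
        key = key[:-len('_short')]
    value = fields.get(key, '')
    # placeholder for _get_people_short: keep only the first name
    return value.split(' and ')[0] if short else value

fields = {'author': 'Doe, John and Roe, Jane'}
print(lookup(fields, 'AUTHOR_SHORT'))  # Doe, John
print(lookup(fields, 'author'))        # Doe, John and Roe, Jane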