Beispiel #1
0
 def test_multiple_string_write(self):
     """Two ``@string`` definitions are dumped in the order they were added."""
     database = BibDatabase()
     # Insertion order matters: name1 has to be written before name2.
     for key, value in (('name1', 'value1'), ('name2', 'value2')):
         database.strings[key] = value
     self.assertEqual(
         bibdeskparser.dumps(database),
         '@string{name1 = {value1}}\n\n@string{name2 = {value2}}\n\n',
     )
Beispiel #2
0
 def test_write_common_strings(self):
     """With ``write_common_strings=True`` the dump equals the reference file."""
     database = BibDatabase()
     database.load_common_strings()
     dumped = bibdeskparser.dumps(
         database,
         writer=BibTexWriter(write_common_strings=True),
     )
     # The reference output is kept as a fixture file.
     with io.open('tests/data/common_strings.bib') as reference:
         self.assertEqual(dumped, reference.read())
Beispiel #3
0
 def test_write_dependent_strings(self):
     """A string built from another string is written as a ``#`` expression."""
     database = BibDatabase()
     database.strings['title'] = 'Mr'
     # 'name' is the reference to 'title' followed by the literal 'Smith'.
     title_reference = BibDataString(database, 'title')
     database.strings['name'] = BibDataStringExpression(
         [title_reference, 'Smith']
     )
     self.assertEqual(
         bibdeskparser.dumps(database),
         '@string{title = {Mr}}\n\n@string{name = title # {Smith}}\n\n',
     )
    def test_align(self):
        bib_database = BibDatabase()
        bib_database.entries = [{
            'ID': 'abc123',
            'ENTRYTYPE': 'book',
            'author': 'test',
            'thisisaverylongkey': 'longvalue',
        }]
        writer = BibTexWriter()
        writer.align_values = True
        result = bibdeskparser.dumps(bib_database, writer)
        expected = """@book{abc123,
 author             = {test},
 thisisaverylongkey = {longvalue}
}

"""
        self.assertEqual(result, expected)

        with open(
                'tests/data/multiple_entries_and_comments.bib') as bibtex_file:
            bib_database = bibdeskparser.load(bibtex_file)
        writer = BibTexWriter()
        writer.contents = ['entries']
        writer.align_values = True
        result = bibdeskparser.dumps(bib_database, writer)
        expected = """@book{Toto3000,
 author    = {Toto, A and Titi, B},
 title     = {A title}
}

@article{Wigner1938,
 author    = {Wigner, E.},
 doi       = {10.1039/TF9383400029},
 issn      = {0014-7672},
 journal   = {Trans. Faraday Soc.},
 owner     = {fr},
 pages     = {29--41},
 publisher = {The Royal Society of Chemistry},
 title     = {The transition state method},
 volume    = {34},
 year      = {1938}
}

@book{Yablon2005,
 author    = {Yablon, A.D.},
 publisher = {Springer},
 title     = {Optical fiber fusion slicing},
 year      = {2005}
}

"""
        self.assertEqual(result, expected)
    def test_entry_separator(self):
        """An empty ``entry_separator`` leaves nothing after the entry's newline."""
        database = BibDatabase()
        database.entries = [
            {'ID': 'abc123', 'ENTRYTYPE': 'book', 'author': 'test'},
        ]
        compact_writer = BibTexWriter()
        compact_writer.entry_separator = ''
        output = bibdeskparser.dumps(database, compact_writer)
        self.assertEqual(output, "@book{abc123,\n author = {test}\n}\n")
 def test_sort_missing_field(self):
     """Ordering by 'year' also handles an entry that has no 'year' field."""
     database = BibDatabase()
     database.entries = [
         {'ID': 'b', 'ENTRYTYPE': 'article', 'year': '2000'},
         {'ID': 'c', 'ENTRYTYPE': 'book', 'year': '2010'},
         {'ID': 'a', 'ENTRYTYPE': 'book'},  # no 'year' on purpose
     ]
     by_year = BibTexWriter()
     by_year.order_entries_by = ('year', )
     # The entry without a year is expected to come first.
     expected = (
         "@book{a\n}\n\n"
         "@article{b,\n year = {2000}\n}\n\n"
         "@book{c,\n year = {2010}\n}\n\n"
     )
     self.assertEqual(bibdeskparser.dumps(database, by_year), expected)
Beispiel #7
0
    def _entries_to_bibtex(self, bib_database):
        """Serialize every entry of ``bib_database`` into one BibTeX string.

        Entries are sorted with ``BibDatabase.entry_sort_key`` when
        ``self.order_entries_by`` is set; otherwise they keep the order of
        ``bib_database.entries``. When ``self.align_values`` is set,
        ``self._max_field_width`` is updated to the length of the longest
        key found in any entry before the entries are written.

        :param bib_database: database whose ``entries`` are written
        :returns: concatenation of ``self._entry_to_bibtex(entry)`` for
            every entry; ``''`` when there are no entries
        :rtype: str
        """
        if self.order_entries_by:
            # TODO: allow sort field does not exist for entry
            entries = sorted(
                bib_database.entries,
                key=lambda entry: BibDatabase.entry_sort_key(
                    entry, self.order_entries_by),
            )
        else:
            entries = bib_database.entries

        if self.align_values:
            # Longest key over all entries. ``default=0`` keeps an empty
            # database, or an entry without any key, from raising ValueError.
            self._max_field_width = max(
                (len(key) for entry in entries for key in entry.keys()),
                default=0,
            )

        return ''.join(self._entry_to_bibtex(entry) for entry in entries)
Beispiel #8
0
 def test_single_string_write(self):
     """A single string definition is dumped as one ``@string`` block."""
     database = BibDatabase()
     database.strings['name1'] = 'value1'
     self.assertEqual(
         bibdeskparser.dumps(database),
         '@string{name1 = {value1}}\n\n',
     )
Beispiel #9
0
 def setUp(self):
     """Store a newly created ``BibDatabase`` on ``self.bd``."""
     self.bd = BibDatabase()
Beispiel #10
0
 def setUp(self):
     """Create a database holding one string and a reference to it.

     ``self.bd`` is the database with ``strings['name'] == 'value'``;
     ``self.bds`` is a ``BibDataString`` bound to that database and key.
     """
     database = BibDatabase()
     database.strings['name'] = 'value'
     self.bd = database
     self.bds = BibDataString(database, 'name')
Beispiel #11
0
 def test_single_preamble_write(self):
     """One preamble is written quoted, with its surrounding spaces kept."""
     database = BibDatabase()
     database.preambles = [' a ']
     self.assertEqual(
         bibdeskparser.dumps(database),
         '@preamble{" a "}\n\n',
     )
class TestEntrySorting(unittest.TestCase):
    """Checks the entry order produced when a database is dumped."""

    # Shared fixture: three field-less entries, stored out of ID order.
    bib_database = BibDatabase()
    bib_database.entries = [
        {'ID': 'b', 'ENTRYTYPE': 'article'},
        {'ID': 'c', 'ENTRYTYPE': 'book'},
        {'ID': 'a', 'ENTRYTYPE': 'book'},
    ]

    @staticmethod
    def _dumps_ordered(database, order):
        """Dump ``database`` with a writer whose ``order_entries_by`` is ``order``."""
        writer = BibTexWriter()
        writer.order_entries_by = order
        return bibdeskparser.dumps(database, writer)

    def test_sort_default(self):
        """Without an explicit writer the entries come out ordered by ID."""
        self.assertEqual(
            bibdeskparser.dumps(self.bib_database),
            "@book{a\n}\n\n@article{b\n}\n\n@book{c\n}\n\n",
        )

    def test_sort_none(self):
        """``order_entries_by = None`` keeps the stored order."""
        self.assertEqual(
            self._dumps_ordered(self.bib_database, None),
            "@article{b\n}\n\n@book{c\n}\n\n@book{a\n}\n\n",
        )

    def test_sort_id(self):
        """Ordering by 'ID' alone."""
        self.assertEqual(
            self._dumps_ordered(self.bib_database, ('ID', )),
            "@book{a\n}\n\n@article{b\n}\n\n@book{c\n}\n\n",
        )

    def test_sort_type(self):
        """Ordering by 'ENTRYTYPE' alone; the two books keep their order."""
        self.assertEqual(
            self._dumps_ordered(self.bib_database, ('ENTRYTYPE', )),
            "@article{b\n}\n\n@book{c\n}\n\n@book{a\n}\n\n",
        )

    def test_sort_type_id(self):
        """Ordering by 'ENTRYTYPE' first and 'ID' second."""
        self.assertEqual(
            self._dumps_ordered(self.bib_database, ('ENTRYTYPE', 'ID')),
            "@article{b\n}\n\n@book{a\n}\n\n@book{c\n}\n\n",
        )

    def test_sort_missing_field(self):
        """Ordering by 'year' when one entry has no 'year' field."""
        database = BibDatabase()
        database.entries = [
            {'ID': 'b', 'ENTRYTYPE': 'article', 'year': '2000'},
            {'ID': 'c', 'ENTRYTYPE': 'book', 'year': '2010'},
            {'ID': 'a', 'ENTRYTYPE': 'book'},  # no 'year' on purpose
        ]
        expected = (
            "@book{a\n}\n\n"
            "@article{b,\n year = {2000}\n}\n\n"
            "@book{c,\n year = {2010}\n}\n\n"
        )
        self.assertEqual(self._dumps_ordered(database, ('year', )), expected)

    def test_unicode_problems(self):
        """Dumping an entry containing a non-ASCII character must not raise."""
        # See #51
        # NOTE: this is not a raw string, so every \' below reaches the
        # parser as a plain apostrophe.
        source = """
        @article{Mesa-Gresa2013,
            abstract = {During a 4-week period half the mice (n = 16) were exposed to EE and the other half (n = 16) remained in a standard environment (SE). Aggr. Behav. 9999:XX-XX, 2013. © 2013 Wiley Periodicals, Inc.},
            author = {Mesa-Gresa, Patricia and P\'{e}rez-Martinez, Asunci\'{o}n and Redolat, Rosa},
            doi = {10.1002/ab.21481},
            file = {:Users/jscholz/Documents/mendeley/Mesa-Gresa, P\'{e}rez-Martinez, Redolat - 2013 - Environmental Enrichment Improves Novel Object Recognition and Enhances Agonistic Behavior.pdf:pdf},
            issn = {1098-2337},
            journal = {Aggressive behavior},
            month = "apr",
            number = {April},
            pages = {269--279},
            pmid = {23588702},
            title = {{Environmental Enrichment Improves Novel Object Recognition and Enhances Agonistic Behavior in Male Mice.}},
            url = {http://www.ncbi.nlm.nih.gov/pubmed/23588702},
            volume = {39},
            year = {2013}
        }
        """
        parsed = bibdeskparser.loads(source)
        # No assertion: the test passes as long as writing does not raise.
        with tempfile.TemporaryFile(mode='w+') as out_file:
            bibdeskparser.dump(parsed, out_file)
Beispiel #13
0
 def test_entries_dict_prop(self):
     """The ``entries_dict`` property equals the ``get_entry_dict()`` result."""
     database = BibDatabase()
     database.entries = self.entries
     from_property = database.entries_dict
     from_method = database.get_entry_dict()
     self.assertEqual(from_property, from_method)
Beispiel #14
0
 def test_ignore_common_strings(self):
     """Loaded common strings are left out of a default dump."""
     database = BibDatabase()
     database.load_common_strings()
     self.assertEqual(bibdeskparser.dumps(database), '')
Beispiel #15
0
    def __init__(
        self,
        data=None,
        customization=None,
        ignore_nonstandard_types=True,
        homogenize_fields=False,
        interpolate_strings=True,
        common_strings=False,
        add_missing_from_crossref=False,
    ):
        """
        Set up a parser for reading BibTeX files.

        ``data`` is accepted but not used by this method.

        :return: parser
        :rtype: `BibTexParser`
        """
        #: Callback function to process BibTeX entries after parsing,
        #: for example to create a list from a string with multiple values.
        #: By default all BibTeX values are treated as simple strings.
        #: Default: `None`.
        self.customization = customization

        #: Ignore non-standard BibTeX types (`book`, `article`, etc).
        #: Default: `True`.
        self.ignore_nonstandard_types = ignore_nonstandard_types

        #: Sanitize BibTeX field names using `alt_dict`, for example change
        #: `link` to `url`. Field names are always converted to lowercase.
        #: Default: `False`.
        self.homogenize_fields = homogenize_fields

        #: Interpolate BibTeX strings or keep the structure.
        self.interpolate_strings = interpolate_strings

        #: Add missing fields from the cross-referenced entry.
        self.add_missing_from_crossref = add_missing_from_crossref

        # On some sample data files the character encoding detection simply
        # hangs, so the encoding is fixed to utf8.
        self.encoding = 'utf8'

        # Pre-defined set of key changes applied when homogenizing fields.
        self.alt_dict = dict(
            keyw='keyword',
            keywords='keyword',
            authors='author',
            editors='editor',
            urls='url',
            link='url',
            links='url',
            subjects='subject',
            xref='crossref',
        )

        #: Load common strings such as months abbreviation.
        #: Default: `False`.
        self.common_strings = common_strings
        self.bib_database = BibDatabase()
        if common_strings:
            self.bib_database.load_common_strings()

        # Set up the parser expression; this reads the options stored above,
        # so it has to run last.
        self._init_expressions()
Beispiel #16
0
class BibTexParser:
    """
    A parser for reading BibTeX bibliographic data files.

    Example::

        from bibdeskparser.bparser import BibTexParser

        bibtex_str = ...

        parser = BibTexParser()
        parser.ignore_nonstandard_types = False
        parser.homogenize_fields = False
        parser.common_strings = False
        bib_database = bibdeskparser.loads(bibtex_str, parser)

    :param customization: function or None (default)
        Customization to apply to parsed entries.
    :param ignore_nonstandard_types: bool (default True)
        If True ignores non-standard bibtex entry types.
    :param homogenize_fields: bool (default False)
        Common field name replacements (as set in alt_dict attribute).
    :param interpolate_strings: bool (default True)
        If True, replace bibtex string by their value, else uses
        BibDataString objects.
    :param common_strings: bool (default False)
        Include common string definitions (e.g. month abbreviations) to
        the bibtex file.
    :param add_missing_from_crossref: bool (default False)
        Resolve BibTeX references set in the crossref field for BibTeX entries
        and add the fields from the referenced entry to the referencing entry.
    """

    def __new__(cls, data=None, **args):
        """
        To catch the old API structure in which creating the parser would
        immediately parse and return data.
        """
        if data is None:
            return super(BibTexParser, cls).__new__(cls)
        # For backwards compatibility: if data is given, parse
        # and return the `BibDatabase` object instead of the parser.
        return parse(data, **args)

    def __init__(
        self,
        data=None,
        customization=None,
        ignore_nonstandard_types=True,
        homogenize_fields=False,
        interpolate_strings=True,
        common_strings=False,
        add_missing_from_crossref=False,
    ):
        """
        Creates a parser for reading BibTeX files

        ``data`` is not used here; ``__new__`` handles it.

        :return: parser
        :rtype: `BibTexParser`
        """
        self.bib_database = BibDatabase()

        #: Load common strings such as months abbreviation
        #: Default: `False`.
        self.common_strings = common_strings
        if self.common_strings:
            self.bib_database.load_common_strings()

        #: Callback function to process BibTeX entries after parsing,
        #: for example to create a list from a string with multiple values.
        #: By default all BibTeX values are treated as simple strings.
        #: Default: `None`.
        self.customization = customization

        #: Ignore non-standard BibTeX types (`book`, `article`, etc).
        #: Default: `True`.
        self.ignore_nonstandard_types = ignore_nonstandard_types

        #: Sanitize BibTeX field names using `alt_dict`, for example change
        #: `link` to `url`. Field names are always converted to lowercase.
        #: Default: `False`.
        self.homogenize_fields = homogenize_fields

        #: Interpolate Bibtex Strings or keep the structure
        self.interpolate_strings = interpolate_strings

        # On some sample data files, the character encoding detection simply
        # hangs. We are going to default to utf8, and mandate it.
        self.encoding = 'utf8'

        # Add missing field from cross-ref
        self.add_missing_from_crossref = add_missing_from_crossref

        # pre-defined set of key changes
        self.alt_dict = {
            'keyw': 'keyword',
            'keywords': 'keyword',
            'authors': 'author',
            'editors': 'editor',
            'urls': 'url',
            'link': 'url',
            'links': 'url',
            'subjects': 'subject',
            'xref': 'crossref',
        }

        # Setup the parser expression
        self._init_expressions()

    def parse(self, bibtex_str, partial=False):
        """Parse a BibTeX string into an object

        :param bibtex_str: BibTeX string
        :type: str
        :param partial: If True, print errors only on parsing failures.
            If False, an exception is raised.
        :type: bool
        :return: bibliographic database
        :rtype: BibDatabase
        """
        bibtex_file_obj = self._bibtex_file_obj(bibtex_str)
        try:
            self._expr.parseFile(bibtex_file_obj)
        except self._expr.ParseException as exc:
            logger.error("Could not parse properly, starting at %s", exc.line)
            if not partial:
                raise

        if self.add_missing_from_crossref:
            self.bib_database.add_missing_from_crossref()

        return self.bib_database

    def parse_file(self, file, partial=False):
        """Parse a BibTeX file into an object

        :param file: BibTeX file or file-like object
        :type: typing.IO
        :param partial: If True, print errors only on parsing failures.
            If False, an exception is raised.
        :type: bool
        :return: bibliographic database
        :rtype: BibDatabase
        """
        return self.parse(file.read(), partial=partial)

    def _init_expressions(self):
        """
        Defines all parser expressions used internally.
        """
        self._expr = BibtexExpression()

        # Handle string as BibDataString object
        self._expr.set_string_name_parse_action(
            lambda s, l, t: BibDataString(self.bib_database, t[0]))

        # The interpolation choice is captured once, when the expressions
        # are built.
        interpolate = self.interpolate_strings

        def maybe_interpolate(expr):
            """Return the text of ``expr`` when interpolating, else ``expr``."""
            return as_text(expr) if interpolate else expr

        self._expr.set_string_expression_parse_action(
            lambda s, l, t: maybe_interpolate(
                BibDataStringExpression.expression_if_needed(t)))

        # Add notice to logger
        self._expr.add_log_function(logger.debug)

        # Set actions
        self._expr.entry.addParseAction(lambda s, l, t: self._add_entry(
            t.get('EntryType'), t.get('Key'), t.get('Fields')))
        self._expr.implicit_comment.addParseAction(
            lambda s, l, t: self._add_comment(t[0]))
        self._expr.explicit_comment.addParseAction(
            lambda s, l, t: self._add_comment(t[0]))
        self._expr.preamble_decl.addParseAction(
            lambda s, l, t: self._add_preamble(t[0]))
        self._expr.string_def.addParseAction(lambda s, l, t: self._add_string(
            t['StringName'].name, t['StringValue']))

    def _bibtex_file_obj(self, bibtex_str):
        """Wrap the input in a text stream, dropping a leading byte-order mark.

        :param bibtex_str: BibTeX source, as ``str`` or as bytes that are
            decoded with ``self.encoding``
        :returns: stream over the text without a leading BOM
        :rtype: io.StringIO
        """
        # Some files have Byte-order marks inserted at the start
        bom = b'\xef\xbb\xbf'
        if isinstance(bibtex_str, str):
            text_bom = str(bom, self.encoding, 'ignore')
            # startswith() is safe on empty input, where indexing [0] raised
            # IndexError.
            if bibtex_str.startswith(text_bom):
                bibtex_str = bibtex_str[len(text_bom):]
        else:
            if bibtex_str.startswith(bom):
                bibtex_str = bibtex_str[len(bom):]
            bibtex_str = bibtex_str.decode(encoding=self.encoding)
        return io.StringIO(bibtex_str)

    def _clean_val(self, val):
        """ Clean instring before adding to dictionary

        :param val: a value
        :type val: string
        :returns: string -- ``''`` for a falsy value or ``"{}"``, else ``val``
        """
        if not val or val == "{}":
            return ''
        return val

    def _clean_key(self, key):
        """ Lowercase a key and return as str.

        :param key: a key
        :type key: str
        :returns: (str) string-value
        """
        key = key.lower()
        if not isinstance(key, str):
            # Non-str keys (e.g. bytes) are decoded as UTF-8.
            return str(key, 'utf-8')
        return key

    def _clean_field_key(self, key):
        """ Clean a bibtex field key and homogenize alternative forms.

        :param key: a key
        :type key: str
        :returns: string-value
        """
        key = self._clean_key(key)
        if self.homogenize_fields:
            # Keys without an alternative form are kept as they are.
            key = self.alt_dict.get(key, key)
        return key

    def _add_entry(self, entry_type, entry_id, fields):
        """ Adds a parsed entry.
        Includes checking type and fields, cleaning, applying customizations.

        :param entry_type: the entry type
        :type entry_type: str
        :param entry_id: the entry bibid
        :type entry_id: str
        :param fields: the fields and values
        :type fields: dictionary
        :returns: None; the entry is appended to ``self.bib_database.entries``
        """
        d = {}
        entry_type = self._clean_key(entry_type)
        if self.ignore_nonstandard_types and entry_type not in STANDARD_TYPES:
            logger.warning('Entry type %s not standard. Not considered.',
                           entry_type)
            return
        for key in fields:
            d[self._clean_field_key(key)] = self._clean_val(fields[key])
        # Set after the fields, so these two always win over a field that
        # cleans to the same key.
        d['ENTRYTYPE'] = entry_type
        d['ID'] = entry_id

        crossref = d.get('crossref', None)
        if self.add_missing_from_crossref and crossref is not None:
            d['_crossref'] = crossref

        if self.customization is not None:
            logger.debug('Apply customizations and return dict')
            d = self.customization(d)

        self.bib_database.entries.append(d)

    def _add_comment(self, comment):
        """
        Stores a comment in the list of comment.

        :param comment: the parsed comment
        :type comment: str
        """
        logger.debug('Store comment in list of comments: %r', comment)
        self.bib_database.comments.append(comment)

    def _add_string(self, string_key, string):
        """
        Stores a new string in the string dictionary.

        :param string_key: the string key
        :type string_key: str
        :param string: the string value
        :type string: str
        """
        if string_key in self.bib_database.strings:
            logger.warning('Overwritting existing string for key: %s.',
                           string_key)
        logger.debug('Store string: %s -> %s', string_key, string)
        self.bib_database.strings[string_key] = self._clean_val(string)

    def _add_preamble(self, preamble):
        """
        Stores a preamble.

        :param preamble: the parsed preamble
        :type preamble: str
        """
        logger.debug('Store preamble in list of preambles')
        self.bib_database.preambles.append(preamble)
Beispiel #17
0
 def test_multiple_string_write(self):
     """Several preambles are each written as their own ``@preamble`` block.

     Despite its name, this test covers preambles, not ``@string`` entries.
     """
     database = BibDatabase()
     database.preambles = [' a ', 'b']
     self.assertEqual(
         bibdeskparser.dumps(database),
         '@preamble{" a "}\n\n@preamble{"b"}\n\n',
     )
Beispiel #18
0
 def test_ignore_common_strings_only_if_not_overloaded(self):
     """A common string given a new value is written; the others are not."""
     database = BibDatabase()
     database.load_common_strings()
     # Replace the value held under the 'jan' key.
     database.strings['jan'] = 'Janvier'
     self.assertEqual(
         bibdeskparser.dumps(database),
         '@string{jan = {Janvier}}\n\n',
     )
Beispiel #19
0
 def test_entries_list_method(self):
     """``get_entry_list()`` returns the same entries as the attribute."""
     database = BibDatabase()
     database.entries = self.entries
     from_attribute = database.entries
     from_method = database.get_entry_list()
     self.assertEqual(from_attribute, from_method)