コード例 #1
0
def run(aux_file, cache_file, bibboost_bib_file):
    """
    Write a bib file holding only the entries cited in ``aux_file``.

    This function assumes that we are in the folder of aux_file, and that
    aux_file is just a `basename`.

    Citations that cannot be found in the cache are reported with a single
    ``logging.warning`` call; they do not abort the run.

    :param aux_file: the aux file the user wants to process
    :param cache_file: the file in which the cache will be stored
    :param bibboost_bib_file: the file which will contain the resulting bibtex entries
    :return: None
    """
    bib_files, citations = bibboost.aux.parse_aux_file(aux_file)

    with bibboost.cache.CacheBib(cache_file) as cache:
        # Bring the cache up to date with the bib files before querying it.
        cache.update_db(bib_files)
        used_entries = OrderedCaseInsensitiveDict(
            (key, entry) for key, entry in cache.get_entries(citations))

    missed_entries = [key for key in citations if key not in used_entries]
    if missed_entries:
        logging.warning("missing entries: {}".format(
            ", ".join(missed_entries)))

    with open(bibboost_bib_file, "w") as f:
        f.write("""% This file is generated by bibboost
% It should not be modified manually


""")
        # values() exists on both Python 2 and Python 3 mappings, unlike
        # the Python-2-only itervalues().
        f.write("\n\n".join(used_entries.values()))

    # Point the aux file at the freshly generated bib file.
    bibboost.aux.change_bib_file(aux_file, bibboost_bib_file)
コード例 #2
0
class Entry(object):
    """A bibliography entry."""

    key = None
    """Entry key (for example, ``'fukushima1980neocognitron'``)."""

    def __init__(self, type_, fields=None, persons=None, collection=None):
        """
        Create an entry.

        :param type_: entry type name; stored lowercased in ``type`` and
            verbatim in ``original_type``.
        :param fields: initial fields, wrapped in a ``FieldDict``
            (defaults to an empty dict).
        :param persons: initial persons by role, wrapped in an
            ``OrderedCaseInsensitiveDict`` (defaults to an empty dict).
        :param collection: object stored as ``collection``;
            :py:meth:`get_crossref` looks entries up in its ``entries``.
        """
        if fields is None:
            fields = {}
        if persons is None:
            persons = {}
        self.type = type_.lower()
        """Entry type (``'book'``, ``'article'``, etc.)."""
        self.original_type = type_

        self.fields = FieldDict(self, fields)
        """A dictionary of entry fields.
        The dictionary is ordered and case-insensitive."""

        self.persons = OrderedCaseInsensitiveDict(persons)
        """A dictionary of entry persons, by their roles.

        The most often used roles are ``'author'`` and ``'editor'``.
        """

        self.collection = collection

        # for BibTeX interpreter
        self.vars = {}

    def __eq__(self, other):
        """Compare two entries by type, fields and persons."""
        if not isinstance(other, Entry):
            # Let Python try the reflected comparison on ``other`` instead of
            # comparing a ``super`` proxy object, which never delegates.
            return NotImplemented
        return (self.type == other.type and self.fields == other.fields
                and self.persons == other.persons)

    def __repr__(self):
        """Return an ``Entry(...)`` style representation."""
        # representing fields as FieldDict causes problems with representing
        # fields.parent, so represent it as a list of tuples
        repr_fields = repr(self.fields.items())

        return 'Entry({type_}, fields={fields}, persons={persons})'.format(
            type_=repr(self.type),
            fields=repr_fields,
            persons=repr(self.persons),
        )

    def get_crossref(self):
        """Return the entry of ``self.collection`` named by the ``crossref`` field."""
        return self.collection.entries[self.fields['crossref']]

    def add_person(self, person, role):
        """Append ``person`` to the list stored under ``role``, creating it if needed."""
        self.persons.setdefault(role, []).append(person)

    def lower(self):
        """
        Return a new entry of the same class built from ``self.type``,
        ``self.fields.lower()`` and ``self.persons.lower()``, sharing the
        same collection.
        """
        return type(self)(
            self.type,
            fields=self.fields.lower(),
            persons=self.persons.lower(),
            collection=self.collection,
        )
コード例 #3
0
 def __init__(self, type_, fields=None, persons=None):
     """
     Create an entry of the given type.

     ``fields`` and ``persons`` default to empty dicts and are both stored
     as ``OrderedCaseInsensitiveDict`` instances.
     """
     self.type = type_.lower()
     # Keep the caller's spelling of the type next to the lowercased one.
     self.original_type = type_
     self.fields = OrderedCaseInsensitiveDict({} if fields is None else fields)
     self.persons = OrderedCaseInsensitiveDict({} if persons is None else persons)
コード例 #4
0
ファイル: getinspire.py プロジェクト: misho104/getinspire
    def fetch_and_update(self, bibtex=True, latex_format='EU', append_bbl=False):
        """
        Fetch the references that are not yet known and update the files.

        Every reference in ``self.references`` whose key (compared
        case-insensitively) is not already present is fetched, either as
        BibTeX or as LaTeX. References that come back with a new key are
        replaced in the TeX text, the TeX file is written, and the newly
        fetched contents are written to the bib file or the bbl file.

        :param bibtex: if true, work with BibTeX and the bib file; otherwise
            work with LaTeX and the bbl file.
        :param latex_format: format passed to ``ref.fetch_latex``.
        :param append_bbl: passed through to ``modify_and_write_bbl``.
        """
        existing_keys = self.bib().entries.keys() if bibtex else self.bbl_keys()
        # A set gives O(1) membership tests; keys are compared lowercased.
        existing_keys = {k.lower() for k in existing_keys}

        replace_keys = ODict()
        new_entries = ODict()
        type_name = 'BibTeX' if bibtex else 'LaTeX({})'.format(latex_format)

        for ref in self.references.values():
            if ref.key.lower() in existing_keys:
                if DEBUG:
                    print('skip existing: {}'.format(ref.key))
                continue
            if Key.is_unknown(ref.key):
                print('WARNING: skip non-existing but unknown-type key {}'.format(ref.key))
                continue

            try:
                print('fetching', type_name, 'from inspire:', ref.key, end=' ')
                if bibtex:
                    ref.fetch_bibtex()
                else:
                    ref.fetch_latex(latex_format)
                sys.stdout.flush()
                time.sleep(0.3)
            # A tuple is required here: ``A or B`` evaluates to ``A`` alone,
            # so MultipleRecordsFound would never be caught.
            except (RecordNotFound, MultipleRecordsFound) as e:
                print('\nERROR: {}'.format(e))
                continue

            if ref.new_key:
                replace_keys[ref.old_key] = ref
                print('->', ref.new_key, end=' ')
                sys.stdout.flush()
                time.sleep(0.3)
            # ref.key is checked again because it may differ from the key
            # tested at the top of the loop when a new key was assigned.
            if ref.key.lower() not in existing_keys:
                existing_keys.add(ref.key.lower())
                new_entries[ref.key] = ref
                print('[new entry]', end='')
            print('')

        replacements = []
        for ref in replace_keys.values():
            for appearance in ref.positions:
                replacements.append((appearance, ref.old_key, ref.new_key))
        self.replace_text(replacements)
        self.write_tex()

        new_ref_contents = '\n'.join(r.content for r in new_entries.values())
        if bibtex:
            self.append_and_update_bib(new_ref_contents)
        else:
            self.modify_and_write_bbl(new_ref_contents, append_bbl)
コード例 #5
0
    def __init__(self, type_, fields=None, persons=None, collection=None):
        """
        Create an entry of the given type.

        ``fields`` is wrapped in a ``FieldDict`` bound to this entry and
        ``persons`` in an ``OrderedCaseInsensitiveDict``; both default to
        empty dicts. ``collection`` is stored as given.
        """
        initial_fields = {} if fields is None else fields
        initial_persons = {} if persons is None else persons

        self.type = type_.lower()
        self.original_type = type_
        self.fields = FieldDict(self, initial_fields)
        self.persons = OrderedCaseInsensitiveDict(initial_persons)
        self.collection = collection

        # Scratch variables used by the BibTeX interpreter.
        self.vars = {}
コード例 #6
0
ファイル: __init__.py プロジェクト: dineiar/pybtex
class Entry(object):
    """Bibliography entry. Important members are:
    - persons (a dict of Person objects)
    - fields (a dict of strings)
    """

    def __init__(self, type_, fields=None, persons=None, collection=None):
        """
        Create an entry.

        :param type_: entry type name; stored lowercased in ``type`` and
            verbatim in ``original_type``.
        :param fields: initial fields, wrapped in a ``FieldDict``
            (defaults to an empty dict).
        :param persons: initial persons by role, wrapped in an
            ``OrderedCaseInsensitiveDict`` (defaults to an empty dict).
        :param collection: object stored as ``collection``;
            ``get_crossref`` looks entries up in its ``entries``.
        """
        if fields is None:
            fields = {}
        if persons is None:
            persons = {}
        self.type = type_.lower()
        self.original_type = type_
        self.fields = FieldDict(self, fields)
        self.persons = OrderedCaseInsensitiveDict(persons)
        self.collection = collection

        # for BibTeX interpreter
        self.vars = {}

    def __eq__(self, other):
        """Compare two entries by type, fields and persons."""
        if not isinstance(other, Entry):
            # Let Python try the reflected comparison on ``other`` instead of
            # comparing a ``super`` proxy object, which never delegates.
            return NotImplemented
        return (
                self.type == other.type
                and self.fields == other.fields
                and self.persons == other.persons
        )

    def __repr__(self):
        """Return an ``Entry(...)`` style representation."""
        return 'Entry({type_}, fields={fields}, persons={persons})'.format(
            type_=repr(self.type),
            fields=repr(self.fields),
            persons=repr(self.persons),
        )

    def get_crossref(self):
        """Return the entry of ``self.collection`` named by the ``crossref`` field."""
        return self.collection.entries[self.fields['crossref']]

    def add_person(self, person, role):
        """Append ``person`` to the list stored under ``role``, creating it if needed."""
        self.persons.setdefault(role, []).append(person)

    def lower(self):
        """
        Return a new entry of the same class built from ``self.type``,
        ``self.fields.lower()`` and ``self.persons.lower()``, sharing the
        same collection.
        """
        return type(self)(
            self.type,
            fields=self.fields.lower(),
            persons=self.persons.lower(),
            collection=self.collection,
        )
コード例 #7
0
    def __init__(self, entries=None, preamble=None, wanted_entries=None, min_crossrefs=2):
        """
        Container of bibliography entries, indexed by their keys.

        A :py:class:`.BibliographyData` object holds a dictionary of
        :py:class:`.Entry` objects and, optionally, the LaTeX preamble
        defined by ``@PREAMBLE`` commands in the BibTeX file.
        """

        self.entries = OrderedCaseInsensitiveDict()
        '''A dictionary of bibliography entries referenced by their keys.

        The dictionary is case insensitive:

        >>> bib_data = parse_string("""
        ...     @ARTICLE{gnats,
        ...         author = {L[eslie] A. Aamport},
        ...         title = {The Gnats and Gnus Document Preparation System},
        ...     }
        ... """, 'bibtex')
        >>> bib_data.entries['gnats'] == bib_data.entries['GNATS']
        True

        '''

        self.crossref_count = CaseInsensitiveDefaultDict(int)
        self.min_crossrefs = min_crossrefs
        self._preamble = []

        if wanted_entries is None:
            self.wanted_entries = None
            self.citations = CaseInsensitiveSet()
        else:
            self.wanted_entries = CaseInsensitiveSet(wanted_entries)
            self.citations = CaseInsensitiveSet(wanted_entries)

        if entries:
            # Accept either a mapping or an iterable of (key, entry) pairs.
            pairs = entries.items() if isinstance(entries, Mapping) else entries
            for key, entry in pairs:
                self.add_entry(key, entry)

        if preamble:
            self._preamble.extend(preamble)
コード例 #8
0
ファイル: __init__.py プロジェクト: dineiar/pybtex
 def __init__(self, entries=None, preamble=None, wanted_entries=None, min_crossrefs=2):
     """
     Create a bibliography, optionally pre-populated.

     :param entries: a mapping, or an iterable of ``(key, entry)`` pairs;
         each pair is passed to ``self.add_entry``.
     :param preamble: iterable of items appended to the internal
         preamble list.
     :param wanted_entries: if not None, used to build the
         case-insensitive ``wanted_entries`` and ``citations`` sets.
     :param min_crossrefs: stored as ``self.min_crossrefs``.
     """
     self.entries = OrderedCaseInsensitiveDict()
     self.crossref_count = CaseInsensitiveDefaultDict(int)
     self.min_crossrefs = min_crossrefs
     self._preamble = []
     if wanted_entries is not None:
         self.wanted_entries = CaseInsensitiveSet(wanted_entries)
         self.citations = CaseInsensitiveSet(wanted_entries)
     else:
         self.wanted_entries = None
         self.citations = CaseInsensitiveSet()
     if entries:
         if isinstance(entries, Mapping):
             # items() exists on Python 2 and Python 3 mappings alike,
             # unlike the Python-2-only iteritems().
             entries = entries.items()
         for (key, entry) in entries:
             self.add_entry(key, entry)
     if preamble:
         self._preamble.extend(preamble)
コード例 #9
0
ファイル: only_used.py プロジェクト: phfaist/bibolamazi
    def filter_bibolamazifile(self, bibolamazifile):
        """
        Restrict the entries of ``bibolamazifile`` to the cited ones.

        The set of citations comes from
        ``auxfile.get_all_auxfile_citations``; entries whose key is in that
        set are collected in their original order and handed to
        ``bibolamazifile.setEntries``.
        """

        jobname = auxfile.get_action_jobname(self.jobname, bibolamazifile)

        cited_keys = auxfile.get_all_auxfile_citations(
            jobname, bibolamazifile, self.name(), self.search_dirs,
            return_set=True)

        logger.longdebug("set of citations: %r", cited_keys)

        all_entries = bibolamazifile.bibliographyData().entries

        kept_entries = OrderedCaseInsensitiveDict()
        for key, entry in iteritems(all_entries):
            if key not in cited_keys:
                continue
            kept_entries[key] = entry

        logger.longdebug("the new database has entries %r" %(kept_entries.keys()))

        bibolamazifile.setEntries(iteritems(kept_entries))

        return
コード例 #10
0
    def __init__(self, type_, fields=None, persons=None, collection=None):
        """
        Create an entry of the given type.

        ``fields`` and ``persons`` default to empty dicts; ``collection``
        is stored as given.
        """
        initial_fields = fields if fields is not None else {}
        initial_persons = persons if persons is not None else {}

        self.type = type_.lower()
        """Entry type (``'book'``, ``'article'``, etc.)."""
        self.original_type = type_

        self.fields = FieldDict(self, initial_fields)
        """A dictionary of entry fields.
        The dictionary is ordered and case-insensitive."""

        self.persons = OrderedCaseInsensitiveDict(initial_persons)
        """A dictionary of entry persons, by their roles.

        The most often used roles are ``'author'`` and ``'editor'``.
        """

        self.collection = collection

        # Scratch variables used by the BibTeX interpreter.
        self.vars = {}
コード例 #11
0
ファイル: only_used.py プロジェクト: Superbeet/bibolamazi
    def filter_bibolamazifile(self, bibolamazifile):
        """
        Restrict the entries of ``bibolamazifile`` to the cited ones.

        The set of citations comes from
        ``auxfile.get_all_auxfile_citations`` for ``self.jobname``; entries
        whose key is in that set are collected in their original order and
        handed to ``bibolamazifile.setEntries``.
        """

        logger.debug("Getting list of used citations from %s.aux." %(self.jobname))

        cited_keys = auxfile.get_all_auxfile_citations(
            self.jobname, bibolamazifile, self.name(), self.search_dirs,
            return_set=True)

        logger.longdebug("set of citations: %r"%(cited_keys))

        all_entries = bibolamazifile.bibliographyData().entries

        kept_entries = OrderedCaseInsensitiveDict()
        for key, entry in all_entries.iteritems():
            if key not in cited_keys:
                continue
            kept_entries[key] = entry

        logger.longdebug("the new database has entries %r" %(kept_entries.keys()))

        bibolamazifile.setEntries(kept_entries.iteritems())

        return
コード例 #12
0
def convert_bibtex_keys(input_file: str, output_file: str):
    """
    Rewrite a bibtex file so that its keys use the Google Scholar format.

    The entries keep their order; each one is re-registered under the
    replacement key paired with its original key by ``obtain_replace_keys``.
    The result is passed through ``update_arxiv_information`` and written
    as UTF-8.

    @input_file: string, input file name.
    @output_file: string, output file name.
    """
    bib_data = parse_file(input_file)
    old_keys, replacement_keys = obtain_replace_keys(bib_data)

    renamed_entries = OrderedCaseInsensitiveDict()
    for old_key, replacement_key in zip(old_keys, replacement_keys):
        entry = bib_data.entries[old_key]
        renamed_entries[replacement_key] = entry

    bib_data.entries = renamed_entries
    bib_data = update_arxiv_information(bib_data)

    with open(output_file, 'w', encoding='utf-8') as ofile:
        bib_data.to_file(ofile)
コード例 #13
0
    def __init__(self, type_, fields=None, persons=None, collection=None):
        """
        Create an entry of the given type.

        ``fields`` is wrapped in a ``FieldDict`` bound to this entry, and a
        ``RichFieldProxyDict`` over that dict is exposed as ``rich_fields``.
        ``persons`` is wrapped in an ``OrderedCaseInsensitiveDict``. Both
        default to empty dicts.
        """
        self.type = type_.lower()
        self.original_type = type_

        self.fields = FieldDict(self, {} if fields is None else fields)
        # The proxy is built on top of the FieldDict created just above.
        self.rich_fields = RichFieldProxyDict(self.fields)

        self.persons = OrderedCaseInsensitiveDict({} if persons is None else persons)

        self.collection = collection

        # Scratch variables used by the BibTeX interpreter.
        self.vars = {}
コード例 #14
0
    def __init__(self, entries=None, preamble=None, wanted_entries=None, min_crossrefs=2):
        """
        A :py:class:`.BibliographyData` object contains a dictionary of bibliography
        entries referenced by their keys.
        Each entry represented by an :py:class:`.Entry` object.

        Additionally, :py:class:`.BibliographyData` may contain a LaTeX
        preamble defined by ``@PREAMBLE`` commands in the BibTeX file.

        :param entries: a mapping, or an iterable of ``(key, entry)`` pairs;
            each pair is passed to ``self.add_entry``.
        :param preamble: iterable of items appended to the internal
            preamble list.
        :param wanted_entries: if not None, used to build the
            case-insensitive ``wanted_entries`` and ``citations`` sets.
        :param min_crossrefs: stored as ``self.min_crossrefs``.
        """

        self.entries = OrderedCaseInsensitiveDict()
        '''A dictionary of bibliography entries referenced by their keys.

        The dictionary is case insensitive:

        >>> bib_data = parse_string("""
        ...     @ARTICLE{gnats,
        ...         author = {L[eslie] A. Aamport},
        ...         title = {The Gnats and Gnus Document Preparation System},
        ...     }
        ... """, 'bibtex')
        >>> bib_data.entries['gnats'] == bib_data.entries['GNATS']
        True

        '''

        self.crossref_count = CaseInsensitiveDefaultDict(int)
        self.min_crossrefs = min_crossrefs
        self._preamble = []
        if wanted_entries is not None:
            self.wanted_entries = CaseInsensitiveSet(wanted_entries)
            self.citations = CaseInsensitiveSet(wanted_entries)
        else:
            self.wanted_entries = None
            self.citations = CaseInsensitiveSet()
        if entries:
            if isinstance(entries, Mapping):
                # items() exists on Python 2 and Python 3 mappings alike,
                # unlike the Python-2-only iteritems().
                entries = entries.items()
            for (key, entry) in entries:
                self.add_entry(key, entry)
        if preamble:
            self._preamble.extend(preamble)
コード例 #15
0
ファイル: getinspire.py プロジェクト: misho104/getinspire
    def __init__(self, filename):
        """
        Load a TeX file and index the citation keys found in it.

        ``filename`` is tried first as given and then with ``.tex`` appended;
        the first path that can be opened wins. Every match of
        ``self.CITE_REGEX`` in the comment-stripped text is split on commas,
        and each key is recorded in ``self.references`` (in order of first
        appearance) together with the position of each occurrence.

        :param filename: path of the TeX file, with or without ``.tex``.
        :raises FileLookupFailedError: if none of the candidate paths can
            be opened.
        """
        self.text = None
        errors = []
        possible_paths = [filename, filename + '.tex']
        for path in possible_paths:
            try:
                with open(path, mode='r') as file:
                    self.text_original = file.read()
                    self.text = self.text_original
                    self.filename = path
                    self.stem = os.path.splitext(path)[0]
                    break
            except IOError as e:
                # Remember the failure and fall through to the next candidate.
                errors.append(e)
        if self.text is None:
            raise FileLookupFailedError(errors=errors, paths=possible_paths)

        self._bib_name = None
        self._bib = None
        self._bbl = None

        # generate references
        text_uncommented = self.strip_comment(self.text)
        self.references = ODict()  # Order is important!
        for cite in self.CITE_REGEX.finditer(text_uncommented):
            pos = Position(str=text_uncommented[:cite.start()])
            pos.shift(cite.group('pre'))
            for key_raw in re.split(r',', cite.group('body')):
                stripping = re.match(r'^(\s*)(\S+)(\s*)$', key_raw)
                if stripping is None:
                    # Empty item (e.g. a trailing comma) or a key containing
                    # whitespace: there is no usable key, so skip it, but
                    # still advance the position past the item and its comma.
                    pos.shift(key_raw)
                    pos.shift(',')
                    continue
                pos.shift(stripping.group(1))
                key = stripping.group(2)
                if key not in self.references:
                    self.references[key] = Ref(key, position=pos.copy())
                else:
                    self.references[key].positions.append(pos.copy())
                pos.shift(stripping.group(2)).shift(stripping.group(3))
                pos.shift(',')
コード例 #16
0
class BibliographyData(object):
    def __init__(self, entries=None, preamble=None, wanted_entries=None, min_crossrefs=2):
        """
        A :py:class:`.BibliographyData` object contains a dictionary of bibliography
        entries referenced by their keys.
        Each entry represented by an :py:class:`.Entry` object.

        Additionally, :py:class:`.BibliographyData` may contain a LaTeX
        preamble defined by ``@PREAMBLE`` commands in the BibTeX file.

        :param entries: a mapping, or an iterable of ``(key, entry)`` pairs;
            each pair is passed to :py:meth:`add_entry`.
        :param preamble: iterable of strings appended to the preamble.
        :param wanted_entries: if not None, only these keys (plus keys they
            cross-reference) are accepted by :py:meth:`add_entry`.
        :param min_crossrefs: stored as ``self.min_crossrefs``.
        """

        self.entries = OrderedCaseInsensitiveDict()
        '''A dictionary of bibliography entries referenced by their keys.

        The dictionary is case insensitive:

        >>> bib_data = parse_string("""
        ...     @ARTICLE{gnats,
        ...         author = {L[eslie] A. Aamport},
        ...         title = {The Gnats and Gnus Document Preparation System},
        ...     }
        ... """, 'bibtex')
        >>> bib_data.entries['gnats'] == bib_data.entries['GNATS']
        True

        '''

        self.crossref_count = CaseInsensitiveDefaultDict(int)
        self.min_crossrefs = min_crossrefs
        self._preamble = []
        if wanted_entries is not None:
            self.wanted_entries = CaseInsensitiveSet(wanted_entries)
            self.citations = CaseInsensitiveSet(wanted_entries)
        else:
            self.wanted_entries = None
            self.citations = CaseInsensitiveSet()
        if entries:
            if isinstance(entries, Mapping):
                # items() exists on Python 2 and Python 3 mappings alike,
                # unlike the Python-2-only iteritems().
                entries = entries.items()
            for (key, entry) in entries:
                self.add_entry(key, entry)
        if preamble:
            self._preamble.extend(preamble)

    def __eq__(self, other):
        """Compare two bibliographies by their entries and preamble."""
        if not isinstance(other, BibliographyData):
            # Let Python try the reflected comparison on ``other`` instead of
            # comparing a ``super`` proxy object, which never delegates.
            return NotImplemented
        return (
            self.entries == other.entries
            and self._preamble == other._preamble
        )

    def __repr__(self):
        """Return a ``BibliographyData(...)`` style representation."""
        return 'BibliographyData(entries={entries}, preamble={preamble})'.format(
            entries=repr(self.entries),
            preamble=repr(self._preamble),
        )

    def add_to_preamble(self, *values):
        """Append the given values to the preamble."""
        self._preamble.extend(values)

    @property
    def preamble(self):
        r'''
        LaTeX preamble.

        >>> bib_data = parse_string(r"""
        ...     @PREAMBLE{"\newcommand{\noopsort}[1]{}"}
        ... """, 'bibtex')
        >>> print bib_data.preamble
        \newcommand{\noopsort}[1]{}

        .. versionadded:: 0.19
            Earlier versions used :py:meth:`.get_preamble()`, which is now deprecated.
        '''
        return ''.join(self._preamble)

    @deprecated('0.19', 'use BibliographyData.preamble instead')
    def get_preamble(self):
        """
        .. deprecated:: 0.19
            Use :py:attr:`.preamble` instead.
        """
        return self.preamble

    def want_entry(self, key):
        """
        Tell whether an entry with this key should be kept.

        True when no ``wanted_entries`` filter is set, when ``key`` is in
        it, or when it contains the wildcard ``'*'``.
        """
        return (
            self.wanted_entries is None
            or key in self.wanted_entries
            or '*' in self.wanted_entries
        )

    def get_canonical_key(self, key):
        """
        Return the canonical key that ``self.citations`` holds for ``key``,
        or ``key`` itself when it is not among the citations.
        """
        if key in self.citations:
            return self.citations.get_canonical_key(key)
        else:
            return key

    def add_entry(self, key, entry):
        """
        Register ``entry`` under ``key``.

        Unwanted keys are ignored silently; a key that is already present is
        reported through ``report_error`` and the entry is not added.
        On success the entry's ``collection`` and ``key`` attributes are set.
        """
        if not self.want_entry(key):
            return
        if key in self.entries:
            report_error(BibliographyDataError('repeated bibliograhpy entry: %s' % key))
            return
        entry.collection = self
        entry.key = self.get_canonical_key(key)
        self.entries[entry.key] = entry
        try:
            crossref = entry.fields['crossref']
        except KeyError:
            pass
        else:
            # A wanted entry pulls in the entry it cross-references.
            if self.wanted_entries is not None:
                self.wanted_entries.add(crossref)

    def add_entries(self, entries):
        """Add every ``(key, entry)`` pair of ``entries`` via :py:meth:`add_entry`."""
        for key, entry in entries:
            self.add_entry(key, entry)

    def _get_crossreferenced_citations(self, citations, min_crossrefs):
        """
        Get citations not cited explicitly but referenced by other citations.

        >>> from pybtex.database import Entry
        >>> data = BibliographyData({
        ...     'main_article': Entry('article', {'crossref': 'xrefd_arcicle'}),
        ...     'xrefd_arcicle': Entry('article'),
        ... })
        >>> list(data._get_crossreferenced_citations([], min_crossrefs=1))
        []
        >>> list(data._get_crossreferenced_citations(['main_article'], min_crossrefs=1))
        ['xrefd_arcicle']
        >>> list(data._get_crossreferenced_citations(['Main_article'], min_crossrefs=1))
        ['xrefd_arcicle']
        >>> list(data._get_crossreferenced_citations(['main_article'], min_crossrefs=2))
        []
        >>> list(data._get_crossreferenced_citations(['xrefd_arcicle'], min_crossrefs=1))
        []

        >>> data2 = BibliographyData(data.entries, wanted_entries=data.entries.keys())
        >>> list(data2._get_crossreferenced_citations([], min_crossrefs=1))
        []
        >>> list(data2._get_crossreferenced_citations(['main_article'], min_crossrefs=1))
        ['xrefd_arcicle']
        >>> list(data2._get_crossreferenced_citations(['Main_article'], min_crossrefs=1))
        ['xrefd_arcicle']
        >>> list(data2._get_crossreferenced_citations(['main_article'], min_crossrefs=2))
        []
        >>> list(data2._get_crossreferenced_citations(['xrefd_arcicle'], min_crossrefs=1))
        []
        >>> list(data2._get_crossreferenced_citations(['xrefd_arcicle'], min_crossrefs=1))
        []

        """

        crossref_count = CaseInsensitiveDefaultDict(int)
        citation_set = CaseInsensitiveSet(citations)
        for citation in citations:
            try:
                entry = self.entries[citation]
                crossref = entry.fields['crossref']
            except KeyError:
                # Unknown citation, or an entry without a cross-reference.
                continue
            try:
                crossref_entry = self.entries[crossref]
            except KeyError:
                report_error(BibliographyDataError(
                    'bad cross-reference: entry "{key}" refers to '
                    'entry "{crossref}" which does not exist.'.format(
                        key=citation, crossref=crossref,
                    )
                ))
                continue

            canonical_crossref = crossref_entry.key
            crossref_count[canonical_crossref] += 1
            # Yield each target once, as soon as it has been referenced often
            # enough, and only if it is not among the citations already.
            if (
                crossref_count[canonical_crossref] >= min_crossrefs
                and canonical_crossref not in citation_set
            ):
                citation_set.add(canonical_crossref)
                yield canonical_crossref

    def _expand_wildcard_citations(self, citations):
        r"""
        Expand wildcard citations (\citation{*} in .aux file).

        >>> from pybtex.database import Entry
        >>> data = BibliographyData((
        ...     ('uno', Entry('article')),
        ...     ('dos', Entry('article')),
        ...     ('tres', Entry('article')),
        ...     ('cuatro', Entry('article')),
        ... ))
        >>> list(data._expand_wildcard_citations([]))
        []
        >>> list(data._expand_wildcard_citations(['*']))
        ['uno', 'dos', 'tres', 'cuatro']
        >>> list(data._expand_wildcard_citations(['uno', '*']))
        ['uno', 'dos', 'tres', 'cuatro']
        >>> list(data._expand_wildcard_citations(['dos', '*']))
        ['dos', 'uno', 'tres', 'cuatro']
        >>> list(data._expand_wildcard_citations(['*', 'uno']))
        ['uno', 'dos', 'tres', 'cuatro']
        >>> list(data._expand_wildcard_citations(['*', 'DOS']))
        ['uno', 'dos', 'tres', 'cuatro']

        """

        # Keys already yielded, compared case-insensitively.
        citation_set = CaseInsensitiveSet()
        for citation in citations:
            if citation == '*':
                for key in self.entries:
                    if key not in citation_set:
                        citation_set.add(key)
                        yield key
            else:
                if citation not in citation_set:
                    citation_set.add(citation)
                    yield citation

    def add_extra_citations(self, citations, min_crossrefs):
        """
        Return ``citations`` with wildcards expanded, followed by the
        cross-referenced citations they pull in.
        """
        expanded_citations = list(self._expand_wildcard_citations(citations))
        crossrefs = list(self._get_crossreferenced_citations(expanded_citations, min_crossrefs))
        return expanded_citations + crossrefs

    def to_string(self, bib_format, **kwargs):
        """
        Return the data as a unicode string in the given format.

        :param bib_format: Data format ("bibtex", "yaml", etc.).

        .. versionadded:: 0.19
        """
        writer = find_plugin('pybtex.database.output', bib_format)(**kwargs)
        return writer.to_string(self)

    def to_bytes(self, bib_format, **kwargs):
        """
        Return the data as a byte string in the given format.

        :param bib_format: Data format ("bibtex", "yaml", etc.).

        .. versionadded:: 0.19
        """
        writer = find_plugin('pybtex.database.output', bib_format)(**kwargs)
        return writer.to_bytes(self)

    def to_file(self, file, bib_format=None, **kwargs):
        """
        Save the data to a file.

        :param file: A file name or a file-like object.
        :param bib_format: Data format ("bibtex", "yaml", etc.).
            If not specified, Pybtex will try to guess by the file name.

        .. versionadded:: 0.19
        """
        if isinstance(file, basestring):
            filename = file
        else:
            # File-like objects may or may not carry a name.
            filename = getattr(file, 'name', None)
        writer = find_plugin('pybtex.database.output', bib_format, filename=filename)(**kwargs)
        return writer.write_file(self, file)

    def lower(self):
        u'''
        Return another :py:class:`.BibliographyData` with all identifiers converted to lowercase.

        >>> data = parse_string("""
        ...     @BOOK{Obrazy,
        ...         title = "Obrazy z Rus",
        ...         author = "Karel Havlíček Borovský",
        ...     }
        ...     @BOOK{Elegie,
        ...         title = "Tirolské elegie",
        ...         author = "Karel Havlíček Borovský",
        ...     }
        ... """, 'bibtex')
        >>> data_lower = data.lower()
        >>> data_lower.entries.keys()
        ['obrazy', 'elegie']
        >>> for entry in data_lower.entries.values():
        ...     entry.key
        ...     entry.persons.keys()
        ...     entry.fields.keys()
        'obrazy'
        ['author']
        ['title']
        'elegie'
        ['author']
        ['title']

        '''

        entries_lower = ((key.lower(), entry.lower()) for key, entry in self.entries.items())
        return type(self)(
            entries=entries_lower,
            preamble=self._preamble,
            wanted_entries=self.wanted_entries,
            min_crossrefs=self.min_crossrefs,
        )
コード例 #17
0
class BibliographyData(object):
    def __init__(self,
                 entries=None,
                 preamble=None,
                 wanted_entries=None,
                 min_crossrefs=2):
        """
        A :py:class:`.BibliographyData` object contains a dictionary of bibliography
        entries referenced by their keys.
        Each entry is represented by an :py:class:`.Entry` object.

        Additionally, :py:class:`.BibliographyData` may contain a LaTeX
        preamble defined by ``@PREAMBLE`` commands in the BibTeX file.

        :param entries: Optional mapping, or iterable of ``(key, entry)``
            pairs. Every pair is added through :py:meth:`add_entry`.
        :param preamble: Optional iterable of preamble strings.
        :param wanted_entries: Optional iterable of keys. When given, only
            these keys are accepted by :py:meth:`add_entry` (all keys if it
            contains ``'*'``).
        :param min_crossrefs: Stored as ``self.min_crossrefs``.
        """

        self.entries = OrderedCaseInsensitiveDict()
        '''A dictionary of bibliography entries referenced by their keys.

        The dictionary is case insensitive:

        >>> bib_data = parse_string("""
        ...     @ARTICLE{gnats,
        ...         author = {L[eslie] A. Aamport},
        ...         title = {The Gnats and Gnus Document Preparation System},
        ...     }
        ... """, 'bibtex')
        >>> bib_data.entries['gnats'] == bib_data.entries['GNATS']
        True

        '''

        self.crossref_count = CaseInsensitiveDefaultDict(int)
        self.min_crossrefs = min_crossrefs
        self._preamble = []
        if wanted_entries is not None:
            self.wanted_entries = CaseInsensitiveSet(wanted_entries)
            self.citations = CaseInsensitiveSet(wanted_entries)
        else:
            self.wanted_entries = None
            self.citations = CaseInsensitiveSet()
        if entries:
            if isinstance(entries, Mapping):
                entries = entries.items()
            for (key, entry) in entries:
                self.add_entry(key, entry)
        if preamble:
            self._preamble.extend(preamble)

    def __eq__(self, other):
        """Compare entries and preamble with another :py:class:`.BibliographyData`."""
        if not isinstance(other, BibliographyData):
            # Not comparable here: let Python try the reflected comparison
            # and fall back to its default instead of comparing a ``super``
            # proxy object with ``other``.
            return NotImplemented
        return (self.entries == other.entries
                and self._preamble == other._preamble)

    def __repr__(self):
        """Return a multi-line representation with one entry per line."""

        repr_entry = repr(self.entries)
        keys = self.entries.keys()

        # NOTE(review): ``index(key)`` finds the first occurrence of the key
        # text anywhere in the repr string, so a key that also appears inside
        # an earlier entry puts the line break in the wrong place.
        for key in keys:
            ind = repr_entry.index(key) - 2  # find first instance
            repr_entry = repr_entry[:ind] + "\n" + repr_entry[ind:]

        repr_entry = indent(repr_entry, prefix="    ")
        repr_entry = repr_entry[4:]  # drop 1st indent

        return ("BibliographyData(\n"
                "  entries={0},\n\n"
                "  preamble={1})".format(repr_entry, repr(self._preamble)))

    def add_to_preamble(self, *values):
        """Append the given strings to the stored preamble parts."""
        self._preamble.extend(values)

    @property
    def preamble(self):
        r'''
        LaTeX preamble.

        >>> bib_data = parse_string(r"""
        ...     @PREAMBLE{"\newcommand{\noopsort}[1]{}"}
        ... """, 'bibtex')
        >>> print(bib_data.preamble)
        \newcommand{\noopsort}[1]{}

        .. versionadded:: 0.19
            Earlier versions used :py:meth:`.get_preamble()`, which is now deprecated.
        '''
        return ''.join(self._preamble)

    @deprecated('0.19', 'use BibliographyData.preamble instead')
    def get_preamble(self):
        """
        .. deprecated:: 0.19
            Use :py:attr:`.preamble` instead.
        """
        return self.preamble

    def want_entry(self, key):
        """
        Tell whether an entry with the given key should be stored.

        True when no ``wanted_entries`` filter is set, when ``key`` is in
        the filter, or when the filter contains the wildcard ``'*'``.
        """
        return (self.wanted_entries is None or key in self.wanted_entries
                or '*' in self.wanted_entries)

    def get_canonical_key(self, key):
        """
        Return the spelling of ``key`` known to ``self.citations``.

        Keys that are not in ``self.citations`` are returned unchanged.
        """
        if key in self.citations:
            return self.citations.get_canonical_key(key)
        else:
            return key

    def add_entry(self, key, entry):
        """
        Store ``entry`` under ``key``.

        Unwanted keys are silently skipped.  A key that is already present
        is reported through ``report_error`` and the new entry is dropped.
        When a ``wanted_entries`` filter is active, the target of the
        entry's ``crossref`` field is added to it.
        """
        if not self.want_entry(key):
            return
        if key in self.entries:
            report_error(
                BibliographyDataError('repeated bibliograhpy entry: %s' % key))
            return
        entry.key = self.get_canonical_key(key)
        self.entries[entry.key] = entry
        try:
            crossref = entry.fields['crossref']
        except KeyError:
            pass
        else:
            if self.wanted_entries is not None:
                self.wanted_entries.add(crossref)

    def add_entries(self, entries):
        """Add every ``(key, entry)`` pair through :py:meth:`add_entry`."""
        for key, entry in entries:
            self.add_entry(key, entry)

    @fix_unicode_literals_in_doctest
    def _get_crossreferenced_citations(self, citations, min_crossrefs):
        r"""
        Get citations not cited explicitly but referenced by other citations.

        >>> from pybtex.database import Entry
        >>> data = BibliographyData({
        ...     'main_article': Entry('article', {'crossref': 'xrefd_arcicle'}),
        ...     'xrefd_arcicle': Entry('article'),
        ... })
        >>> list(data._get_crossreferenced_citations([], min_crossrefs=1))
        []
        >>> list(data._get_crossreferenced_citations(['main_article'], min_crossrefs=1))
        [u'xrefd_arcicle']
        >>> list(data._get_crossreferenced_citations(['Main_article'], min_crossrefs=1))
        [u'xrefd_arcicle']
        >>> list(data._get_crossreferenced_citations(['main_article'], min_crossrefs=2))
        []
        >>> list(data._get_crossreferenced_citations(['xrefd_arcicle'], min_crossrefs=1))
        []

        >>> data2 = BibliographyData(data.entries, wanted_entries=data.entries.keys())
        >>> list(data2._get_crossreferenced_citations([], min_crossrefs=1))
        []
        >>> list(data2._get_crossreferenced_citations(['main_article'], min_crossrefs=1))
        [u'xrefd_arcicle']
        >>> list(data2._get_crossreferenced_citations(['Main_article'], min_crossrefs=1))
        [u'xrefd_arcicle']
        >>> list(data2._get_crossreferenced_citations(['main_article'], min_crossrefs=2))
        []
        >>> list(data2._get_crossreferenced_citations(['xrefd_arcicle'], min_crossrefs=1))
        []
        >>> list(data2._get_crossreferenced_citations(['xrefd_arcicle'], min_crossrefs=1))
        []

        """

        crossref_count = CaseInsensitiveDefaultDict(int)
        citation_set = CaseInsensitiveSet(citations)
        for citation in citations:
            # Citations without an entry, or entries without a crossref
            # field, contribute nothing.
            try:
                entry = self.entries[citation]
                crossref = entry.fields['crossref']
            except KeyError:
                continue
            try:
                crossref_entry = self.entries[crossref]
            except KeyError:
                report_error(
                    BibliographyDataError(
                        'bad cross-reference: entry "{key}" refers to '
                        'entry "{crossref}" which does not exist.'.format(
                            key=citation,
                            crossref=crossref,
                        )))
                continue

            canonical_crossref = crossref_entry.key
            crossref_count[canonical_crossref] += 1
            # Yield a target once it is referenced often enough and is not
            # already cited (explicitly or by an earlier yield).
            if crossref_count[
                    canonical_crossref] >= min_crossrefs and canonical_crossref not in citation_set:
                citation_set.add(canonical_crossref)
                yield canonical_crossref

    @fix_unicode_literals_in_doctest
    def _expand_wildcard_citations(self, citations):
        r"""
        Expand wildcard citations (\citation{*} in .aux file).

        >>> from pybtex.database import Entry
        >>> data = BibliographyData((
        ...     ('uno', Entry('article')),
        ...     ('dos', Entry('article')),
        ...     ('tres', Entry('article')),
        ...     ('cuatro', Entry('article')),
        ... ))
        >>> list(data._expand_wildcard_citations([]))
        []
        >>> list(data._expand_wildcard_citations(['*']))
        [u'uno', u'dos', u'tres', u'cuatro']
        >>> list(data._expand_wildcard_citations(['uno', '*']))
        [u'uno', u'dos', u'tres', u'cuatro']
        >>> list(data._expand_wildcard_citations(['dos', '*']))
        [u'dos', u'uno', u'tres', u'cuatro']
        >>> list(data._expand_wildcard_citations(['*', 'uno']))
        [u'uno', u'dos', u'tres', u'cuatro']
        >>> list(data._expand_wildcard_citations(['*', 'DOS']))
        [u'uno', u'dos', u'tres', u'cuatro']

        """

        citation_set = CaseInsensitiveSet()
        for citation in citations:
            if citation == '*':
                # The wildcard stands for every stored key not yielded yet.
                for key in self.entries:
                    if key not in citation_set:
                        citation_set.add(key)
                        yield key
            else:
                if citation not in citation_set:
                    citation_set.add(citation)
                    yield citation

    def add_extra_citations(self, citations, min_crossrefs):
        """
        Return ``citations`` with wildcards expanded, followed by the
        cross-referenced citations found for that expanded list.
        """
        expanded_citations = list(self._expand_wildcard_citations(citations))
        crossrefs = list(
            self._get_crossreferenced_citations(expanded_citations,
                                                min_crossrefs))
        return expanded_citations + crossrefs

    def to_string(self, bib_format, **kwargs):
        """
        Return the data as a unicode string in the given format.

        :param bib_format: Data format ("bibtex", "yaml", etc.).

        .. versionadded:: 0.19
        """
        writer = find_plugin('pybtex.database.output', bib_format)(**kwargs)
        return writer.to_string(self)

    @classmethod
    def from_string(cls, value, bib_format, **kwargs):
        """
        Return the data from a unicode string in the given format.

        :param bib_format: Data format ("bibtex", "yaml", etc.).

        .. versionadded:: 0.22.2
        """
        return parse_string(value, bib_format, **kwargs)

    def to_bytes(self, bib_format, **kwargs):
        """
        Return the data as a byte string in the given format.

        :param bib_format: Data format ("bibtex", "yaml", etc.).

        .. versionadded:: 0.19
        """
        writer = find_plugin('pybtex.database.output', bib_format)(**kwargs)
        return writer.to_bytes(self)

    def to_file(self, file, bib_format=None, **kwargs):
        """
        Save the data to a file.

        :param file: A file name or a file-like object.
        :param bib_format: Data format ("bibtex", "yaml", etc.).
            If not specified, Pybtex will try to guess by the file name.

        .. versionadded:: 0.19
        """
        if isinstance(file, six.string_types):
            filename = file
        else:
            filename = getattr(file, 'name', None)
        writer = find_plugin('pybtex.database.output',
                             bib_format,
                             filename=filename)(**kwargs)
        return writer.write_file(self, file)

    @fix_unicode_literals_in_doctest
    def lower(self):
        u'''
        Return another :py:class:`.BibliographyData` with all identifiers converted to lowercase.

        >>> data = parse_string("""
        ...     @BOOK{Obrazy,
        ...         title = "Obrazy z Rus",
        ...         author = "Karel Havlíček Borovský",
        ...     }
        ...     @BOOK{Elegie,
        ...         title = "Tirolské elegie",
        ...         author = "Karel Havlíček Borovský",
        ...     }
        ... """, 'bibtex')
        >>> data_lower = data.lower()
        >>> list(data_lower.entries.keys())
        [u'obrazy', u'elegie']
        >>> for entry in data_lower.entries.values():
        ...     entry.key
        ...     list(entry.persons.keys())
        ...     list(entry.fields.keys())
        u'obrazy'
        [u'author']
        [u'title']
        u'elegie'
        [u'author']
        [u'title']

        '''

        entries_lower = ((key.lower(), entry.lower())
                         for key, entry in self.entries.items())
        return type(self)(
            entries=entries_lower,
            preamble=self._preamble,
            wanted_entries=self.wanted_entries,
            min_crossrefs=self.min_crossrefs,
        )
コード例 #18
0
class BibliographyData(object):
    """
    A set of bibliography entries referenced by their keys, together with
    the parts of an optional preamble.
    """

    def __init__(self,
                 entries=None,
                 preamble=None,
                 wanted_entries=None,
                 min_crossrefs=2):
        """
        :param entries: Optional mapping, or iterable of ``(key, entry)``
            pairs. Every pair is added through :py:meth:`add_entry`.
        :param preamble: Optional iterable of preamble strings.
        :param wanted_entries: Optional iterable of keys. When given, only
            these keys are accepted by :py:meth:`add_entry` (all keys if it
            contains ``'*'``).
        :param min_crossrefs: Stored as ``self.min_crossrefs``.
        """
        self.entries = OrderedCaseInsensitiveDict()
        self.crossref_count = CaseInsensitiveDefaultDict(int)
        self.min_crossrefs = min_crossrefs
        self._preamble = []
        if wanted_entries is not None:
            self.wanted_entries = CaseInsensitiveSet(wanted_entries)
            self.citations = CaseInsensitiveSet(wanted_entries)
        else:
            self.wanted_entries = None
            self.citations = CaseInsensitiveSet()
        if entries:
            if isinstance(entries, Mapping):
                entries = iter(entries.items())
            for (key, entry) in entries:
                self.add_entry(key, entry)
        if preamble:
            self._preamble.extend(preamble)

    def __eq__(self, other):
        """Compare entries and preamble with another BibliographyData."""
        if not isinstance(other, BibliographyData):
            # Not comparable here: let Python try the reflected comparison
            # and fall back to its default instead of comparing a ``super``
            # proxy object with ``other``.
            return NotImplemented
        return (self.entries == other.entries
                and self._preamble == other._preamble)

    def __repr__(self):
        return 'BibliographyData(entries={entries}, preamble={preamble})'.format(
            entries=repr(self.entries),
            preamble=repr(self._preamble),
        )

    def add_to_preamble(self, *values):
        """Append the given strings to the stored preamble parts."""
        self._preamble.extend(values)

    @deprecated('0.17', 'use get_preamble instead')
    def preamble(self):
        """Deprecated alias of :py:meth:`get_preamble`."""
        return self.get_preamble()

    def get_preamble(self):
        """Return all preamble parts joined into one string."""
        return ''.join(self._preamble)

    def want_entry(self, key):
        """
        Tell whether an entry with the given key should be stored.

        True when no ``wanted_entries`` filter is set, when ``key`` is in
        the filter, or when the filter contains the wildcard ``'*'``.
        """
        return (self.wanted_entries is None or key in self.wanted_entries
                or '*' in self.wanted_entries)

    def get_canonical_key(self, key):
        """
        Return the spelling of ``key`` known to ``self.citations``.

        Keys that are not in ``self.citations`` are returned unchanged.
        """
        if key in self.citations:
            return self.citations.get_canonical_key(key)
        else:
            return key

    def add_entry(self, key, entry):
        """
        Store ``entry`` under ``key`` and make this object its collection.

        Unwanted keys are silently skipped.  A key that is already present
        is reported through ``report_error`` and the new entry is dropped.
        When a ``wanted_entries`` filter is active, the target of the
        entry's ``crossref`` field is added to it.
        """
        if not self.want_entry(key):
            return
        if key in self.entries:
            report_error(
                BibliographyDataError('repeated bibliograhpy entry: %s' % key))
            return
        entry.collection = self
        entry.key = self.get_canonical_key(key)
        self.entries[entry.key] = entry
        try:
            crossref = entry.fields['crossref']
        except KeyError:
            pass
        else:
            if self.wanted_entries is not None:
                self.wanted_entries.add(crossref)

    def add_entries(self, entries):
        """Add every ``(key, entry)`` pair through :py:meth:`add_entry`."""
        for key, entry in entries:
            self.add_entry(key, entry)

    def get_crossreferenced_citations(self, citations, min_crossrefs):
        """
        Get citations not cited explicitly but referenced by other citations.

        >>> from pybtex.database import Entry
        >>> data = BibliographyData({
        ...     'main_article': Entry('article', {'crossref': 'xrefd_arcicle'}),
        ...     'xrefd_arcicle': Entry('article'),
        ... })
        >>> list(data.get_crossreferenced_citations([], min_crossrefs=1))
        []
        >>> list(data.get_crossreferenced_citations(['main_article'], min_crossrefs=1))
        ['xrefd_arcicle']
        >>> list(data.get_crossreferenced_citations(['Main_article'], min_crossrefs=1))
        ['xrefd_arcicle']
        >>> list(data.get_crossreferenced_citations(['main_article'], min_crossrefs=2))
        []
        >>> list(data.get_crossreferenced_citations(['xrefd_arcicle'], min_crossrefs=1))
        []

        >>> data2 = BibliographyData(data.entries, wanted_entries=list(data.entries.keys()))
        >>> list(data2.get_crossreferenced_citations([], min_crossrefs=1))
        []
        >>> list(data2.get_crossreferenced_citations(['main_article'], min_crossrefs=1))
        ['xrefd_arcicle']
        >>> list(data2.get_crossreferenced_citations(['Main_article'], min_crossrefs=1))
        ['xrefd_arcicle']
        >>> list(data2.get_crossreferenced_citations(['main_article'], min_crossrefs=2))
        []
        >>> list(data2.get_crossreferenced_citations(['xrefd_arcicle'], min_crossrefs=1))
        []
        >>> list(data2.get_crossreferenced_citations(['xrefd_arcicle'], min_crossrefs=1))
        []

        """

        crossref_count = CaseInsensitiveDefaultDict(int)
        citation_set = CaseInsensitiveSet(citations)
        for citation in citations:
            # Citations without an entry, or entries without a crossref
            # field, contribute nothing.
            try:
                entry = self.entries[citation]
                crossref = entry.fields['crossref']
            except KeyError:
                continue
            try:
                crossref_entry = self.entries[crossref]
            except KeyError:
                report_error(
                    BibliographyDataError(
                        'bad cross-reference: entry "{key}" refers to '
                        'entry "{crossref}" which does not exist.'.format(
                            key=citation,
                            crossref=crossref,
                        )))
                continue

            canonical_crossref = crossref_entry.key
            crossref_count[canonical_crossref] += 1
            # Yield a target once it is referenced often enough and is not
            # already cited (explicitly or by an earlier yield).
            if crossref_count[
                    canonical_crossref] >= min_crossrefs and canonical_crossref not in citation_set:
                citation_set.add(canonical_crossref)
                yield canonical_crossref

    def expand_wildcard_citations(self, citations):
        r"""
        Expand wildcard citations (\citation{*} in .aux file).

        >>> from pybtex.database import Entry
        >>> data = BibliographyData((
        ...     ('uno', Entry('article')),
        ...     ('dos', Entry('article')),
        ...     ('tres', Entry('article')),
        ...     ('cuatro', Entry('article')),
        ... ))
        >>> list(data.expand_wildcard_citations([]))
        []
        >>> list(data.expand_wildcard_citations(['*']))
        ['uno', 'dos', 'tres', 'cuatro']
        >>> list(data.expand_wildcard_citations(['uno', '*']))
        ['uno', 'dos', 'tres', 'cuatro']
        >>> list(data.expand_wildcard_citations(['dos', '*']))
        ['dos', 'uno', 'tres', 'cuatro']
        >>> list(data.expand_wildcard_citations(['*', 'uno']))
        ['uno', 'dos', 'tres', 'cuatro']
        >>> list(data.expand_wildcard_citations(['*', 'DOS']))
        ['uno', 'dos', 'tres', 'cuatro']

        """

        citation_set = CaseInsensitiveSet()
        for citation in citations:
            if citation == '*':
                # The wildcard stands for every stored key not yielded yet.
                for key in self.entries:
                    if key not in citation_set:
                        citation_set.add(key)
                        yield key
            else:
                if citation not in citation_set:
                    citation_set.add(citation)
                    yield citation

    def add_extra_citations(self, citations, min_crossrefs):
        """
        Return ``citations`` with wildcards expanded, followed by the
        cross-referenced citations found for that expanded list.
        """
        expanded_citations = list(self.expand_wildcard_citations(citations))
        crossrefs = list(
            self.get_crossreferenced_citations(expanded_citations,
                                               min_crossrefs))
        return expanded_citations + crossrefs

    def lower(self):
        """
        Return another BibliographyData with all identifiers converted to lowercase.

        >>> data = BibliographyData([
        ...     ('Obrazy', Entry('Book', [('Title', 'Obrazy z Rus')], [('Author', 'Karel Havlíček Borovský')])),
        ...     ('Elegie', Entry('BOOK', [('TITLE', 'Tirolské elegie')], [('AUTHOR', 'Karel Havlíček Borovský')])),
        ... ]).lower()
        >>> list(data.entries.keys())
        ['obrazy', 'elegie']
        >>> for entry in list(data.entries.values()):
        ...     entry.key
        ...     list(entry.persons.keys())
        ...     list(entry.fields.keys())
        'obrazy'
        ['author']
        ['title']
        'elegie'
        ['author']
        ['title']

        """

        entries_lower = ((key.lower(), entry.lower())
                         for key, entry in self.entries.items())
        return type(self)(
            entries=entries_lower,
            preamble=self._preamble,
            wanted_entries=self.wanted_entries,
            min_crossrefs=self.min_crossrefs,
        )
コード例 #19
0
class Entry(object):
    """A bibliography entry."""

    type = None
    """Entry type (``'book'``, ``'article'``, etc.)."""

    key = None
    """Entry key (for example, ``'fukushima1980neocognitron'``)."""

    fields = None
    """A dictionary of entry fields.
    The dictionary is ordered and case-insensitive."""

    persons = None
    """
    A dictionary of entry persons, by their roles.

    The most often used roles are ``'author'`` and ``'editor'``.
    """

    def __init__(self, type_, fields=None, persons=None):
        """
        :param type_: Entry type; stored lowercased as ``self.type`` and
            unchanged as ``self.original_type``.
        :param fields: Optional initial fields (empty when omitted).
        :param persons: Optional initial persons by role (empty when omitted).
        """
        if fields is None:
            fields = {}
        if persons is None:
            persons = {}
        self.type = type_.lower()
        self.original_type = type_
        self.fields = OrderedCaseInsensitiveDict(fields)
        self.persons = OrderedCaseInsensitiveDict(persons)

    def __eq__(self, other):
        """Compare type, fields and persons with another :py:class:`.Entry`."""
        if not isinstance(other, Entry):
            # Not comparable here: let Python try the reflected comparison
            # and fall back to its default instead of comparing a ``super``
            # proxy object with ``other``.
            return NotImplemented
        return (self.type == other.type and self.fields == other.fields
                and self.persons == other.persons)

    def __repr__(self):
        """Return a multi-line representation with one field per line."""
        # represent the fields as a list of tuples for simplicity
        items = list(self.fields.items())
        if items:
            # Break the line before every item.  Joining the items' reprs
            # (rather than searching the finished string for each key) keeps
            # the breaks correct when a key also occurs inside a field value.
            repr_fields = "[\n" + ", \n".join(repr(item) for item in items) + "]"
        else:
            repr_fields = "[]"

        repr_fields = indent(repr_fields, prefix="    ")
        repr_fields = repr_fields[4:]  # drop 1st indent

        return ("Entry({0},\n"
                "  fields={1},\n"
                "  persons={2})".format(repr(self.type), repr_fields,
                                        repr(self.persons)))

    def add_person(self, person, role):
        """Append ``person`` to the list stored for ``role``, creating it if needed."""
        self.persons.setdefault(role, []).append(person)

    def lower(self):
        """Return a new entry built from the lowercased fields and persons."""
        return type(self)(
            self.type,
            fields=self.fields.lower(),
            persons=self.persons.lower(),
        )

    def _find_person_field(self, role):
        """
        Return the persons with the given role joined by ``" and "``.

        :raises KeyError: if the entry has no persons for ``role``.
        """
        persons = self.persons[role]
        return ' and '.join(six.text_type(person) for person in persons)

    def _find_crossref_field(self, name, bib_data):
        """
        Look the field up in the entry named by this entry's ``crossref``.

        :raises KeyError: if ``bib_data`` is ``None``, if this entry has no
            ``crossref`` field, or if the lookup in the referenced entry fails.
        """
        if bib_data is None or 'crossref' not in self.fields:
            raise KeyError(name)
        referenced_entry = bib_data.entries[self.fields['crossref']]
        return referenced_entry._find_field(name, bib_data)

    def _find_field(self, name, bib_data=None):
        """
        Find the field with the given ``name`` according to these rules:

        - If the given field ``name`` is in ``self.fields``, just return
          self.fields[name].

        - Otherwise, if ``name`` is ``"author"`` or ``"editor"`` (or any other
          person role), return the list of names as a string, separated by
          ``" and "``.

        - Otherwise, if this entry has a ``crossref`` field, look up the
          cross-referenced entry and try to find its field with the given
          ``name``.

        :raises KeyError: if none of the rules yields a value.
        """
        try:
            return self.fields[name]
        except KeyError:
            try:
                return self._find_person_field(name)
            except KeyError:
                return self._find_crossref_field(name, bib_data)

    def to_string(self, bib_format, **kwargs):
        """
        Return the data as a unicode string in the given format.

        :param bib_format: Data format ("bibtex", "yaml", etc.).

        """
        writer = find_plugin('pybtex.database.output', bib_format)(**kwargs)
        return writer.to_string(BibliographyData(entries={self.key: self}))

    @classmethod
    def from_string(cls, value, bib_format, entry_number=0, **kwargs):
        """
        Return the data from a unicode string in the given format.

        :param bib_format: Data format ("bibtex", "yaml", etc.).
        :param entry_number: entry number if the string has more than one.

        .. versionadded:: 0.22.2
        """
        # get bibliography
        bibdata = BibliographyData.from_string(value, bib_format, **kwargs)
        # grab specific instance
        key = tuple(bibdata.entries.keys())[entry_number]
        return bibdata.entries[key]
コード例 #20
0
ファイル: getinspire.py プロジェクト: misho104/getinspire
class TeX(object):
    """
    A TeX source file together with its citations, bbl file and bib file.

    On construction the file is read and every ``\\cite`` key outside of
    comments is recorded in ``self.references`` with its position(s).
    """

    CITE_REGEX = re.compile(r'(?P<pre>(\\cite(\[.*?\])?{))(?P<body>.*?)}', re.DOTALL)
    CITE_BIB_IN_TEX = re.compile(r'\\bibliography{(.*?)}', re.DOTALL)
    CITE_BIB_IN_BBL = re.compile(r'\\bibitem{(.*?)}', re.DOTALL)
    COMMENTS_REGEX = re.compile(r'((?:^|[^\\])(?:\\\\)*)%.*$', re.MULTILINE)

    @classmethod
    def strip_comment(cls, string):
        """Remove TeX comments (an unescaped ``%`` up to the end of its line)."""
        return cls.COMMENTS_REGEX.sub(r'\1', string)

    def __init__(self, filename):
        """
        Read ``filename`` (or ``filename + '.tex'``) and collect its citations.

        :param filename: Path of the TeX file, with or without ``.tex``.
        :raises FileLookupFailedError: if neither candidate path can be read.
        """
        self.text = None
        errors = []
        possible_paths = [filename, filename + '.tex']
        for path in possible_paths:
            try:
                with open(path, mode='r') as file:
                    self.text_original = file.read()
            except IOError as e:
                errors.append(e)
                continue
            self.text = self.text_original
            self.filename = path
            self.stem = os.path.splitext(path)[0]
            break
        if self.text is None:
            raise FileLookupFailedError(errors=errors, paths=possible_paths)

        self._bib_name = None
        self._bib = None
        self._bbl = None

        # generate references
        text_uncommented = self.strip_comment(self.text)
        self.references = ODict()  # Order is important!
        for cite in self.CITE_REGEX.finditer(text_uncommented):
            pos = Position(str=text_uncommented[:cite.start()])
            pos.shift(cite.group('pre'))
            for key_raw in cite.group('body').split(','):
                stripping = re.match(r'^(\s*)(\S+)(\s*)$', key_raw)
                if stripping is None:
                    # Empty key or key containing whitespace (for example
                    # ``\cite{a,,b}``): nothing to record, but the position
                    # must still advance past the raw text and its comma.
                    pos.shift(key_raw)
                    pos.shift(',')
                    continue
                pos.shift(stripping.group(1))
                key = stripping.group(2)
                if key not in self.references:
                    self.references[key] = Ref(key, position=pos.copy())
                else:
                    self.references[key].positions.append(pos.copy())
                pos.shift(stripping.group(2)).shift(stripping.group(3))
                pos.shift(',')

    def write_tex(self):
        """Write the (possibly modified) text back to the TeX file."""
        with open(self.filename, mode='w') as file:
            file.write(self.text)

    def bbl_name(self):
        """Return the path of the bbl file (the TeX stem plus ``.bbl``)."""
        return self.stem + '.bbl'

    def bbl(self):
        """
        Return the content of the bbl file, read once and then cached.

        A missing or unreadable file yields an empty string.
        """
        if self._bbl is None:
            if self.bbl_name() and os.path.exists(self.bbl_name()):
                try:
                    with open(self.bbl_name(), mode='r') as file:
                        self._bbl = file.read()
                except (IOError, OSError, UnicodeError):
                    # Best effort: an unreadable bbl is treated as empty.
                    pass
            self._bbl = self._bbl or ""
        return self._bbl

    def modify_and_write_bbl(self, new_content, append=True):
        """
        Put ``new_content`` into the bbl text and write the bbl file.

        :param new_content: Text to add.
        :param append: If true, insert before the first
            ``\\end{thebibliography}`` of the existing bbl text; otherwise
            replace the bbl with a fresh ``thebibliography`` environment.
        """
        begin = '\\begin{thebibliography}{99}'
        end = '\\end{thebibliography}'
        if append:
            sep = re.split(r'\\end\s*{\s*thebibliography\s*}', self.bbl(), maxsplit=1)
            (bbl, footer) = sep if len(sep) == 2 else (sep[0], "")
            self._bbl = '\n'.join([bbl, new_content, end + footer])
        else:
            self._bbl = '\n\n'.join([begin, new_content, end])

        with open(self.bbl_name(), mode='w') as file:
            file.write(self.bbl())

    def bib_name(self):
        """
        Return the path of the bib file named by ``\\bibliography{...}``.

        The result is cached.  ``False`` means that the TeX text contains
        no ``\\bibliography`` command.

        :raises MultipleBibError: if different bib names are referenced.
        """
        if self._bib_name is None:
            bib_keys = self.CITE_BIB_IN_TEX.findall(self.strip_comment(self.text))
            stem = None
            for bib_key in bib_keys:
                for bib in re.split(r'\s*,\s*', bib_key):
                    if stem is None:
                        stem = bib
                    elif stem != bib:
                        raise MultipleBibError
            if stem is None:
                self._bib_name = False  # for "not found"
            else:
                self._bib_name = os.path.join(os.path.dirname(self.filename), stem + '.bib')
        return self._bib_name

    def bib(self):
        """
        Return the parsed bib file, read once and then cached.

        A missing or unparsable file yields an empty
        ``pybtex.database.BibliographyData``.
        """
        if self._bib is None:
            if self.bib_name() and os.path.exists(self.bib_name()):
                try:
                    self._bib = pybtex.database.parse_file(self.bib_name(), bib_format='bibtex')
                except Exception:
                    # Best effort: any parse/read failure falls back to an
                    # empty bibliography.  ``Exception`` (not a bare
                    # ``except``) keeps KeyboardInterrupt/SystemExit working.
                    pass
            self._bib = self._bib or pybtex.database.BibliographyData()
        return self._bib

    # pybtex output are a bit buggy and avoided.
    # def append_bib(self, entries):
    #     # self._bib.add_entries(entries)  # maybe buggy?
    #     for k in entries.order:
    #         self._bib.add_entry(k, entries[k])
    # def update_bib(self):
    #     if self.bib_name():
    #         self._bib.to_file(self.bib_name(), bib_format='bibtex')
    #     else:
    #         raise RuntimeError()

    def append_and_update_bib(self, new_text):
        """
        Append ``new_text`` to the bib file and drop the cached parse result.

        :raises RuntimeError: if the TeX file names no bib file.
        """
        bib_name = self.bib_name()
        if not bib_name:
            # ``bib_name()`` is False when no \bibliography was found;
            # passing that to open() would address file descriptor 0.
            raise RuntimeError(
                'cannot update bib file: no \\bibliography command found in {}'.format(self.filename))
        with open(bib_name, mode='a') as file:
            file.write('\n' + new_text + '\n')
        self._bib = None  # clear and to be reload

    def bbl_keys(self):
        """Return the ``\\bibitem`` keys of the bbl file (empty if unreadable)."""
        try:
            with open(self.bbl_name(), mode='r') as file:
                return self.CITE_BIB_IN_BBL.findall(self.strip_comment(file.read()))
        except IOError:
            return []

    def fetch_and_update(self, bibtex=True, latex_format='EU', append_bbl=False):
        """
        Fetch every cited reference that is not yet known and store it.

        Keys replaced by the fetch are rewritten in the TeX file; the new
        records are appended to the bib file (``bibtex=True``) or written to
        the bbl file.

        :param bibtex: Fetch BibTeX records and update the bib file; if
            false, fetch LaTeX records and update the bbl file.
        :param latex_format: Format passed to ``ref.fetch_latex``.
        :param append_bbl: Passed as ``append`` to
            :py:meth:`modify_and_write_bbl`.
        """
        existing_keys = self.bib().entries.keys() if bibtex else self.bbl_keys()
        # A set: only membership of lowercased keys is ever tested.
        existing_keys = {k.lower() for k in existing_keys}

        replace_keys = ODict()
        new_entries = ODict()
        type_name = 'BibTeX' if bibtex else 'LaTeX({})'.format(latex_format)

        for ref in self.references.values():
            if ref.key.lower() in existing_keys:
                if DEBUG:
                    print('skip existing: {}'.format(ref.key))
                continue
            if Key.is_unknown(ref.key):
                print('WARNING: skip non-existing but unknown-type key {}'.format(ref.key))
                continue

            try:
                print('fetching', type_name, 'from inspire:', ref.key, end=' ')
                if bibtex:
                    ref.fetch_bibtex()
                else:
                    ref.fetch_latex(latex_format)
                sys.stdout.flush()
                time.sleep(0.3)
            except (RecordNotFound, MultipleRecordsFound) as e:
                # A tuple is required here: ``A or B`` evaluates to ``A``
                # and would leave MultipleRecordsFound uncaught.
                print('\nERROR: {}'.format(e))
                continue

            if ref.new_key:
                replace_keys[ref.old_key] = ref
                print('->', ref.new_key, end=' ')
                sys.stdout.flush()
                time.sleep(0.3)
            if ref.key.lower() not in existing_keys:
                existing_keys.add(ref.key.lower())
                new_entries[ref.key] = ref
                print('[new entry]', end='')
            print('')

        replacements = list()
        for ref in replace_keys.values():
            for appearance in ref.positions:
                replacements.append((appearance, ref.old_key, ref.new_key))
        self.replace_text(replacements)
        self.write_tex()

        new_ref_contents = '\n'.join(r.content for r in new_entries.values())
        if bibtex:
            self.append_and_update_bib(new_ref_contents)
        else:
            self.modify_and_write_bbl(new_ref_contents, append_bbl)

    def replace_text(self, replacement_rules):
        """
        Apply ``(position, old, new)`` replacements to ``self.text``.

        :raises ReplacementError: if the text at a position is not ``old``.
        """
        # replace from the end to the beginning not to break the positions
        replacement_rules = sorted(replacement_rules, key=lambda x: x[0], reverse=True)
        lines = Position.LINESEP_REGEX.split(self.text)
        for pos, old, new in replacement_rules:
            if lines[pos.l][pos.c:pos.c + len(old)] == old:
                lines[pos.l] = lines[pos.l][:pos.c] + new + lines[pos.l][pos.c + len(old):]
            else:
                raise ReplacementError(pos.l, pos.c, old, new, lines[pos.l][pos.c:pos.c + len(old)])
        self.text = '\n'.join(lines)
コード例 #21
0
    def cmd_makecldf(self, args):
        """Build the CLDF data from the JSON dumps found in ``self.raw_dir``.

        Sources, languages and concepts are registered with ``args.writer``
        first.  Afterwards every raw file whose name starts with
        ``language-`` is read and each of its records is added as a form.
        Records whose word slug matches no known concept alias are collected
        and reported through ``args.log``.

        :param args: object providing the ``writer`` and ``log`` attributes
            used below.
        """

        def load_by_slug(name):
            # Index the records of one raw JSON file by their "slug" value.
            return {
                record["slug"]: record
                for record in self.raw_dir.read_json(self.raw_dir / name)
            }

        def hyphen_slug(text):
            # Slug every whitespace-separated word and glue them with "-".
            return "-".join([slug(word) for word in text.split()])

        languages = load_by_slug("languages.json")
        words = load_by_slug("words.json")  # loaded, but not used further below
        sources = load_by_slug("sources.json")

        # Sources: re-key the first parsed BibTeX entry so that its key
        # equals our source id (pybtex offers no direct way to rename it).
        for source_id in sorted(sources):
            bib = parse_string(sources[source_id]["bibtex"], "bibtex")
            first_key = list(bib.entries.keys())[0]
            entry = bib.entries[first_key]
            entry.key = source_id
            bib.entries = OrderedCaseInsensitiveDict([(source_id, entry)])
            args.writer.add_sources(bib)

        # Languages.
        for lang_id in sorted(languages):
            info = languages[lang_id]
            args.writer.add_language(
                ID=lang_id,
                Name=info["fullname"],
                ISO639P3code=info["isocode"],
                Glottocode=info["glottocode"],
            )

        # Concepts: register each one and remember every spelling variant of
        # its English gloss, so raw word slugs can be mapped to concept ids.
        concepts = {}
        for concept in self.conceptlists[0].concepts.values():
            gloss = concept.english
            idx = '{0}_{1}'.format(concept.number, slug(gloss))
            args.writer.add_concept(
                ID=idx,
                Name=gloss,
                Concepticon_ID=concept.concepticon_id,
                Concepticon_Gloss=concept.concepticon_gloss)

            aliases = [
                gloss,
                gloss.replace(" ", "-"),
                gloss.replace(" ", "-").lower(),
                slug(gloss),
                hyphen_slug(gloss),
            ]
            if '(' in gloss:
                # Variants of the gloss without its parenthesised remainder
                # (the character just before the "(" is dropped as well).
                head = gloss[:gloss.index('(') - 1]
                aliases.append(hyphen_slug(head))
                aliases.append(head)
                aliases.append(head.replace(' ', '-').lower())
            if gloss.startswith("to "):
                aliases.append(hyphen_slug(gloss[3:]))
                aliases.append(gloss.replace("to ", ""))
            for alias in aliases:
                concepts[alias] = idx

        # Hand-maintained aliases for slugs the generated variants above do
        # not produce.  Applied in order; a missing target raises KeyError.
        manual_aliases = (
            ("mans-mother-law", "man's mother in law"),
            ("brother-law", "brother in law"),
            ("to-make-hole", "make hole (in ground)"),
            ("front", "in front"),
            ("husk-nut", "husk (of nut)"),
            ("his", "his, hers, its (pronoun p:3s)"),
            ("we-two-incl", "we incl. dual (pronoun d:1p, incl, dual)"),
            ("intrnasitivizer", "intransitivizer"),
            ("short-piece-wood", "short-piece-of-wood"),
            ("top-foot", "top (of foot)"),
            ("sit-feet-and-legs-together",
             "sit (with feet and legs together)"),
            ("earth", "earth/soil"),
            ("warm", "warm/hot"),
            ("your-sg", "your (pronoun: p:2s)"),
            ("-law", "in-law"),
            ("to-roast", "roast"),
            ("arrow-barred", "arrow (barred) (Arrow with cross bar)"),
            ("them-dual", "them (pronoun o:3p, dual)"),
            ("you-dual", "you (pronoun d:2s)"),
            ("right-correct", "right (correct, true)"),
            ("betelpepper", "betelpepper vine"),
            ("to-chop", "to chop, cut down"),
            ("road", "road/path"),
            ("for-benefactive-clitic",
             "for (benefactive) ((cliticised or suffixed to noun))"),
            ("mans-father-law", "mans' father in law"),
            ("sister-law", "sister in law"),
            ("you-o2s", "you (pronoun o:2s)"),
            ("you-pl-o2p", "you pl. (pronoun o:2p)"),
            ("we-pl-incl", "we incl. (pronoun d:1p, incl)"),
            ("in", "in, inside"),
            ("not_know", "not know"),
            ("their-dual", "their (pronoun p:3p, dual)"),
            ("blow-fire", "blow (on fire)"),
            ("blunt-eg-knife", "blunt (of e.g. knife)"),
            ("our-dual", "our (two) (pronoun p:1p, dual)"),
            ("your-pl-dual", "your (two) pl (pronoun p:2p, dual)"),
            ("suck-breast", "to suck at breast"),
            ("draw-water-carry", "draw water / carry"),
            ("tree-sp-Gnetum-gnemon", "tree sp. (Gnetum gnemon)"),
            ("he-she", "he, she, it, that, those"),
            ("fed", "fed up (with)"),
            ("you-pl-dual-o2p", "you plural two (pronoun d:2p, dual)"),
            ("you-pl-dual", "you two (pronoun d:2s, dual)"),
            ("to-put", "to put, give"),
            ("he-she-it-those", "he, she, it, that, those"),
            ("we-two-excl", "we excl. dual (pronoun d:1p, excl, dual)"),
            ("we-pl-excl", "we excl. plural (pronoun d:1p, excl, plural)"),
        )
        for alias, target in manual_aliases:
            concepts[alias] = concepts[target]

        # Lexemes: one raw JSON file per language.
        itemfiles = [
            path for path in self.raw_dir.iterdir()
            if path.name.startswith("language-")
        ]
        errors = set()
        for filename in progressbar(sorted(itemfiles), desc="adding lexemes"):
            records = sorted(self.raw_dir.read_json(filename),
                             key=lambda d: d["id"])
            for record in records:
                wordid = self.get_slug_from_uri(record['word'])
                if wordid not in concepts:
                    # Unknown word slug: remember it and report it at the end.
                    errors.add(("concept", wordid))
                    continue
                args.writer.add_forms_from_value(
                    Local_ID=record["id"],
                    Language_ID=self.get_slug_from_uri(record["language"]),
                    Parameter_ID=concepts[wordid],
                    Value=record["entry"],
                    Source=self.get_slug_from_uri(record["source"]),
                    Comment=record["annotation"],
                )

        for error in errors:
            args.log.info("error with {0[0]}: {0[1]}".format(error))

        args.log.info("found {0} errors in concepts".format(len(errors)))
コード例 #22
0
class Entry(object):
    """A bibliography entry."""

    type = None
    """Entry type (``'book'``, ``'article'``, etc.)."""

    key = None
    """Entry key (for example, ``'fukushima1980neocognitron'``)."""

    fields = None
    """A dictionary of entry fields.
    The dictionary is ordered and case-insensitive."""

    rich_fields = None
    """A dictionary of entry fields, converted to :ref:`rich text <rich-text>`."""

    persons = None
    """
    A dictionary of entry persons, by their roles.

    The most often used roles are ``'author'`` and ``'editor'``.
    """

    collection = None
    """A reference to the containing :py:class:`.BibliographyData` object. Used to resolve crossrefs."""

    def __init__(self, type_, fields=None, persons=None, collection=None):
        """Create an entry.

        :param type_: entry type; stored lower-cased in ``type`` and verbatim
            in ``original_type``.
        :param fields: initial fields (defaults to no fields).
        :param persons: initial persons by role (defaults to no persons).
        :param collection: containing bibliography, used by
            :py:meth:`get_crossref`.
        """
        if fields is None:
            fields = {}
        if persons is None:
            persons = {}
        self.type = type_.lower()
        self.original_type = type_

        self.fields = FieldDict(self, fields)
        self.rich_fields = RichFieldProxyDict(self.fields)

        self.persons = OrderedCaseInsensitiveDict(persons)

        self.collection = collection

        # for BibTeX interpreter
        self.vars = {}

    def __eq__(self, other):
        """Entries are equal when type, fields and persons all match.

        ``key`` and ``collection`` do not take part in the comparison.
        """
        if not isinstance(other, Entry):
            # Let Python try the reflected comparison and then fall back to
            # its default, instead of comparing a ``super`` proxy object.
            return NotImplemented
        return (
            self.type == other.type
            and self.fields == other.fields
            and self.persons == other.persons
        )

    def __repr__(self):
        # representing fields as FieldDict causes problems with representing
        # fields.parent, so represent it as a list of tuples; ``list()`` is
        # required because ``items()`` may return a view object
        repr_fields = repr(list(self.fields.items()))

        return 'Entry({type_}, fields={fields}, persons={persons})'.format(
            type_=repr(self.type),
            fields=repr_fields,
            persons=repr(self.persons),
        )

    def get_crossref(self):
        """Return the entry named by this entry's ``crossref`` field.

        The lookup goes through ``self.collection.entries``, so it fails if
        the field is absent or no collection is attached.
        """
        return self.collection.entries[self.fields['crossref']]

    def add_person(self, person, role):
        """Append ``person`` to the list of persons having ``role``."""
        self.persons.setdefault(role, []).append(person)

    def lower(self):
        """Return a new entry built from lower-cased fields and persons."""
        return type(self)(
            self.type,
            fields=self.fields.lower(),
            persons=self.persons.lower(),
            collection=self.collection,
        )
コード例 #23
0
ファイル: __init__.py プロジェクト: shekhar-hippargi/test
class Entry(object):
    """A bibliography entry."""

    type = None
    """Entry type (``'book'``, ``'article'``, etc.)."""

    key = None
    """Entry key (for example, ``'fukushima1980neocognitron'``)."""

    fields = None
    """A dictionary of entry fields.
    The dictionary is ordered and case-insensitive."""

    persons = None
    """
    A dictionary of entry persons, by their roles.

    The most often used roles are ``'author'`` and ``'editor'``.
    """

    def __init__(self, type_, fields=None, persons=None):
        """Create an entry.

        :param type_: entry type; stored lower-cased in ``type`` and verbatim
            in ``original_type``.
        :param fields: initial fields (defaults to no fields).
        :param persons: initial persons by role (defaults to no persons).
        """
        if fields is None:
            fields = {}
        if persons is None:
            persons = {}
        self.type = type_.lower()
        self.original_type = type_
        self.fields = OrderedCaseInsensitiveDict(fields)
        self.persons = OrderedCaseInsensitiveDict(persons)

    def __eq__(self, other):
        """Entries are equal when type, fields and persons all match.

        ``key`` does not take part in the comparison.
        """
        if not isinstance(other, Entry):
            # Let Python try the reflected comparison and then fall back to
            # its default, instead of comparing a ``super`` proxy object.
            return NotImplemented
        return (
            self.type == other.type
            and self.fields == other.fields
            and self.persons == other.persons
        )

    def __repr__(self):
        # represent the fields as a list of tuples for simplicity; ``list()``
        # is required because ``items()`` may return a view object
        repr_fields = repr(list(self.fields.items()))

        return 'Entry({type_}, fields={fields}, persons={persons})'.format(
            type_=repr(self.type),
            fields=repr_fields,
            persons=repr(self.persons),
        )

    def add_person(self, person, role):
        """Append ``person`` to the list of persons having ``role``."""
        self.persons.setdefault(role, []).append(person)

    def lower(self):
        """Return a new entry built from lower-cased fields and persons."""
        return type(self)(
            self.type,
            fields=self.fields.lower(),
            persons=self.persons.lower(),
        )

    def _find_person_field(self, role):
        """Return the persons having ``role`` joined by ``' and '``.

        :raises KeyError: if the entry has no persons with that role.
        """
        persons = self.persons[role]
        return ' and '.join(six.text_type(person) for person in persons)

    def _find_crossref_field(self, name, bib_data):
        """Look up field ``name`` in the entry referenced by ``crossref``.

        :raises KeyError: if ``bib_data`` is ``None``, this entry has no
            ``crossref`` field, or the lookup in the referenced entry fails.
        """
        if bib_data is None or 'crossref' not in self.fields:
            raise KeyError(name)
        referenced_entry = bib_data.entries[self.fields['crossref']]
        return referenced_entry._find_field(name, bib_data)

    def _find_field(self, name, bib_data=None):
        """
        Find the field with the given ``name`` according to these rules:

        - If the given field ``name`` is in ``self.fields``, just return
          ``self.fields[name]``.

        - Otherwise, if ``name`` is ``"author"`` or ``"editor"`` (or any other
          person role present in ``self.persons``), return the list of names
          as a string, separated by ``" and "``.

        - Otherwise, if this entry has a ``crossref`` field and ``bib_data``
          is given, look up the cross-referenced entry and try to find its
          field with the given ``name``.

        :raises KeyError: if none of the rules yields a value.
        """
        try:
            return self.fields[name]
        except KeyError:
            try:
                return self._find_person_field(name)
            except KeyError:
                return self._find_crossref_field(name, bib_data)