def run(aux_file, cache_file, bibboost_bib_file):
    """
    Write the bibtex entries cited by an aux file and repoint the aux file.

    This function assumes that we are in the folder of aux_file, and that
    aux_file is just a `basename`.

    :param aux_file: the aux file the user want to process
    :param cache_file: the file in which the cache will be stored
    :param bibboost_bib_file: the file which contains the resulting bibtex
        entries
    :return: None
    """
    bib_files, citations = bibboost.aux.parse_aux_file(aux_file)

    with bibboost.cache.CacheBib(cache_file) as cache:
        cache.update_db(bib_files)
        used_entries = OrderedCaseInsensitiveDict(
            (key, entry) for key, entry in cache.get_entries(citations))

    # Cited keys for which the cache returned nothing.
    missed_entries = [key for key in citations if key not in used_entries]
    if missed_entries:
        logging.warning("missing entries: %s", ", ".join(missed_entries))

    with open(bibboost_bib_file, "w") as f:
        f.write("""% This file is generated by bibboost
% It should not be modified manually

""")
        # ``values()`` exists on both Python 2 and 3 (``itervalues`` does not).
        f.write("\n\n".join(used_entries.values()))

    bibboost.aux.change_bib_file(aux_file, bibboost_bib_file)
class Entry(object):
    """A bibliography entry."""

    key = None
    """Entry key (for example, ``'fukushima1980neocognitron'``)."""

    def __init__(self, type_, fields=None, persons=None, collection=None):
        """Create an entry of the given type.

        :param type_: entry type; stored lower-cased in ``type`` and verbatim
            in ``original_type``.
        :param fields: optional initial fields (defaults to empty).
        :param persons: optional initial persons by role (defaults to empty).
        :param collection: optional owning collection, used by
            :py:meth:`get_crossref`.
        """
        if fields is None:
            fields = {}
        if persons is None:
            persons = {}

        self.type = type_.lower()
        """Entry type (``'book'``, ``'article'``, etc.)."""
        self.original_type = type_

        self.fields = FieldDict(self, fields)
        """A dictionary of entry fields.
        The dictionary is ordered and case-insensitive."""

        self.persons = OrderedCaseInsensitiveDict(persons)
        """A dictionary of entry persons, by their roles.

        The most often used roles are ``'author'`` and ``'editor'``.
        """

        self.collection = collection

        # for BibTeX interpreter
        self.vars = {}

    def __eq__(self, other):
        """Compare type, fields and persons with another :py:class:`Entry`."""
        if not isinstance(other, Entry):
            # Defer to the other operand; comparing a ``super`` proxy would
            # hand that proxy (not this entry) to the reflected ``__eq__``.
            return NotImplemented
        return (
            self.type == other.type
            and self.fields == other.fields
            and self.persons == other.persons
        )

    def __repr__(self):
        # representing fields as FieldDict causes problems with representing
        # fields.parent, so represent it as a list of tuples
        # (``list`` is needed on Python 3, where ``items()`` is a view).
        repr_fields = repr(list(self.fields.items()))

        return 'Entry({type_}, fields={fields}, persons={persons})'.format(
            type_=repr(self.type),
            fields=repr_fields,
            persons=repr(self.persons),
        )

    def get_crossref(self):
        """Return the entry of ``self.collection`` named by the ``crossref`` field."""
        return self.collection.entries[self.fields['crossref']]

    def add_person(self, person, role):
        """Append ``person`` to the list of persons stored under ``role``."""
        self.persons.setdefault(role, []).append(person)

    def lower(self):
        """Return a new entry built from the lowered fields and persons."""
        return type(self)(
            self.type,
            fields=self.fields.lower(),
            persons=self.persons.lower(),
            collection=self.collection,
        )
def __init__(self, type_, fields=None, persons=None):
    """Initialise the entry from its type, fields and persons.

    ``type_`` is stored lower-cased in ``type`` and verbatim in
    ``original_type``. ``fields`` and ``persons`` default to empty and are
    wrapped in ``OrderedCaseInsensitiveDict``.
    """
    field_source = {} if fields is None else fields
    person_source = {} if persons is None else persons

    self.type = type_.lower()
    self.original_type = type_
    self.fields = OrderedCaseInsensitiveDict(field_source)
    self.persons = OrderedCaseInsensitiveDict(person_source)
def fetch_and_update(self, bibtex=True, latex_format='EU', append_bbl=False):
    """Fetch missing references, rewrite replaced keys and store new entries.

    :param bibtex: when true, existing keys come from ``self.bib()`` and
        entries are fetched as BibTeX; otherwise existing keys come from
        ``self.bbl_keys()`` and entries are fetched as LaTeX.
    :param latex_format: format passed to ``ref.fetch_latex`` in LaTeX mode.
    :param append_bbl: forwarded to ``self.modify_and_write_bbl`` in LaTeX
        mode.
    """
    source_keys = self.bib().entries.keys() if bibtex else self.bbl_keys()
    # Only used for membership tests, so a set is sufficient.
    existing_keys = {k.lower() for k in source_keys}
    replace_keys = ODict()
    new_entries = ODict()
    type_name = 'BibTeX' if bibtex else 'LaTeX({})'.format(latex_format)

    for ref in self.references.values():
        if ref.key.lower() in existing_keys:
            if DEBUG:
                print('skip existing: {}'.format(ref.key))
            continue
        if Key.is_unknown(ref.key):
            print('WARNING: skip non-existing but unknown-type key {}'.format(ref.key))
            continue

        try:
            print('fetching', type_name, 'from inspire:', ref.key, end=' ')
            if bibtex:
                ref.fetch_bibtex()
            else:
                ref.fetch_latex(latex_format)
            sys.stdout.flush()
            time.sleep(0.3)
        # A tuple is required here: ``A or B`` evaluates to ``A`` alone, which
        # let MultipleRecordsFound escape the handler.
        except (RecordNotFound, MultipleRecordsFound) as e:
            print('\nERROR: {}'.format(e))
            continue

        if ref.new_key:
            replace_keys[ref.old_key] = ref
            print('->', ref.new_key, end=' ')
            sys.stdout.flush()
            time.sleep(0.3)

        # ref.key is re-read here because the fetch may have replaced it.
        if ref.key.lower() not in existing_keys:
            existing_keys.add(ref.key.lower())
            new_entries[ref.key] = ref
            print('[new entry]', end='')
        print('')

    replacements = [
        (appearance, ref.old_key, ref.new_key)
        for ref in replace_keys.values()
        for appearance in ref.positions
    ]
    self.replace_text(replacements)
    self.write_tex()

    new_ref_contents = '\n'.join(r.content for r in new_entries.values())
    if bibtex:
        self.append_and_update_bib(new_ref_contents)
    else:
        self.modify_and_write_bbl(new_ref_contents, append_bbl)
def __init__(self, type_, fields=None, persons=None, collection=None):
    """Initialise the entry.

    The type is kept both lower-cased (``type``) and as given
    (``original_type``). Missing ``fields`` / ``persons`` are treated as
    empty. ``collection`` is stored unchanged.
    """
    initial_fields = fields if fields is not None else {}
    initial_persons = persons if persons is not None else {}

    self.type = type_.lower()
    self.original_type = type_
    self.fields = FieldDict(self, initial_fields)
    self.persons = OrderedCaseInsensitiveDict(initial_persons)
    self.collection = collection

    # scratch variables for the BibTeX interpreter
    self.vars = {}
class Entry(object):
    """Bibliography entry. Important members are:

    - persons (a dict of Person objects)
    - fields (all dict of string)
    """

    def __init__(self, type_, fields=None, persons=None, collection=None):
        """Create an entry of the given type.

        :param type_: entry type; stored lower-cased in ``type`` and verbatim
            in ``original_type``.
        :param fields: optional initial fields (defaults to empty).
        :param persons: optional initial persons by role (defaults to empty).
        :param collection: optional owning collection, used by
            :py:meth:`get_crossref`.
        """
        if fields is None:
            fields = {}
        if persons is None:
            persons = {}
        self.type = type_.lower()
        self.original_type = type_
        self.fields = FieldDict(self, fields)
        self.persons = OrderedCaseInsensitiveDict(persons)
        self.collection = collection

        # for BibTeX interpreter
        self.vars = {}

    def __eq__(self, other):
        """Compare type, fields and persons with another :py:class:`Entry`."""
        if not isinstance(other, Entry):
            # Defer to the other operand; comparing a ``super`` proxy would
            # hand that proxy (not this entry) to the reflected ``__eq__``.
            return NotImplemented
        return (
            self.type == other.type
            and self.fields == other.fields
            and self.persons == other.persons
        )

    def __repr__(self):
        return 'Entry({type_}, fields={fields}, persons={persons})'.format(
            type_=repr(self.type),
            fields=repr(self.fields),
            persons=repr(self.persons),
        )

    def get_crossref(self):
        """Return the entry of ``self.collection`` named by the ``crossref`` field."""
        return self.collection.entries[self.fields['crossref']]

    def add_person(self, person, role):
        """Append ``person`` to the list of persons stored under ``role``."""
        self.persons.setdefault(role, []).append(person)

    def lower(self):
        """Return a new entry built from the lowered fields and persons."""
        return type(self)(
            self.type,
            fields=self.fields.lower(),
            persons=self.persons.lower(),
            collection=self.collection,
        )
def __init__(self, entries=None, preamble=None, wanted_entries=None, min_crossrefs=2):
    """
    A :py:class:`.BibliographyData` object holds bibliography entries in a
    dictionary, looked up by their keys.
    Every entry is an :py:class:`.Entry` object.

    A :py:class:`.BibliographyData` may also carry a LaTeX preamble, as
    defined by ``@PREAMBLE`` commands in the BibTeX file.
    """
    self.entries = OrderedCaseInsensitiveDict()
    '''A dictionary of bibliography entries referenced by their keys.

    The dictionary is case insensitive:

    >>> bib_data = parse_string("""
    ...     @ARTICLE{gnats,
    ...         author = {L[eslie] A. Aamport},
    ...         title = {The Gnats and Gnus Document Preparation System},
    ...     }
    ... """, 'bibtex')
    >>> bib_data.entries['gnats'] == bib_data.entries['GNATS']
    True

    '''

    self.crossref_count = CaseInsensitiveDefaultDict(int)
    self.min_crossrefs = min_crossrefs
    self._preamble = []

    if wanted_entries is None:
        self.wanted_entries = None
        self.citations = CaseInsensitiveSet()
    else:
        self.wanted_entries = CaseInsensitiveSet(wanted_entries)
        self.citations = CaseInsensitiveSet(wanted_entries)

    if entries:
        # Mappings are walked as (key, entry) pairs; anything else is
        # expected to already yield such pairs.
        pairs = entries.items() if isinstance(entries, Mapping) else entries
        for entry_key, entry in pairs:
            self.add_entry(entry_key, entry)

    if preamble:
        self._preamble.extend(preamble)
def __init__(self, entries=None, preamble=None, wanted_entries=None, min_crossrefs=2):
    """Initialise the bibliography data.

    :param entries: optional mapping, or iterable of ``(key, entry)`` pairs;
        each pair is passed to ``self.add_entry``.
    :param preamble: optional iterable whose items are appended to the
        internal preamble list.
    :param wanted_entries: optional keys; when given they seed both
        ``wanted_entries`` and ``citations``.
    :param min_crossrefs: stored as ``self.min_crossrefs``.
    """
    self.entries = OrderedCaseInsensitiveDict()
    self.crossref_count = CaseInsensitiveDefaultDict(int)
    self.min_crossrefs = min_crossrefs
    self._preamble = []
    if wanted_entries is not None:
        self.wanted_entries = CaseInsensitiveSet(wanted_entries)
        self.citations = CaseInsensitiveSet(wanted_entries)
    else:
        self.wanted_entries = None
        self.citations = CaseInsensitiveSet()
    if entries:
        if isinstance(entries, Mapping):
            # ``items()`` works on Python 2 and 3; ``iteritems()`` is
            # Python 2 only.
            entries = entries.items()
        for (key, entry) in entries:
            self.add_entry(key, entry)
    if preamble:
        self._preamble.extend(preamble)
def filter_bibolamazifile(self, bibolamazifile):
    """Restrict the bibolamazi file's entries to the collected citations.

    The citation set comes from ``auxfile.get_all_auxfile_citations`` for the
    job name resolved by ``auxfile.get_action_jobname``. Entries whose key is
    in that set are kept, in their existing order, and handed to
    ``bibolamazifile.setEntries``.
    """
    jobname = auxfile.get_action_jobname(self.jobname, bibolamazifile)
    cited_keys = auxfile.get_all_auxfile_citations(
        jobname,
        bibolamazifile,
        self.name(),
        self.search_dirs,
        return_set=True,
    )
    logger.longdebug("set of citations: %r", cited_keys)

    all_entries = bibolamazifile.bibliographyData().entries

    kept = OrderedCaseInsensitiveDict()
    for entry_key, entry in iteritems(all_entries):
        if entry_key not in cited_keys:
            continue
        kept[entry_key] = entry

    logger.longdebug("the new database has entries %r" %(kept.keys()))

    bibolamazifile.setEntries(iteritems(kept))
def __init__(self, type_, fields=None, persons=None, collection=None):
    """Initialise the entry from its type and optional fields and persons."""
    given_fields = {} if fields is None else fields
    given_persons = {} if persons is None else persons

    self.type = type_.lower()
    """Entry type (``'book'``, ``'article'``, etc.)."""
    self.original_type = type_

    self.fields = FieldDict(self, given_fields)
    """A dictionary of entry fields.
    The dictionary is ordered and case-insensitive."""

    self.persons = OrderedCaseInsensitiveDict(given_persons)
    """A dictionary of entry persons, by their roles.

    The most often used roles are ``'author'`` and ``'editor'``.
    """

    self.collection = collection

    # scratch variables for the BibTeX interpreter
    self.vars = {}
def filter_bibolamazifile(self, bibolamazifile):
    """Restrict the bibolamazi file's entries to the collected citations.

    The citation set comes from ``auxfile.get_all_auxfile_citations`` for
    ``self.jobname``. Entries whose key is in that set are kept, in their
    existing order, and handed to ``bibolamazifile.setEntries``.
    """
    logger.debug("Getting list of used citations from %s.aux.", self.jobname)

    citations = auxfile.get_all_auxfile_citations(
        self.jobname, bibolamazifile, self.name(), self.search_dirs,
        return_set=True)

    # One-element tuple keeps the formatting safe whatever type is returned.
    logger.longdebug("set of citations: %r" % (citations,))

    bibdata = bibolamazifile.bibliographyData()

    newentries = OrderedCaseInsensitiveDict()

    # ``items()`` works on Python 2 and 3; ``iteritems()`` is Python 2 only.
    for key, entry in bibdata.entries.items():
        if key in citations:
            newentries[key] = entry

    logger.longdebug("the new database has entries %r" % (newentries.keys(),))

    bibolamazifile.setEntries(newentries.items())

    return
def convert_bibtex_keys(input_file: str, output_file: str):
    """
    Rewrite a bibtex file with its keys converted to Google Scholar format.

    The entries are re-keyed using the pairs returned by
    ``obtain_replace_keys``, passed through ``update_arxiv_information``,
    and written to the output file.

    @input_file: string, input file name.
    @output_file: string, output file name.
    """
    bib_data = parse_file(input_file)
    old_keys, replacement_keys = obtain_replace_keys(bib_data)

    source_entries = bib_data.entries
    renamed = OrderedCaseInsensitiveDict()
    for old_key, replacement in zip(old_keys, replacement_keys):
        renamed[replacement] = source_entries[old_key]
    bib_data.entries = renamed

    bib_data = update_arxiv_information(bib_data)

    with open(output_file, 'w', encoding='utf-8') as ofile:
        bib_data.to_file(ofile)
def __init__(self, type_, fields=None, persons=None, collection=None):
    """Initialise the entry.

    ``fields`` and ``persons`` default to empty. ``rich_fields`` is a
    ``RichFieldProxyDict`` built over ``self.fields``.
    """
    raw_fields = {} if fields is None else fields
    raw_persons = {} if persons is None else persons

    self.type = type_.lower()
    self.original_type = type_

    field_dict = FieldDict(self, raw_fields)
    self.fields = field_dict
    self.rich_fields = RichFieldProxyDict(field_dict)

    self.persons = OrderedCaseInsensitiveDict(raw_persons)
    self.collection = collection

    # scratch variables for the BibTeX interpreter
    self.vars = {}
def __init__(self, entries=None, preamble=None, wanted_entries=None, min_crossrefs=2):
    """
    A :py:class:`.BibliographyData` object contains a dictionary of bibliography
    entries referenced by their keys.
    Each entry represented by an :py:class:`.Entry` object.

    Additionally, :py:class:`.BibliographyData` may contain a LaTeX
    preamble defined by ``@PREAMBLE`` commands in the BibTeX file.

    :param entries: optional mapping, or iterable of ``(key, entry)`` pairs;
        each pair is passed to ``self.add_entry``.
    :param preamble: optional iterable whose items are appended to the
        internal preamble list.
    :param wanted_entries: optional keys; when given they seed both
        ``wanted_entries`` and ``citations``.
    :param min_crossrefs: stored as ``self.min_crossrefs``.
    """

    self.entries = OrderedCaseInsensitiveDict()
    '''A dictionary of bibliography entries referenced by their keys.

    The dictionary is case insensitive:

    >>> bib_data = parse_string("""
    ...     @ARTICLE{gnats,
    ...         author = {L[eslie] A. Aamport},
    ...         title = {The Gnats and Gnus Document Preparation System},
    ...     }
    ... """, 'bibtex')
    >>> bib_data.entries['gnats'] == bib_data.entries['GNATS']
    True

    '''

    self.crossref_count = CaseInsensitiveDefaultDict(int)
    self.min_crossrefs = min_crossrefs
    self._preamble = []
    if wanted_entries is not None:
        self.wanted_entries = CaseInsensitiveSet(wanted_entries)
        self.citations = CaseInsensitiveSet(wanted_entries)
    else:
        self.wanted_entries = None
        self.citations = CaseInsensitiveSet()
    if entries:
        if isinstance(entries, Mapping):
            # ``items()`` works on Python 2 and 3; ``iteritems()`` is
            # Python 2 only.
            entries = entries.items()
        for (key, entry) in entries:
            self.add_entry(key, entry)
    if preamble:
        self._preamble.extend(preamble)
def __init__(self, filename):
    """Load a TeX file and index the citation keys it contains.

    ``filename`` is tried as given and then with ``.tex`` appended; the first
    path that can be read wins. Every match of ``self.CITE_REGEX`` in the
    comment-stripped text is split on commas, and each key is recorded in
    ``self.references`` (first-seen order) with one position per appearance.

    :param filename: path of the TeX file, with or without ``.tex``.
    :raises FileLookupFailedError: if none of the candidate paths can be read.
    """
    self.text = None
    errors = []
    possible_paths = [filename, filename + '.tex']
    for path in possible_paths:
        try:
            with open(path, mode='r') as file:
                contents = file.read()
        except IOError as e:
            errors.append(e)
        else:
            self.text_original = contents
            self.text = contents
            self.filename = path
            self.stem = os.path.splitext(path)[0]
            break
    if self.text is None:
        raise FileLookupFailedError(errors=errors, paths=possible_paths)

    self._bib_name = None
    self._bib = None
    self._bbl = None

    # generate references
    text_uncommented = self.strip_comment(self.text)
    self.references = ODict()  # Order is important!

    for cite in self.CITE_REGEX.finditer(text_uncommented):
        pos = Position(str=text_uncommented[:cite.start()])
        pos.shift(cite.group('pre'))
        for key_raw in cite.group('body').split(','):
            stripping = re.match(r'^(\s*)(\S+)(\s*)$', key_raw)
            if stripping is None:
                # Empty key or key with inner whitespace (e.g. ``\cite{a,}``):
                # nothing to register, but keep the position in sync.
                pos.shift(key_raw)
                pos.shift(',')
                continue
            leading, key, trailing = stripping.groups()
            pos.shift(leading)
            if key not in self.references:
                self.references[key] = Ref(key, position=pos.copy())
            else:
                self.references[key].positions.append(pos.copy())
            pos.shift(key).shift(trailing)
            pos.shift(',')
class BibliographyData(object):
    def __init__(self, entries=None, preamble=None, wanted_entries=None, min_crossrefs=2):
        """
        A :py:class:`.BibliographyData` object contains a dictionary of bibliography
        entries referenced by their keys.
        Each entry represented by an :py:class:`.Entry` object.

        Additionally, :py:class:`.BibliographyData` may contain a LaTeX
        preamble defined by ``@PREAMBLE`` commands in the BibTeX file.

        :param entries: optional mapping, or iterable of ``(key, entry)``
            pairs; each pair is passed to :py:meth:`add_entry`.
        :param preamble: optional iterable of preamble strings.
        :param wanted_entries: optional keys; when given they seed both
            ``wanted_entries`` and ``citations``.
        :param min_crossrefs: stored as ``self.min_crossrefs``.
        """

        self.entries = OrderedCaseInsensitiveDict()
        '''A dictionary of bibliography entries referenced by their keys.

        The dictionary is case insensitive:

        >>> bib_data = parse_string("""
        ...     @ARTICLE{gnats,
        ...         author = {L[eslie] A. Aamport},
        ...         title = {The Gnats and Gnus Document Preparation System},
        ...     }
        ... """, 'bibtex')
        >>> bib_data.entries['gnats'] == bib_data.entries['GNATS']
        True

        '''

        self.crossref_count = CaseInsensitiveDefaultDict(int)
        self.min_crossrefs = min_crossrefs
        self._preamble = []
        if wanted_entries is not None:
            self.wanted_entries = CaseInsensitiveSet(wanted_entries)
            self.citations = CaseInsensitiveSet(wanted_entries)
        else:
            self.wanted_entries = None
            self.citations = CaseInsensitiveSet()
        if entries:
            if isinstance(entries, Mapping):
                # ``items()`` works on Python 2 and 3; ``iteritems()`` is
                # Python 2 only.
                entries = entries.items()
            for (key, entry) in entries:
                self.add_entry(key, entry)
        if preamble:
            self._preamble.extend(preamble)

    def __eq__(self, other):
        """Compare entries and preamble with another :py:class:`BibliographyData`."""
        if not isinstance(other, BibliographyData):
            # Defer to the other operand; comparing a ``super`` proxy would
            # hand that proxy (not this object) to the reflected ``__eq__``.
            return NotImplemented
        return (
            self.entries == other.entries
            and self._preamble == other._preamble
        )

    def __repr__(self):
        return 'BibliographyData(entries={entries}, preamble={preamble})'.format(
            entries=repr(self.entries),
            preamble=repr(self._preamble),
        )

    def add_to_preamble(self, *values):
        """Append the given values to the preamble list."""
        self._preamble.extend(values)

    @property
    def preamble(self):
        r'''
        LaTeX preamble.

        >>> bib_data = parse_string(r"""
        ...     @PREAMBLE{"\newcommand{\noopsort}[1]{}"}
        ... """, 'bibtex')
        >>> print(bib_data.preamble)
        \newcommand{\noopsort}[1]{}

        .. versionadded:: 0.19
            Earlier versions used :py:meth:`.get_preamble()`, which is now deprecated.
        '''
        return ''.join(self._preamble)

    @deprecated('0.19', 'use BibliographyData.preamble instead')
    def get_preamble(self):
        """
        .. deprecated:: 0.19
            Use :py:attr:`.preamble` instead.
        """
        return self.preamble

    def want_entry(self, key):
        """Return whether ``key`` is wanted.

        True when no wanted set was given, when ``key`` is in it, or when it
        contains the wildcard ``'*'``.
        """
        return (
            self.wanted_entries is None
            or key in self.wanted_entries
            or '*' in self.wanted_entries
        )

    def get_canonical_key(self, key):
        """Return the spelling of ``key`` recorded in ``citations``, else ``key``."""
        if key in self.citations:
            return self.citations.get_canonical_key(key)
        else:
            return key

    def add_entry(self, key, entry):
        """Add ``entry`` under ``key``.

        Unwanted keys are ignored and a repeated key is reported through
        ``report_error``. A ``crossref`` field, if present, is added to the
        wanted set when one exists.
        """
        if not self.want_entry(key):
            return
        if key in self.entries:
            report_error(BibliographyDataError('repeated bibliograhpy entry: %s' % key))
            return
        entry.collection = self
        entry.key = self.get_canonical_key(key)
        self.entries[entry.key] = entry
        try:
            crossref = entry.fields['crossref']
        except KeyError:
            pass
        else:
            if self.wanted_entries is not None:
                self.wanted_entries.add(crossref)

    def add_entries(self, entries):
        """Add every ``(key, entry)`` pair of ``entries`` via :py:meth:`add_entry`."""
        for key, entry in entries:
            self.add_entry(key, entry)

    def _get_crossreferenced_citations(self, citations, min_crossrefs):
        """
        Get citations not cited explicitly but referenced by other citations.

        >>> from pybtex.database import Entry
        >>> data = BibliographyData({
        ...     'main_article': Entry('article', {'crossref': 'xrefd_arcicle'}),
        ...     'xrefd_arcicle': Entry('article'),
        ... })
        >>> list(data._get_crossreferenced_citations([], min_crossrefs=1))
        []
        >>> list(data._get_crossreferenced_citations(['main_article'], min_crossrefs=1))
        ['xrefd_arcicle']
        >>> list(data._get_crossreferenced_citations(['Main_article'], min_crossrefs=1))
        ['xrefd_arcicle']
        >>> list(data._get_crossreferenced_citations(['main_article'], min_crossrefs=2))
        []
        >>> list(data._get_crossreferenced_citations(['xrefd_arcicle'], min_crossrefs=1))
        []

        >>> data2 = BibliographyData(data.entries, wanted_entries=data.entries.keys())
        >>> list(data2._get_crossreferenced_citations([], min_crossrefs=1))
        []
        >>> list(data2._get_crossreferenced_citations(['main_article'], min_crossrefs=1))
        ['xrefd_arcicle']
        >>> list(data2._get_crossreferenced_citations(['Main_article'], min_crossrefs=1))
        ['xrefd_arcicle']
        >>> list(data2._get_crossreferenced_citations(['main_article'], min_crossrefs=2))
        []
        >>> list(data2._get_crossreferenced_citations(['xrefd_arcicle'], min_crossrefs=1))
        []
        >>> list(data2._get_crossreferenced_citations(['xrefd_arcicle'], min_crossrefs=1))
        []

        """

        crossref_count = CaseInsensitiveDefaultDict(int)
        citation_set = CaseInsensitiveSet(citations)
        for citation in citations:
            try:
                entry = self.entries[citation]
                crossref = entry.fields['crossref']
            except KeyError:
                continue
            try:
                crossref_entry = self.entries[crossref]
            except KeyError:
                report_error(BibliographyDataError(
                    'bad cross-reference: entry "{key}" refers to '
                    'entry "{crossref}" which does not exist.'.format(
                        key=citation, crossref=crossref,
                    )
                ))
                continue

            canonical_crossref = crossref_entry.key
            crossref_count[canonical_crossref] += 1
            # Yield a target once, when it reaches the threshold and is not
            # already cited.
            if crossref_count[canonical_crossref] >= min_crossrefs and canonical_crossref not in citation_set:
                citation_set.add(canonical_crossref)
                yield canonical_crossref

    def _expand_wildcard_citations(self, citations):
        r"""
        Expand wildcard citations (\citation{*} in .aux file).

        >>> from pybtex.database import Entry
        >>> data = BibliographyData((
        ...     ('uno', Entry('article')),
        ...     ('dos', Entry('article')),
        ...     ('tres', Entry('article')),
        ...     ('cuatro', Entry('article')),
        ... ))
        >>> list(data._expand_wildcard_citations([]))
        []
        >>> list(data._expand_wildcard_citations(['*']))
        ['uno', 'dos', 'tres', 'cuatro']
        >>> list(data._expand_wildcard_citations(['uno', '*']))
        ['uno', 'dos', 'tres', 'cuatro']
        >>> list(data._expand_wildcard_citations(['dos', '*']))
        ['dos', 'uno', 'tres', 'cuatro']
        >>> list(data._expand_wildcard_citations(['*', 'uno']))
        ['uno', 'dos', 'tres', 'cuatro']
        >>> list(data._expand_wildcard_citations(['*', 'DOS']))
        ['uno', 'dos', 'tres', 'cuatro']

        """

        citation_set = CaseInsensitiveSet()
        for citation in citations:
            if citation == '*':
                for key in self.entries:
                    if key not in citation_set:
                        citation_set.add(key)
                        yield key
            else:
                if citation not in citation_set:
                    citation_set.add(citation)
                    yield citation

    def add_extra_citations(self, citations, min_crossrefs):
        """Return ``citations`` with wildcards expanded, followed by the
        cross-referenced citations they pull in."""
        expanded_citations = list(self._expand_wildcard_citations(citations))
        crossrefs = list(self._get_crossreferenced_citations(expanded_citations, min_crossrefs))
        return expanded_citations + crossrefs

    def to_string(self, bib_format, **kwargs):
        """
        Return the data as a unicode string in the given format.

        :param bib_format: Data format ("bibtex", "yaml", etc.).

        .. versionadded:: 0.19
        """
        writer = find_plugin('pybtex.database.output', bib_format)(**kwargs)
        return writer.to_string(self)

    def to_bytes(self, bib_format, **kwargs):
        """
        Return the data as a byte string in the given format.

        :param bib_format: Data format ("bibtex", "yaml", etc.).

        .. versionadded:: 0.19
        """
        writer = find_plugin('pybtex.database.output', bib_format)(**kwargs)
        return writer.to_bytes(self)

    def to_file(self, file, bib_format=None, **kwargs):
        """
        Save the data to a file.

        :param file: A file name or a file-like object.
        :param bib_format: Data format ("bibtex", "yaml", etc.).
            If not specified, Pybtex will try to guess by the file name.

        .. versionadded:: 0.19
        """
        try:
            string_types = basestring  # Python 2: covers str and unicode
        except NameError:
            string_types = str  # Python 3 has no ``basestring``
        if isinstance(file, string_types):
            filename = file
        else:
            filename = getattr(file, 'name', None)
        writer = find_plugin('pybtex.database.output', bib_format, filename=filename)(**kwargs)
        return writer.write_file(self, file)

    def lower(self):
        u'''
        Return another :py:class:`.BibliographyData` with all identifiers converted to lowercase.

        >>> data = parse_string("""
        ...     @BOOK{Obrazy,
        ...         title = "Obrazy z Rus",
        ...         author = "Karel Havlíček Borovský",
        ...     }
        ...     @BOOK{Elegie,
        ...         title = "Tirolské elegie",
        ...         author = "Karel Havlíček Borovský",
        ...     }
        ... """, 'bibtex')
        >>> data_lower = data.lower()
        >>> list(data_lower.entries.keys())
        ['obrazy', 'elegie']
        >>> for entry in data_lower.entries.values():
        ...     entry.key
        ...     list(entry.persons.keys())
        ...     list(entry.fields.keys())
        'obrazy'
        ['author']
        ['title']
        'elegie'
        ['author']
        ['title']

        '''

        # ``items()`` works on Python 2 and 3; ``iteritems()`` is Python 2 only.
        entries_lower = ((key.lower(), entry.lower()) for key, entry in self.entries.items())
        return type(self)(
            entries=entries_lower,
            preamble=self._preamble,
            wanted_entries=self.wanted_entries,
            min_crossrefs=self.min_crossrefs,
        )
class BibliographyData(object):
    def __init__(self, entries=None, preamble=None, wanted_entries=None, min_crossrefs=2):
        """
        A :py:class:`.BibliographyData` object contains a dictionary of bibliography
        entries referenced by their keys.
        Each entry represented by an :py:class:`.Entry` object.

        Additionally, :py:class:`.BibliographyData` may contain a LaTeX
        preamble defined by ``@PREAMBLE`` commands in the BibTeX file.

        :param entries: optional mapping, or iterable of ``(key, entry)``
            pairs; each pair is passed to :py:meth:`add_entry`.
        :param preamble: optional iterable of preamble strings.
        :param wanted_entries: optional keys; when given they seed both
            ``wanted_entries`` and ``citations``.
        :param min_crossrefs: stored as ``self.min_crossrefs``.
        """

        self.entries = OrderedCaseInsensitiveDict()
        '''A dictionary of bibliography entries referenced by their keys.

        The dictionary is case insensitive:

        >>> bib_data = parse_string("""
        ...     @ARTICLE{gnats,
        ...         author = {L[eslie] A. Aamport},
        ...         title = {The Gnats and Gnus Document Preparation System},
        ...     }
        ... """, 'bibtex')
        >>> bib_data.entries['gnats'] == bib_data.entries['GNATS']
        True

        '''

        self.crossref_count = CaseInsensitiveDefaultDict(int)
        self.min_crossrefs = min_crossrefs
        self._preamble = []
        if wanted_entries is not None:
            self.wanted_entries = CaseInsensitiveSet(wanted_entries)
            self.citations = CaseInsensitiveSet(wanted_entries)
        else:
            self.wanted_entries = None
            self.citations = CaseInsensitiveSet()
        if entries:
            if isinstance(entries, Mapping):
                entries = entries.items()
            for (key, entry) in entries:
                self.add_entry(key, entry)
        if preamble:
            self._preamble.extend(preamble)

    def __eq__(self, other):
        """Compare entries and preamble with another :py:class:`BibliographyData`."""
        if not isinstance(other, BibliographyData):
            # Defer to the other operand; comparing a ``super`` proxy would
            # hand that proxy (not this object) to the reflected ``__eq__``.
            return NotImplemented
        return (self.entries == other.entries
                and self._preamble == other._preamble)

    def __repr__(self):
        """Return a multi-line representation with a line break before each entry."""
        repr_entry = repr(self.entries)
        keys = self.entries.keys()

        for key in keys:
            ind = repr_entry.index(key) - 2  # find first instance
            repr_entry = repr_entry[:ind] + "\n" + repr_entry[ind:]

        repr_entry = indent(repr_entry, prefix="    ")
        repr_entry = repr_entry[4:]  # drop 1st indent

        return ("BibliographyData(\n"
                "  entries={0},\n\n"
                "  preamble={1})".format(repr_entry, repr(self._preamble)))

    def add_to_preamble(self, *values):
        """Append the given values to the preamble list."""
        self._preamble.extend(values)

    @property
    def preamble(self):
        r'''
        LaTeX preamble.

        >>> bib_data = parse_string(r"""
        ...     @PREAMBLE{"\newcommand{\noopsort}[1]{}"}
        ... """, 'bibtex')
        >>> print(bib_data.preamble)
        \newcommand{\noopsort}[1]{}

        .. versionadded:: 0.19
            Earlier versions used :py:meth:`.get_preamble()`, which is now deprecated.
        '''
        return ''.join(self._preamble)

    @deprecated('0.19', 'use BibliographyData.preamble instead')
    def get_preamble(self):
        """
        .. deprecated:: 0.19
            Use :py:attr:`.preamble` instead.
        """
        return self.preamble

    def want_entry(self, key):
        """Return whether ``key`` is wanted.

        True when no wanted set was given, when ``key`` is in it, or when it
        contains the wildcard ``'*'``.
        """
        return (self.wanted_entries is None
                or key in self.wanted_entries
                or '*' in self.wanted_entries)

    def get_canonical_key(self, key):
        """Return the spelling of ``key`` recorded in ``citations``, else ``key``."""
        if key in self.citations:
            return self.citations.get_canonical_key(key)
        else:
            return key

    def add_entry(self, key, entry):
        """Add ``entry`` under ``key``.

        Unwanted keys are ignored and a repeated key is reported through
        ``report_error``. A ``crossref`` field, if present, is added to the
        wanted set when one exists.
        """
        if not self.want_entry(key):
            return
        if key in self.entries:
            report_error(
                BibliographyDataError('repeated bibliograhpy entry: %s' % key))
            return
        entry.key = self.get_canonical_key(key)
        self.entries[entry.key] = entry
        try:
            crossref = entry.fields['crossref']
        except KeyError:
            pass
        else:
            if self.wanted_entries is not None:
                self.wanted_entries.add(crossref)

    def add_entries(self, entries):
        """Add every ``(key, entry)`` pair of ``entries`` via :py:meth:`add_entry`."""
        for key, entry in entries:
            self.add_entry(key, entry)

    @fix_unicode_literals_in_doctest
    def _get_crossreferenced_citations(self, citations, min_crossrefs):
        r"""
        Get citations not cited explicitly but referenced by other citations.

        >>> from pybtex.database import Entry
        >>> data = BibliographyData({
        ...     'main_article': Entry('article', {'crossref': 'xrefd_arcicle'}),
        ...     'xrefd_arcicle': Entry('article'),
        ... })
        >>> list(data._get_crossreferenced_citations([], min_crossrefs=1))
        []
        >>> list(data._get_crossreferenced_citations(['main_article'], min_crossrefs=1))
        [u'xrefd_arcicle']
        >>> list(data._get_crossreferenced_citations(['Main_article'], min_crossrefs=1))
        [u'xrefd_arcicle']
        >>> list(data._get_crossreferenced_citations(['main_article'], min_crossrefs=2))
        []
        >>> list(data._get_crossreferenced_citations(['xrefd_arcicle'], min_crossrefs=1))
        []

        >>> data2 = BibliographyData(data.entries, wanted_entries=data.entries.keys())
        >>> list(data2._get_crossreferenced_citations([], min_crossrefs=1))
        []
        >>> list(data2._get_crossreferenced_citations(['main_article'], min_crossrefs=1))
        [u'xrefd_arcicle']
        >>> list(data2._get_crossreferenced_citations(['Main_article'], min_crossrefs=1))
        [u'xrefd_arcicle']
        >>> list(data2._get_crossreferenced_citations(['main_article'], min_crossrefs=2))
        []
        >>> list(data2._get_crossreferenced_citations(['xrefd_arcicle'], min_crossrefs=1))
        []
        >>> list(data2._get_crossreferenced_citations(['xrefd_arcicle'], min_crossrefs=1))
        []

        """

        crossref_count = CaseInsensitiveDefaultDict(int)
        citation_set = CaseInsensitiveSet(citations)
        for citation in citations:
            try:
                entry = self.entries[citation]
                crossref = entry.fields['crossref']
            except KeyError:
                continue
            try:
                crossref_entry = self.entries[crossref]
            except KeyError:
                report_error(
                    BibliographyDataError(
                        'bad cross-reference: entry "{key}" refers to '
                        'entry "{crossref}" which does not exist.'.format(
                            key=citation,
                            crossref=crossref,
                        )))
                continue

            canonical_crossref = crossref_entry.key
            crossref_count[canonical_crossref] += 1
            # Yield a target once, when it reaches the threshold and is not
            # already cited.
            if (crossref_count[canonical_crossref] >= min_crossrefs
                    and canonical_crossref not in citation_set):
                citation_set.add(canonical_crossref)
                yield canonical_crossref

    @fix_unicode_literals_in_doctest
    def _expand_wildcard_citations(self, citations):
        r"""
        Expand wildcard citations (\citation{*} in .aux file).

        >>> from pybtex.database import Entry
        >>> data = BibliographyData((
        ...     ('uno', Entry('article')),
        ...     ('dos', Entry('article')),
        ...     ('tres', Entry('article')),
        ...     ('cuatro', Entry('article')),
        ... ))
        >>> list(data._expand_wildcard_citations([]))
        []
        >>> list(data._expand_wildcard_citations(['*']))
        [u'uno', u'dos', u'tres', u'cuatro']
        >>> list(data._expand_wildcard_citations(['uno', '*']))
        [u'uno', u'dos', u'tres', u'cuatro']
        >>> list(data._expand_wildcard_citations(['dos', '*']))
        [u'dos', u'uno', u'tres', u'cuatro']
        >>> list(data._expand_wildcard_citations(['*', 'uno']))
        [u'uno', u'dos', u'tres', u'cuatro']
        >>> list(data._expand_wildcard_citations(['*', 'DOS']))
        [u'uno', u'dos', u'tres', u'cuatro']

        """

        citation_set = CaseInsensitiveSet()
        for citation in citations:
            if citation == '*':
                for key in self.entries:
                    if key not in citation_set:
                        citation_set.add(key)
                        yield key
            else:
                if citation not in citation_set:
                    citation_set.add(citation)
                    yield citation

    def add_extra_citations(self, citations, min_crossrefs):
        """Return ``citations`` with wildcards expanded, followed by the
        cross-referenced citations they pull in."""
        expanded_citations = list(self._expand_wildcard_citations(citations))
        crossrefs = list(
            self._get_crossreferenced_citations(expanded_citations,
                                                min_crossrefs))
        return expanded_citations + crossrefs

    def to_string(self, bib_format, **kwargs):
        """
        Return the data as a unicode string in the given format.

        :param bib_format: Data format ("bibtex", "yaml", etc.).

        .. versionadded:: 0.19
        """
        writer = find_plugin('pybtex.database.output', bib_format)(**kwargs)
        return writer.to_string(self)

    @classmethod
    def from_string(cls, value, bib_format, **kwargs):
        """
        Return the data from a unicode string in the given format.

        :param bib_format: Data format ("bibtex", "yaml", etc.).

        .. versionadded:: 0.22.2
        """
        return parse_string(value, bib_format, **kwargs)

    def to_bytes(self, bib_format, **kwargs):
        """
        Return the data as a byte string in the given format.

        :param bib_format: Data format ("bibtex", "yaml", etc.).

        .. versionadded:: 0.19
        """
        writer = find_plugin('pybtex.database.output', bib_format)(**kwargs)
        return writer.to_bytes(self)

    def to_file(self, file, bib_format=None, **kwargs):
        """
        Save the data to a file.

        :param file: A file name or a file-like object.
        :param bib_format: Data format ("bibtex", "yaml", etc.).
            If not specified, Pybtex will try to guess by the file name.

        .. versionadded:: 0.19
        """
        if isinstance(file, six.string_types):
            filename = file
        else:
            filename = getattr(file, 'name', None)
        writer = find_plugin('pybtex.database.output', bib_format,
                             filename=filename)(**kwargs)
        return writer.write_file(self, file)

    @fix_unicode_literals_in_doctest
    def lower(self):
        u'''
        Return another :py:class:`.BibliographyData` with all identifiers converted to lowercase.

        >>> data = parse_string("""
        ...     @BOOK{Obrazy,
        ...         title = "Obrazy z Rus",
        ...         author = "Karel Havlíček Borovský",
        ...     }
        ...     @BOOK{Elegie,
        ...         title = "Tirolské elegie",
        ...         author = "Karel Havlíček Borovský",
        ...     }
        ... """, 'bibtex')
        >>> data_lower = data.lower()
        >>> list(data_lower.entries.keys())
        [u'obrazy', u'elegie']
        >>> for entry in data_lower.entries.values():
        ...     entry.key
        ...     list(entry.persons.keys())
        ...     list(entry.fields.keys())
        u'obrazy'
        [u'author']
        [u'title']
        u'elegie'
        [u'author']
        [u'title']

        '''

        entries_lower = ((key.lower(), entry.lower())
                         for key, entry in self.entries.items())
        return type(self)(
            entries=entries_lower,
            preamble=self._preamble,
            wanted_entries=self.wanted_entries,
            min_crossrefs=self.min_crossrefs,
        )
class BibliographyData(object):
    """An ordered, case-insensitive collection of bibliography entries.

    Entries are stored in ``self.entries`` keyed by citation key. When
    ``wanted_entries`` is given, only those keys (or every key, if ``'*'`` is
    among them) are accepted by :py:meth:`add_entry`.
    """

    def __init__(self, entries=None, preamble=None, wanted_entries=None, min_crossrefs=2):
        """
        :param entries: A mapping or an iterable of ``(key, entry)`` pairs.
        :param preamble: An iterable of preamble strings.
        :param wanted_entries: Keys to keep, or ``None`` to keep everything.
        :param min_crossrefs: Stored as ``self.min_crossrefs``.
        """
        self.entries = OrderedCaseInsensitiveDict()
        self.crossref_count = CaseInsensitiveDefaultDict(int)
        self.min_crossrefs = min_crossrefs
        self._preamble = []
        if wanted_entries is not None:
            self.wanted_entries = CaseInsensitiveSet(wanted_entries)
            self.citations = CaseInsensitiveSet(wanted_entries)
        else:
            self.wanted_entries = None
            self.citations = CaseInsensitiveSet()
        if entries:
            if isinstance(entries, Mapping):
                entries = iter(entries.items())
            for (key, entry) in entries:
                self.add_entry(key, entry)
        if preamble:
            self._preamble.extend(preamble)

    def __eq__(self, other):
        """Compare entries and preamble; defer to ``other`` for foreign types."""
        if not isinstance(other, BibliographyData):
            # Returning NotImplemented lets Python try the reflected
            # comparison with the real object instead of a ``super`` proxy.
            return NotImplemented
        return (self.entries == other.entries
                and self._preamble == other._preamble)

    def __repr__(self):
        return 'BibliographyData(entries={entries}, preamble={preamble})'.format(
            entries=repr(self.entries),
            preamble=repr(self._preamble),
        )

    def add_to_preamble(self, *values):
        """Append the given strings to the preamble."""
        self._preamble.extend(values)

    @deprecated('0.17', 'use get_preamble instead')
    def preamble(self):
        return self.get_preamble()

    def get_preamble(self):
        """Return the preamble pieces concatenated into a single string."""
        return ''.join(self._preamble)

    def want_entry(self, key):
        """Tell whether an entry with the given key should be kept."""
        return (self.wanted_entries is None
                or key in self.wanted_entries
                or '*' in self.wanted_entries)

    def get_canonical_key(self, key):
        """Return the spelling of ``key`` recorded in ``self.citations``, if any."""
        if key in self.citations:
            return self.citations.get_canonical_key(key)
        else:
            return key

    def add_entry(self, key, entry):
        """Add ``entry`` under ``key``.

        Unwanted keys are ignored silently; a repeated key is reported through
        ``report_error`` and the new entry is dropped. On success the entry's
        ``collection`` and ``key`` attributes are set.
        """
        if not self.want_entry(key):
            return
        if key in self.entries:
            report_error(
                BibliographyDataError('repeated bibliograhpy entry: %s' % key))
            return
        entry.collection = self
        entry.key = self.get_canonical_key(key)
        self.entries[entry.key] = entry
        try:
            crossref = entry.fields['crossref']
        except KeyError:
            pass
        else:
            # A cross-referenced entry becomes wanted too, so that it is not
            # filtered out when it is added later.
            if self.wanted_entries is not None:
                self.wanted_entries.add(crossref)

    def add_entries(self, entries):
        """Add every ``(key, entry)`` pair from ``entries``."""
        for key, entry in entries:
            self.add_entry(key, entry)

    def get_crossreferenced_citations(self, citations, min_crossrefs):
        """
        Get citations not cited explicitly but referenced by other citations.

        >>> from pybtex.database import Entry
        >>> data = BibliographyData({
        ...     'main_article': Entry('article', {'crossref': 'xrefd_arcicle'}),
        ...     'xrefd_arcicle': Entry('article'),
        ... })
        >>> list(data.get_crossreferenced_citations([], min_crossrefs=1))
        []
        >>> list(data.get_crossreferenced_citations(['main_article'], min_crossrefs=1))
        ['xrefd_arcicle']
        >>> list(data.get_crossreferenced_citations(['Main_article'], min_crossrefs=1))
        ['xrefd_arcicle']
        >>> list(data.get_crossreferenced_citations(['main_article'], min_crossrefs=2))
        []
        >>> list(data.get_crossreferenced_citations(['xrefd_arcicle'], min_crossrefs=1))
        []

        >>> data2 = BibliographyData(data.entries, wanted_entries=list(data.entries.keys()))
        >>> list(data2.get_crossreferenced_citations([], min_crossrefs=1))
        []
        >>> list(data2.get_crossreferenced_citations(['main_article'], min_crossrefs=1))
        ['xrefd_arcicle']
        >>> list(data2.get_crossreferenced_citations(['Main_article'], min_crossrefs=1))
        ['xrefd_arcicle']
        >>> list(data2.get_crossreferenced_citations(['main_article'], min_crossrefs=2))
        []
        >>> list(data2.get_crossreferenced_citations(['xrefd_arcicle'], min_crossrefs=1))
        []
        >>> list(data2.get_crossreferenced_citations(['xrefd_arcicle'], min_crossrefs=1))
        []

        """
        crossref_count = CaseInsensitiveDefaultDict(int)
        citation_set = CaseInsensitiveSet(citations)
        for citation in citations:
            try:
                entry = self.entries[citation]
                crossref = entry.fields['crossref']
            except KeyError:
                # Unknown citation, or an entry without a crossref field.
                continue
            try:
                crossref_entry = self.entries[crossref]
            except KeyError:
                report_error(
                    BibliographyDataError(
                        'bad cross-reference: entry "{key}" refers to '
                        'entry "{crossref}" which does not exist.'.format(
                            key=citation, crossref=crossref,
                        )))
                continue
            canonical_crossref = crossref_entry.key
            crossref_count[canonical_crossref] += 1
            # Yield each target once, as soon as it has been referenced
            # often enough and was not cited explicitly.
            if (crossref_count[canonical_crossref] >= min_crossrefs
                    and canonical_crossref not in citation_set):
                citation_set.add(canonical_crossref)
                yield canonical_crossref

    def expand_wildcard_citations(self, citations):
        r"""
        Expand wildcard citations (\citation{*} in .aux file).

        >>> from pybtex.database import Entry
        >>> data = BibliographyData((
        ...     ('uno', Entry('article')),
        ...     ('dos', Entry('article')),
        ...     ('tres', Entry('article')),
        ...     ('cuatro', Entry('article')),
        ... ))
        >>> list(data.expand_wildcard_citations([]))
        []
        >>> list(data.expand_wildcard_citations(['*']))
        ['uno', 'dos', 'tres', 'cuatro']
        >>> list(data.expand_wildcard_citations(['uno', '*']))
        ['uno', 'dos', 'tres', 'cuatro']
        >>> list(data.expand_wildcard_citations(['dos', '*']))
        ['dos', 'uno', 'tres', 'cuatro']
        >>> list(data.expand_wildcard_citations(['*', 'uno']))
        ['uno', 'dos', 'tres', 'cuatro']
        >>> list(data.expand_wildcard_citations(['*', 'DOS']))
        ['uno', 'dos', 'tres', 'cuatro']

        """
        citation_set = CaseInsensitiveSet()
        for citation in citations:
            if citation == '*':
                for key in self.entries:
                    if key not in citation_set:
                        citation_set.add(key)
                        yield key
            else:
                if citation not in citation_set:
                    citation_set.add(citation)
                    yield citation

    def add_extra_citations(self, citations, min_crossrefs):
        """Return ``citations`` with wildcards expanded, followed by the
        cross-referenced citations they pull in."""
        expanded_citations = list(self.expand_wildcard_citations(citations))
        crossrefs = list(
            self.get_crossreferenced_citations(expanded_citations, min_crossrefs))
        return expanded_citations + crossrefs

    def lower(self):
        """
        Return another BibliographyData with all identifiers converted to lowercase.

        >>> data = BibliographyData([
        ...     ('Obrazy', Entry('Book', [('Title', 'Obrazy z Rus')], [('Author', 'Karel Havlíček Borovský')])),
        ...     ('Elegie', Entry('BOOK', [('TITLE', 'Tirolské elegie')], [('AUTHOR', 'Karel Havlíček Borovský')])),
        ... ]).lower()
        >>> list(data.entries.keys())
        ['obrazy', 'elegie']
        >>> for entry in list(data.entries.values()):
        ...     entry.key
        ...     list(entry.persons.keys())
        ...     list(entry.fields.keys())
        'obrazy'
        ['author']
        ['title']
        'elegie'
        ['author']
        ['title']

        """
        entries_lower = ((key.lower(), entry.lower())
                         for key, entry in self.entries.items())
        return type(self)(
            entries=entries_lower,
            preamble=self._preamble,
            wanted_entries=self.wanted_entries,
            min_crossrefs=self.min_crossrefs,
        )
class Entry(object):
    """A bibliography entry."""

    type = None
    """Entry type (``'book'``, ``'article'``, etc.)."""

    key = None
    """Entry key (for example, ``'fukushima1980neocognitron'``)."""

    fields = None
    """A dictionary of entry fields.
    The dictionary is ordered and case-insensitive."""

    persons = None
    """
    A dictionary of entry persons, by their roles.

    The most often used roles are ``'author'`` and ``'editor'``.
    """

    def __init__(self, type_, fields=None, persons=None):
        """
        :param type_: Entry type; stored lowercased in ``self.type`` and
            verbatim in ``self.original_type``.
        :param fields: Initial fields (mapping or iterable of pairs).
        :param persons: Initial persons, keyed by role.
        """
        if fields is None:
            fields = {}
        if persons is None:
            persons = {}
        self.type = type_.lower()
        self.original_type = type_
        self.fields = OrderedCaseInsensitiveDict(fields)
        self.persons = OrderedCaseInsensitiveDict(persons)

    def __eq__(self, other):
        """Compare type, fields and persons; defer to ``other`` for foreign types."""
        if not isinstance(other, Entry):
            # Returning NotImplemented lets Python try the reflected
            # comparison with the real object instead of a ``super`` proxy.
            return NotImplemented
        return (self.type == other.type
                and self.fields == other.fields
                and self.persons == other.persons)

    def __repr__(self):
        # Represent the fields as a list of tuples, one tuple per line.
        # Each item is rendered on its own rather than located with
        # ``str.index(key)``, which would also match the key text inside an
        # earlier key or value and break the line in the wrong place.
        prefix = "    "
        items = list(self.fields.items())
        repr_fields = "[" + ", ".join("\n" + repr(item) for item in items) + "]"
        repr_fields = indent(repr_fields, prefix=prefix)
        repr_fields = repr_fields[len(prefix):]  # drop 1st indent
        return ("Entry({0},\n"
                " fields={1},\n"
                " persons={2})".format(repr(self.type), repr_fields,
                                        repr(self.persons)))

    def add_person(self, person, role):
        """Append ``person`` to the list of persons with the given ``role``."""
        self.persons.setdefault(role, []).append(person)

    def lower(self):
        """Return a new entry built from the lowercased fields and persons."""
        return type(self)(
            self.type,
            fields=self.fields.lower(),
            persons=self.persons.lower(),
        )

    def _find_person_field(self, role):
        """Return the persons with ``role`` joined by ``' and '``.

        :raises KeyError: if there are no persons with that role.
        """
        persons = self.persons[role]
        return ' and '.join(six.text_type(person) for person in persons)

    def _find_crossref_field(self, name, bib_data):
        """Look ``name`` up in the entry referenced by the ``crossref`` field.

        :raises KeyError: if ``bib_data`` is ``None`` or this entry has no
            ``crossref`` field.
        """
        if bib_data is None or 'crossref' not in self.fields:
            raise KeyError(name)
        referenced_entry = bib_data.entries[self.fields['crossref']]
        return referenced_entry._find_field(name, bib_data)

    def _find_field(self, name, bib_data=None):
        """
        Find the field with the given ``name`` according to these rules:

        - If the given field ``name`` is in ``self.fields``, just return
          self.fields[name].

        - Otherwise, if ``name`` is a person role present in ``self.persons``
          (for example ``"author"`` or ``"editor"``), return the list of names
          as a string, separated by ``" and "``.

        - Otherwise, if this entry has a ``crossref`` field, look up the
          cross-referenced entry and try to find its field with the given
          ``name``.

        :raises KeyError: if none of the rules yields a value.
        """
        try:
            return self.fields[name]
        except KeyError:
            try:
                return self._find_person_field(name)
            except KeyError:
                return self._find_crossref_field(name, bib_data)

    def to_string(self, bib_format, **kwargs):
        """
        Return the data as a unicode string in the given format.

        :param bib_format: Data format ("bibtex", "yaml", etc.).

        """
        writer = find_plugin('pybtex.database.output', bib_format)(**kwargs)
        return writer.to_string(BibliographyData(entries={self.key: self}))

    @classmethod
    def from_string(cls, value, bib_format, entry_number=0, **kwargs):
        """
        Return the data from a unicode string in the given format.

        :param bib_format: Data format ("bibtex", "yaml", etc.).
        :param entry_number: entry number if the string has more than one.

        .. versionadded:: 0.22.2
        """
        # get bibliography
        bibdata = BibliographyData.from_string(value, bib_format, **kwargs)
        # grab specific instance
        key = tuple(bibdata.entries.keys())[entry_number]
        return bibdata.entries[key]
class TeX(object):
    """A TeX source file together with its ``.bib`` / ``.bbl`` companions.

    On construction the file is read and every ``\\cite`` key found outside
    comments is recorded in ``self.references`` (in order of first
    appearance) together with the positions where it occurs.
    """

    CITE_REGEX = re.compile(r'(?P<pre>(\\cite(\[.*?\])?{))(?P<body>.*?)}', re.DOTALL)
    CITE_BIB_IN_TEX = re.compile(r'\\bibliography{(.*?)}', re.DOTALL)
    CITE_BIB_IN_BBL = re.compile(r'\\bibitem{(.*?)}', re.DOTALL)
    COMMENTS_REGEX = re.compile(r'((?:^|[^\\])(?:\\\\)*)%.*$', re.MULTILINE)

    @classmethod
    def strip_comment(cls, string):
        """Return ``string`` with TeX comments (unescaped ``%`` to end of line) removed."""
        return cls.COMMENTS_REGEX.sub(r'\1', string)

    def __init__(self, filename):
        """Read ``filename`` (or ``filename + '.tex'``) and collect its citations.

        :raises FileLookupFailedError: if neither candidate path can be read.
        """
        self.text = None
        errors = []
        possible_paths = [filename, filename + '.tex']
        for path in possible_paths:
            try:
                with open(path, mode='r') as file:
                    self.text_original = file.read()
                self.text = self.text_original
                self.filename = path
                self.stem = os.path.splitext(path)[0]
                break
            except IOError as e:
                errors.append(e)
        if self.text is None:
            raise FileLookupFailedError(errors=errors, paths=possible_paths)

        self._bib_name = None
        self._bib = None
        self._bbl = None

        # generate references
        text_uncommented = self.strip_comment(self.text)
        self.references = ODict()  # Order is important!
        for cite in self.CITE_REGEX.finditer(text_uncommented):
            pos = Position(str=text_uncommented[:cite.start()])
            pos.shift(cite.group('pre'))
            for key_raw in re.split(r',', cite.group('body')):
                stripping = re.match(r'^(\s*)(\S+)(\s*)$', key_raw)
                if stripping is None:
                    # Empty or malformed key (e.g. ``\cite{a,}``): nothing to
                    # record, but keep the position in step with the text.
                    pos.shift(key_raw)
                    pos.shift(',')
                    continue
                pos.shift(stripping.group(1))
                key = stripping.group(2)
                if key not in self.references:
                    self.references[key] = Ref(key, position=pos.copy())
                else:
                    self.references[key].positions.append(pos.copy())
                pos.shift(stripping.group(2)).shift(stripping.group(3))
                pos.shift(',')

    def write_tex(self):
        """Overwrite the TeX file with the current ``self.text``."""
        with open(self.filename, mode='w') as file:
            file.write(self.text)

    def bbl_name(self):
        """Return the path of the ``.bbl`` file next to the TeX file."""
        return self.stem + '.bbl'

    def bbl(self):
        """Return the (cached) content of the ``.bbl`` file, or ``""`` if unavailable."""
        if self._bbl is None:
            if self.bbl_name() and os.path.exists(self.bbl_name()):
                try:
                    with open(self.bbl_name(), mode='r') as file:
                        self._bbl = file.read()
                except Exception:
                    # Best effort: an unreadable file is treated as empty,
                    # but KeyboardInterrupt/SystemExit still propagate.
                    pass
            self._bbl = self._bbl or ""
        return self._bbl

    def modify_and_write_bbl(self, new_content, append=True):
        """Write ``new_content`` into the ``.bbl`` file.

        With ``append`` true the content is inserted before the first
        ``\\end{thebibliography}`` of the existing text; otherwise a fresh
        ``thebibliography`` environment replaces the file content.
        """
        begin = '\\begin{thebibliography}{99}'
        end = '\\end{thebibliography}'
        if append:
            sep = re.split(r'\\end\s*{\s*thebibliography\s*}', self.bbl(), maxsplit=1)
            (bbl, footer) = sep if len(sep) == 2 else (sep[0], "")
            self._bbl = '\n'.join([bbl, new_content, end + footer])
        else:
            self._bbl = '\n\n'.join([begin, new_content, end])
        with open(self.bbl_name(), mode='w') as file:
            file.write(self.bbl())

    def bib_name(self):
        """Return the path of the ``.bib`` file named by ``\\bibliography{...}``.

        Returns ``False`` when the TeX file declares no bibliography.

        :raises MultipleBibError: if more than one distinct name is declared.
        """
        if self._bib_name is None:
            bib_keys = self.CITE_BIB_IN_TEX.findall(self.strip_comment(self.text))
            stem = None
            for bib_key in bib_keys:
                for bib in re.split(r'\s*,\s*', bib_key):
                    if stem is None:
                        stem = bib
                    elif stem != bib:
                        raise MultipleBibError
            if stem is None:
                self._bib_name = False  # for "not found"
            else:
                self._bib_name = os.path.join(os.path.dirname(self.filename), stem + '.bib')
        return self._bib_name

    def bib(self):
        """Return the (cached) parsed ``.bib`` database, or an empty one if unavailable."""
        if self._bib is None:
            if self.bib_name() and os.path.exists(self.bib_name()):
                try:
                    self._bib = pybtex.database.parse_file(self.bib_name(), bib_format='bibtex')
                except Exception:
                    # Best effort: an unparsable file is treated as empty,
                    # but KeyboardInterrupt/SystemExit still propagate.
                    pass
            self._bib = self._bib or pybtex.database.BibliographyData()
        return self._bib

    # pybtex output are a bit buggy and avoided.
    # def append_bib(self, entries):
    #     # self._bib.add_entries(entries) # maybe buggy?
    #     for k in entries.order:
    #         self._bib.add_entry(k, entries[k])
    # def update_bib(self):
    #     if self.bib_name():
    #         self._bib.to_file(self.bib_name(), bib_format='bibtex')
    #     else:
    #         raise RuntimeError()

    def append_and_update_bib(self, new_text):
        """Append ``new_text`` to the ``.bib`` file and drop the parsed cache."""
        with open(self.bib_name(), mode='a') as file:
            file.write('\n' + new_text + '\n')
        self._bib = None  # clear and to be reload

    def bbl_keys(self):
        """Return the ``\\bibitem`` keys found in the ``.bbl`` file (``[]`` if unreadable)."""
        try:
            with open(self.bbl_name(), mode='r') as file:
                return self.CITE_BIB_IN_BBL.findall(self.strip_comment(file.read()))
        except IOError:
            return []

    def fetch_and_update(self, bibtex=True, latex_format='EU', append_bbl=False):
        """Fetch every cited key that is not yet known and update the files.

        Keys already present in the ``.bib`` (``bibtex`` true) or ``.bbl``
        (``bibtex`` false) are skipped, compared case-insensitively. Keys that
        the fetch replaced are rewritten in the TeX file, and the fetched
        contents are appended to the ``.bib`` or written to the ``.bbl``.
        """
        existing_keys = self.bib().entries.keys() if bibtex else self.bbl_keys()
        # A set gives constant-time membership tests inside the loop below.
        existing_keys = {k.lower() for k in existing_keys}
        replace_keys = ODict()
        new_entries = ODict()
        type_name = 'BibTeX' if bibtex else 'LaTeX({})'.format(latex_format)

        for ref in self.references.values():
            if ref.key.lower() in existing_keys:
                if DEBUG:
                    print('skip existing: {}'.format(ref.key))
                continue
            if Key.is_unknown(ref.key):
                print('WARNING: skip non-existing but unknown-type key {}'.format(ref.key))
                continue

            try:
                print('fetching', type_name, 'from inspire:', ref.key, end=' ')
                if bibtex:
                    ref.fetch_bibtex()
                else:
                    ref.fetch_latex(latex_format)
                sys.stdout.flush()
                time.sleep(0.3)
            except (RecordNotFound, MultipleRecordsFound) as e:
                # A tuple is required here: ``A or B`` evaluates to ``A`` and
                # would leave MultipleRecordsFound uncaught.
                print('\nERROR: {}'.format(e))
                continue

            if ref.new_key:
                replace_keys[ref.old_key] = ref
                print('->', ref.new_key, end=' ')
                sys.stdout.flush()
                time.sleep(0.3)

            # ref.key is re-read here because the fetch may have changed it.
            if ref.key.lower() not in existing_keys:
                existing_keys.add(ref.key.lower())
                new_entries[ref.key] = ref
                print('[new entry]', end='')
            print('')

        replacements = list()
        for ref in replace_keys.values():
            for appearance in ref.positions:
                replacements.append((appearance, ref.old_key, ref.new_key))
        self.replace_text(replacements)
        self.write_tex()

        new_ref_contents = '\n'.join(r.content for r in new_entries.values())
        if bibtex:
            self.append_and_update_bib(new_ref_contents)
        else:
            self.modify_and_write_bbl(new_ref_contents, append_bbl)

    def replace_text(self, replacement_rules):
        """Apply ``(position, old, new)`` replacements to ``self.text``.

        :raises ReplacementError: if the text at a position is not ``old``.
        """
        # replace from the end to the beginning not to break the positions
        replacement_rules = sorted(replacement_rules, key=lambda x: x[0], reverse=True)
        lines = Position.LINESEP_REGEX.split(self.text)
        for pos, old, new in replacement_rules:
            if lines[pos.l][pos.c:pos.c + len(old)] == old:
                lines[pos.l] = lines[pos.l][:pos.c] + new + lines[pos.l][pos.c + len(old):]
            else:
                raise ReplacementError(pos.l, pos.c, old, new, lines[pos.l][pos.c:pos.c + len(old)])
        self.text = '\n'.join(lines)
def cmd_makecldf(self, args):
    """Read the raw JSON files and write sources, languages, concepts and forms.

    Everything is emitted through ``args.writer``; lexemes whose word slug
    cannot be mapped to a concept are collected and reported through
    ``args.log`` at the end.
    """
    # Index the raw JSON records by their "slug".
    languages = {
        o["slug"]: o for o in self.raw_dir.read_json(self.raw_dir / "languages.json")
    }
    words = {
        o["slug"]: o for o in self.raw_dir.read_json(self.raw_dir / "words.json")
    }
    sources = {
        o["slug"]: o for o in self.raw_dir.read_json(self.raw_dir / "sources.json")
    }
    # handle sources
    # want to make sure that the bibtex key matches our source id.
    for source in sorted(sources):
        # this is ugly, I wish pybtex made this easier!
        bib = parse_string(sources[source]["bibtex"], "bibtex")
        # Only the first parsed entry is kept; it is re-keyed to the source slug.
        old_key = list(bib.entries.keys())[0]
        bib.entries[old_key].key = source
        bib.entries = OrderedCaseInsensitiveDict([(source, bib.entries[old_key])])
        args.writer.add_sources(bib)
    # handle languages
    for lang in sorted(languages):
        args.writer.add_language(
            ID=lang,
            Name=languages[lang]["fullname"],
            ISO639P3code=languages[lang]["isocode"],
            Glottocode=languages[lang]["glottocode"],
        )
    # handle concepts
    # ``concepts`` maps many spellings of a gloss to the concept ID, so that
    # the word slugs in the raw lexeme files can be resolved below.
    concepts = {}
    for concept in self.conceptlists[0].concepts.values():
        idx = '{0}_{1}'.format(concept.number, slug(concept.english))
        args.writer.add_concept(
            ID=idx,
            Name=concept.english,
            Concepticon_ID=concept.concepticon_id,
            Concepticon_Gloss=concept.concepticon_gloss)
        concepts[concept.english] = idx
        concepts[concept.english.replace(" ", "-")] = idx
        concepts[concept.english.replace(" ", "-").lower()] = idx
        concepts[slug(concept.english)] = idx
        concepts["-".join([slug(x) for x in concept.english.split()])] = idx
        if '(' in concept.english:
            # Variants built from the text before the first "(", minus the
            # one character preceding it (presumably a space).
            new_string = concept.english[:concept.english.index('(') - 1]
            concepts["-".join([slug(x) for x in new_string.split()])] = idx
            concepts[concept.english[:concept.english.index('(') - 1]] = idx
            concepts[concept.english[:concept.english.index('(') - 1].replace(' ', '-').lower()] = idx
        # NOTE(review): the nesting of this block is not recoverable from the
        # flattened source; it is laid out as a sibling of the "(" block -- confirm.
        if concept.english.startswith("to "):
            new_string = concept.english[3:]
            concepts['-'.join([slug(x) for x in new_string.split()])] = idx
            # str.replace removes every "to " occurrence, not only the prefix.
            concepts[concept.english.replace("to ", "")] = idx
    # Hand-written aliases for word slugs that the rules above do not produce.
    # Each right-hand side must already be a key of ``concepts``, otherwise a
    # KeyError is raised; order matters where an alias relies on an earlier one.
    concepts["mans-mother-law"] = concepts["man's mother in law"]
    concepts["brother-law"] = concepts["brother in law"]
    concepts["to-make-hole"] = concepts["make hole (in ground)"]
    concepts["front"] = concepts["in front"]
    concepts["husk-nut"] = concepts["husk (of nut)"]
    concepts["his"] = concepts["his, hers, its (pronoun p:3s)"]
    concepts["we-two-incl"] = concepts[
        "we incl. dual (pronoun d:1p, incl, dual)"]
    concepts["intrnasitivizer"] = concepts["intransitivizer"]
    concepts["short-piece-wood"] = concepts["short-piece-of-wood"]
    concepts["top-foot"] = concepts["top (of foot)"]
    concepts["sit-feet-and-legs-together"] = concepts[
        "sit (with feet and legs together)"]
    concepts["earth"] = concepts["earth/soil"]
    concepts["warm"] = concepts["warm/hot"]
    concepts["your-sg"] = concepts["your (pronoun: p:2s)"]
    concepts["-law"] = concepts["in-law"]
    concepts["to-roast"] = concepts["roast"]
    concepts["arrow-barred"] = concepts[
        "arrow (barred) (Arrow with cross bar)"]
    concepts["them-dual"] = concepts["them (pronoun o:3p, dual)"]
    concepts["you-dual"] = concepts["you (pronoun d:2s)"]
    concepts["right-correct"] = concepts["right (correct, true)"]
    concepts["betelpepper"] = concepts["betelpepper vine"]
    concepts["to-chop"] = concepts["to chop, cut down"]
    concepts["road"] = concepts["road/path"]
    concepts["for-benefactive-clitic"] = concepts[
        "for (benefactive) ((cliticised or suffixed to noun))"]
    concepts["mans-father-law"] = concepts["mans' father in law"]
    concepts["sister-law"] = concepts["sister in law"]
    concepts["you-o2s"] = concepts["you (pronoun o:2s)"]
    concepts["you-pl-o2p"] = concepts["you pl. (pronoun o:2p)"]
    concepts["we-pl-incl"] = concepts["we incl. (pronoun d:1p, incl)"]
    concepts["in"] = concepts["in, inside"]
    concepts["not_know"] = concepts["not know"]
    concepts["their-dual"] = concepts["their (pronoun p:3p, dual)"]
    concepts["blow-fire"] = concepts["blow (on fire)"]
    concepts["blunt-eg-knife"] = concepts["blunt (of e.g. knife)"]
    concepts["our-dual"] = concepts["our (two) (pronoun p:1p, dual)"]
    concepts["your-pl-dual"] = concepts[
        "your (two) pl (pronoun p:2p, dual)"]
    concepts["suck-breast"] = concepts["to suck at breast"]
    concepts["draw-water-carry"] = concepts["draw water / carry"]
    concepts["tree-sp-Gnetum-gnemon"] = concepts[
        "tree sp. (Gnetum gnemon)"]
    concepts["he-she"] = concepts["he, she, it, that, those"]
    concepts["fed"] = concepts["fed up (with)"]
    concepts["you-pl-dual-o2p"] = concepts[
        "you plural two (pronoun d:2p, dual)"]
    concepts["you-pl-dual"] = concepts["you two (pronoun d:2s, dual)"]
    concepts["to-put"] = concepts["to put, give"]
    concepts["he-she-it-those"] = concepts["he, she, it, that, those"]
    concepts["we-two-excl"] = concepts[
        "we excl. dual (pronoun d:1p, excl, dual)"]
    concepts["we-pl-excl"] = concepts[
        "we excl. plural (pronoun d:1p, excl, plural)"]
    #concepts["affix-body-part"] = concepts[""]
    # Raw lexeme files are those in raw_dir whose name starts with "language-".
    itemfiles = [
        f for f in self.raw_dir.iterdir() if f.name.startswith("language-")
    ]
    # Collects ("concept", wordid) pairs for word slugs with no known concept.
    errors = set()
    for filename in progressbar(sorted(itemfiles), desc="adding lexemes"):
        for o in sorted(self.raw_dir.read_json(filename), key=lambda d: d["id"]):
            wordid = self.get_slug_from_uri(o['word'])
            if wordid in concepts:
                args.writer.add_forms_from_value(
                    Local_ID=o["id"],
                    Language_ID=self.get_slug_from_uri(o["language"]),
                    Parameter_ID=concepts[wordid],
                    Value=o["entry"],
                    Source=self.get_slug_from_uri(o["source"]),
                    Comment=o["annotation"],
                )
            else:
                errors.add(("concept", wordid))
    # Report the unmapped word slugs (set iteration order, i.e. unordered).
    for error in errors:
        args.log.info("error with {0[0]}: {0[1]}".format(error))
    args.log.info("found {0} errors in concepts".format(len(errors)))
class Entry(object):
    """A bibliography entry."""

    type = None
    """Entry type (``'book'``, ``'article'``, etc.)."""

    key = None
    """Entry key (for example, ``'fukushima1980neocognitron'``)."""

    fields = None
    """A dictionary of entry fields.
    The dictionary is ordered and case-insensitive."""

    rich_fields = None
    """A dictionary of entry fields, converted to :ref:`rich text <rich-text>`."""

    persons = None
    """
    A dictionary of entry persons, by their roles.

    The most often used roles are ``'author'`` and ``'editor'``.
    """

    collection = None
    """A reference to the containing :py:class:`.BibliographyData` object.
    Used to resolve crossrefs."""

    def __init__(self, type_, fields=None, persons=None, collection=None):
        """
        :param type_: Entry type; stored lowercased in ``self.type`` and
            verbatim in ``self.original_type``.
        :param fields: Initial fields (mapping or iterable of pairs).
        :param persons: Initial persons, keyed by role.
        :param collection: The containing bibliography, if any.
        """
        if fields is None:
            fields = {}
        if persons is None:
            persons = {}
        self.type = type_.lower()
        self.original_type = type_
        self.fields = FieldDict(self, fields)
        self.rich_fields = RichFieldProxyDict(self.fields)
        self.persons = OrderedCaseInsensitiveDict(persons)
        self.collection = collection
        # for BibTeX interpreter
        self.vars = {}

    def __eq__(self, other):
        """Compare type, fields and persons; defer to ``other`` for foreign types."""
        if not isinstance(other, Entry):
            # Returning NotImplemented lets Python try the reflected
            # comparison with the real object instead of a ``super`` proxy.
            return NotImplemented
        return (
            self.type == other.type
            and self.fields == other.fields
            and self.persons == other.persons
        )

    def __repr__(self):
        # representing fields as FieldDict causes problems with representing
        # fields.parent, so represent it as a list of tuples.
        # list() is needed because on Python 3 items() may return a view whose
        # repr is not a plain list of tuples.
        repr_fields = repr(list(self.fields.items()))
        return 'Entry({type_}, fields={fields}, persons={persons})'.format(
            type_=repr(self.type),
            fields=repr_fields,
            persons=repr(self.persons),
        )

    def get_crossref(self):
        """Return the entry named by this entry's ``crossref`` field.

        The lookup goes through ``self.collection.entries``.
        """
        return self.collection.entries[self.fields['crossref']]

    def add_person(self, person, role):
        """Append ``person`` to the list of persons with the given ``role``."""
        self.persons.setdefault(role, []).append(person)

    def lower(self):
        """Return a new entry built from the lowercased fields and persons."""
        return type(self)(
            self.type,
            fields=self.fields.lower(),
            persons=self.persons.lower(),
            collection=self.collection,
        )
class Entry(object):
    """A bibliography entry."""

    type = None
    """Entry type (``'book'``, ``'article'``, etc.)."""

    key = None
    """Entry key (for example, ``'fukushima1980neocognitron'``)."""

    fields = None
    """A dictionary of entry fields.
    The dictionary is ordered and case-insensitive."""

    persons = None
    """
    A dictionary of entry persons, by their roles.

    The most often used roles are ``'author'`` and ``'editor'``.
    """

    def __init__(self, type_, fields=None, persons=None):
        """
        :param type_: Entry type; stored lowercased in ``self.type`` and
            verbatim in ``self.original_type``.
        :param fields: Initial fields (mapping or iterable of pairs).
        :param persons: Initial persons, keyed by role.
        """
        if fields is None:
            fields = {}
        if persons is None:
            persons = {}
        self.type = type_.lower()
        self.original_type = type_
        self.fields = OrderedCaseInsensitiveDict(fields)
        self.persons = OrderedCaseInsensitiveDict(persons)

    def __eq__(self, other):
        """Compare type, fields and persons; defer to ``other`` for foreign types."""
        if not isinstance(other, Entry):
            # Returning NotImplemented lets Python try the reflected
            # comparison with the real object instead of a ``super`` proxy.
            return NotImplemented
        return (
            self.type == other.type
            and self.fields == other.fields
            and self.persons == other.persons
        )

    def __repr__(self):
        # represent the fields as a list of tuples for simplicity; list() is
        # needed because on Python 3 items() may return a view whose repr is
        # not a plain list of tuples.
        repr_fields = repr(list(self.fields.items()))
        return 'Entry({type_}, fields={fields}, persons={persons})'.format(
            type_=repr(self.type),
            fields=repr_fields,
            persons=repr(self.persons),
        )

    def add_person(self, person, role):
        """Append ``person`` to the list of persons with the given ``role``."""
        self.persons.setdefault(role, []).append(person)

    def lower(self):
        """Return a new entry built from the lowercased fields and persons."""
        return type(self)(
            self.type,
            fields=self.fields.lower(),
            persons=self.persons.lower(),
        )

    def _find_person_field(self, role):
        """Return the persons with ``role`` joined by ``' and '``.

        :raises KeyError: if there are no persons with that role.
        """
        persons = self.persons[role]
        return ' and '.join(six.text_type(person) for person in persons)

    def _find_crossref_field(self, name, bib_data):
        """Look ``name`` up in the entry referenced by the ``crossref`` field.

        :raises KeyError: if ``bib_data`` is ``None`` or this entry has no
            ``crossref`` field.
        """
        if bib_data is None or 'crossref' not in self.fields:
            raise KeyError(name)
        referenced_entry = bib_data.entries[self.fields['crossref']]
        return referenced_entry._find_field(name, bib_data)

    def _find_field(self, name, bib_data=None):
        """
        Find the field with the given ``name`` according to these rules:

        - If the given field ``name`` is in ``self.fields``, just return
          self.fields[name].

        - Otherwise, if ``name`` is a person role present in ``self.persons``
          (for example ``"author"`` or ``"editor"``), return the list of names
          as a string, separated by ``" and "``.

        - Otherwise, if this entry has a ``crossref`` field, look up the
          cross-referenced entry and try to find its field with the given
          ``name``.

        :raises KeyError: if none of the rules yields a value.
        """
        try:
            return self.fields[name]
        except KeyError:
            try:
                return self._find_person_field(name)
            except KeyError:
                return self._find_crossref_field(name, bib_data)