def download():
    """Harvest 'eelbrain' citations from Google Scholar and cache as BibTeX.

    Pages through Scholar results ten at a time, fetches the BibTeX record
    for each article, ensures every entry carries a URL, and writes the
    merged bibliography to ``CACHE``.
    """
    querier = ScholarQuerier()
    settings = ScholarSettings()
    settings.set_citation_format(ScholarSettings.CITFORM_BIBTEX)
    querier.apply_settings(settings)

    query = SearchScholarQuery()
    query.set_phrase("eelbrain")
    query.set_timeframe(2012, None)
    query.set_include_patents(False)

    # accumulate entries page by page
    bibliography = BibliographyData()
    offset = 0
    while True:
        querier.send_query(query)
        if len(querier.articles) == 0:
            break  # ran out of results; stop paging

        for article in querier.articles:
            querier.get_citation_data(article)
            parsed = parse_bytes(article.citation_data, 'bibtex')
            assert len(parsed.entries) == 1
            for cite_key, cite_entry in parsed.entries.items():
                # fall back to the article's own URL when the citation lacks one
                if 'url' not in cite_entry.fields:
                    article_url = article.attrs['url'][0]
                    if article_url:
                        cite_entry.fields['url'] = article_url
                bibliography.add_entry(cite_key, cite_entry)

        # advance to the next page of results
        offset += 10
        query.set_start(offset)

    # un-double-escape ampersands before writing the cache file
    CACHE.write_bytes(bibliography.to_bytes('bibtex').replace(br'\\&', br'\&'))
def createMaterialize(self, parent, token, page):
    """Render the citation list and attach a Materialize modal per item.

    Each list item gets a '[BibTeX]' link that opens a modal showing the
    raw BibTeX of that single entry.  Returns the <ol> tag (or None when
    createHTML produced nothing).
    """
    ol = self.createHTML(parent, token, page)
    if ol is None:
        return

    for item in ol.children:
        cite_key = item['id']
        # build a one-entry bibliography so to_string emits just this record
        single = BibliographyData()
        single.add_entry(cite_key, self.extension.database().entries[cite_key])
        bibtex_text = single.to_string("bibtex")

        modal_id = uuid.uuid4()  # unique anchor linking trigger to modal
        html.Tag(item, 'a',
                 style="padding-left:10px;",
                 class_='modal-trigger moose-bibtex-modal',
                 href="#{}".format(modal_id),
                 string='[BibTeX]')
        modal = html.Tag(item, 'div', class_='modal', id_=modal_id)
        body = html.Tag(modal, 'div', class_='modal-content')
        pre = html.Tag(body, 'pre', style="line-height:1.25;")
        html.Tag(pre, 'code', class_='language-latex', string=bibtex_text)
    return ol
def main():
    """Resolve the CLI-supplied identifier to a DOI and print citation data.

    With --bibtex, fetches BibTeX via content negotiation and re-keys the
    entry as '<lastname>_<year>_<shortdoi>'; otherwise prints the record in
    the requested format.  Falls back to printing the raw input/DOI on
    failure.
    """
    doi = _extract_doi(args.identifier[0])
    if doi is None:
        # fix: 'item' was an undefined name (NameError); echo the raw input
        print(args.identifier[0])
    elif args.bibtex:
        result = cn.content_negotiation(doi, format="bibtex")
        bibtex = parse_string(result, "bibtex")
        try:
            # fix: dict views are not subscriptable on Python 3 — use
            # next(iter(...)) to take the first entry (works on 2 and 3)
            first_entry = next(iter(bibtex.entries.values()))
            name = "".join(
                next(iter(first_entry.persons.values()))[0].last_names)
            # transliterate German umlauts before the generic unidecode pass
            name = name.replace("ä", "ae").replace("ö", "oe").replace("ü", "ue")
            name = unidecode(name)
            shortdoi = _short_doi(doi)[3:]  # strip the '10/' prefix
            year = first_entry.fields["year"]
            key = "{}_{}_{}".format(name, year, shortdoi)
            new = BibliographyData()
            new.add_entry(key, first_entry)
            print(new.to_string("bibtex"))
        except (KeyError, StopIteration):
            # entry missing author/year (or empty database): print as-is
            print(result)
    else:
        try:
            result = cn.content_negotiation(doi, format=args.format)
            print(result)
        except requests.exceptions.HTTPError:
            print(doi)
    print()
class BibtexExtension(command.CommandExtension):
    """
    Extension for BibTeX citations and bibliography.
    """
    @staticmethod
    def defaultConfig():
        config = command.CommandExtension.defaultConfig()
        config['duplicate_warning'] = (True, "Show a warning when duplicate entries detected.")
        return config

    def __init__(self, *args, **kwargs):
        command.CommandExtension.__init__(self, *args, **kwargs)
        self.__database = None   # BibliographyData, built in preExecute
        self.__citations = set() # citation keys used on the pages

    def preExecute(self, content):
        """Collect all '.bib' files from the content and merge their entries."""
        self.__database = BibliographyData()
        bib_files = []
        for node in content:
            if node.source.endswith('.bib'):
                bib_files.append(node.source)

        for bfile in bib_files:
            try:
                db = parse_file(bfile)
            except UndefinedMacro as e:
                msg = "The BibTeX file %s has an undefined macro:\n%s"
                # fix: log the exception itself ('.message' does not exist on
                # Python 3 exceptions) and skip this file — otherwise 'db' is
                # undefined (first file) or stale (later files) below
                LOG.warning(msg, bfile, e)
                continue

            #TODO: https://bitbucket.org/pybtex-devs/pybtex/issues/93/
            #      databaseadd_entries-method-not-considering
            warn = self.get('duplicate_warning')
            for key in db.entries:
                if key in self.__database.entries:
                    if warn:
                        msg = "The BibTeX entry '%s' defined in %s already exists."
                        LOG.warning(msg, key, bfile)
                else:
                    self.__database.add_entry(key, db.entries[key])

    @property
    def database(self):
        """The merged BibliographyData built during preExecute."""
        return self.__database

    def extend(self, reader, renderer):
        self.requires(core, command)
        self.addCommand(reader, BibtexCommand())
        reader.addInline(BibtexReferenceComponent(), location='>FormatInline')
        renderer.add('BibtexCite', RenderBibtexCite())
        renderer.add('BibtexBiliography', RenderBibtexBibliography())
class BibtexExtension(command.CommandExtension):
    """
    Extension for BibTeX citations and bibliography.
    """
    @staticmethod
    def defaultConfig():
        config = command.CommandExtension.defaultConfig()
        config['duplicate_warning'] = (
            True, "Show a warning when duplicate entries detected.")
        return config

    def __init__(self, *args, **kwargs):
        command.CommandExtension.__init__(self, *args, **kwargs)
        self.__database = None   # BibliographyData, built in preExecute
        self.__citations = set() # citation keys used on the pages

    def preExecute(self, content):
        """Collect all '.bib' files from the content and merge their entries."""
        self.__database = BibliographyData()
        bib_files = []
        for node in content:
            if node.source.endswith('.bib'):
                bib_files.append(node.source)

        for bfile in bib_files:
            try:
                db = parse_file(bfile)
            except UndefinedMacro as e:
                msg = "The BibTeX file %s has an undefined macro:\n%s"
                # fix: log the exception itself ('.message' does not exist on
                # Python 3 exceptions) and skip this file — otherwise 'db' is
                # undefined (first file) or stale (later files) below
                LOG.warning(msg, bfile, e)
                continue

            #TODO: https://bitbucket.org/pybtex-devs/pybtex/issues/93/
            #      databaseadd_entries-method-not-considering
            warn = self.get('duplicate_warning')
            for key in db.entries:
                if key in self.__database.entries:
                    if warn:
                        msg = "The BibTeX entry '%s' defined in %s already exists."
                        LOG.warning(msg, key, bfile)
                else:
                    self.__database.add_entry(key, db.entries[key])

    @property
    def database(self):
        """The merged BibliographyData built during preExecute."""
        return self.__database

    def extend(self, reader, renderer):
        self.requires(core, command)
        self.addCommand(reader, BibtexCommand())
        reader.addInline(BibtexReferenceComponent(), location='>FormatInline')
        renderer.add('BibtexCite', RenderBibtexCite())
        renderer.add('BibtexBiliography', RenderBibtexBibliography())
class BibtexExtension(command.CommandExtension):
    """
    Extension for BibTeX citations and bibliography.
    """
    @staticmethod
    def defaultConfig():
        config = command.CommandExtension.defaultConfig()
        return config

    def __init__(self, *args, **kwargs):
        command.CommandExtension.__init__(self, *args, **kwargs)
        self.__database = BibliographyData()  # merged entries from all .bib files
        self.__citations = set()              # citation keys used on the pages

    def init(self, translator):
        """Walk the page tree for '.bib' files and merge their entries."""
        command.CommandExtension.init(self, translator)

        bib_files = []
        for node in anytree.PreOrderIter(self.translator.root):
            if node.source.endswith('.bib'):
                bib_files.append(node.source)

        for bfile in bib_files:
            try:
                db = parse_file(bfile)
            except UndefinedMacro as e:
                msg = "The BibTeX file %s has an undefined macro:\n%s"
                # fix: log the exception itself ('.message' does not exist on
                # Python 3 exceptions) and skip this file — otherwise 'db' is
                # undefined (first file) or stale (later files) below
                LOG.warning(msg, bfile, e)
                continue

            #TODO: https://bitbucket.org/pybtex-devs/pybtex/issues/93/
            #      databaseadd_entries-method-not-considering
            for key in db.entries:
                if key in self.__database.entries:
                    msg = "The BibTeX entry '%s' defined in %s already exists."
                    LOG.warning(msg, key, bfile)
                else:
                    self.__database.add_entry(key, db.entries[key])

    @property
    def database(self):
        """The merged BibliographyData built during init."""
        return self.__database

    def extend(self, reader, renderer):
        self.requires(command)
        self.addCommand(BibtexCommand())
        reader.addInline(BibtexReferenceComponent(), location='>Format')
        renderer.add(BibtexCite, RenderBibtexCite())
        renderer.add(BibtexBibliography, RenderBibtexBibliography())
class BibtexExtension(command.CommandExtension):
    """
    Extension for BibTeX citations and bibliography.
    """
    @staticmethod
    def defaultConfig():
        config = command.CommandExtension.defaultConfig()
        return config

    def __init__(self, *args, **kwargs):
        command.CommandExtension.__init__(self, *args, **kwargs)
        self.__database = BibliographyData()  # merged entries from all .bib files
        self.__citations = set()              # citation keys used on the pages

    def init(self, translator):
        """Walk the page tree for '.bib' files and merge their entries."""
        command.CommandExtension.init(self, translator)

        bib_files = []
        for node in anytree.PreOrderIter(self.translator.root):
            if node.source.endswith('.bib'):
                bib_files.append(node.source)

        for bfile in bib_files:
            try:
                db = parse_file(bfile)
            except UndefinedMacro as e:
                msg = "The BibTeX file %s has an undefined macro:\n%s"
                # fix: log the exception itself ('.message' does not exist on
                # Python 3 exceptions) and skip this file — otherwise 'db' is
                # undefined (first file) or stale (later files) below
                LOG.warning(msg, bfile, e)
                continue

            #TODO: https://bitbucket.org/pybtex-devs/pybtex/issues/93/
            #      databaseadd_entries-method-not-considering
            for key in db.entries:
                if key in self.__database.entries:
                    msg = "The BibTeX entry '%s' defined in %s already exists."
                    LOG.warning(msg, key, bfile)
                else:
                    self.__database.add_entry(key, db.entries[key])

    @property
    def database(self):
        """The merged BibliographyData built during init."""
        return self.__database

    def extend(self, reader, renderer):
        self.requires(command)
        self.addCommand(BibtexCommand())
        reader.addInline(BibtexReferenceComponent(), location='>Format')
        renderer.add(BibtexCite, RenderBibtexCite())
        renderer.add(BibtexBibliography, RenderBibtexBibliography())
def make_bibliography(table):  # pragma: nocover
    """Build a pybtex BibliographyData from tabular reference rows.

    Rows that cannot be converted are reported on stderr and skipped.
    """
    bibliography = BibliographyData()
    for record in table:
        try:
            bib_entry = row_to_bibentry(record)
        except ValueError as err:
            # unconvertible row: report and move on
            print('Reference', record.get('Reference_ID'),
                  'dropped:', str(err), file=sys.stderr)
        else:
            bibliography.add_entry(bib_entry.key, bib_entry)
    return bibliography
def load_bib(self, filename='IMSfull.bib'):
    """Parse a BibTeX file and rebuild the publication index (Python 2 code).

    For every entry this records its raw BibTeX text, a content-derived
    sha1 'key', the parsed authors, keywords and a rendered reference
    string, then swaps the new indices in at the very end.

    NOTE: the sha1 key is computed over the entry's fields *at that point*;
    fields added afterwards (authors, bibkey, keywords, reference, year)
    are deliberately not part of the hash — do not reorder these steps.
    """
    parser = bibtex.Parser()
    bib_data = parser.parse_file(filename)
    # remember file mtime so callers can detect when a reload is needed
    self.lastload = os.path.getmtime(filename)
    self.filename = filename
    pubs = []
    index_keys = {}     # sha1 hexdigest -> position in pubs
    index_bibkeys = {}  # original BibTeX key -> position in pubs
    for key, elem in bib_data.entries.iteritems():
        # NOTE: 'entry' aliases elem.fields — every assignment below
        # mutates the parsed entry in place
        entry = elem.fields
        # generate original bibtex
        # using StringIO and bibtex.writer
        a = BibliographyData()
        a.add_entry(key, elem)
        output = StringIO.StringIO()
        w = Writer()
        w.write_stream(a, output)
        entry['bibtex'] = output.getvalue()
        # sha1 for absolute unique keys
        x = hashlib.sha1(simplejson.dumps(entry))
        entry['key'] = x.hexdigest()
        entry['authors'] = self.parse_authors(elem.persons)
        entry['bibkey'] = elem.key
        # keywords: split the comma-separated 'keyword' field, if present
        entry['keywords'] = []
        if entry.get('keyword'):
            for i in entry['keyword'].split(','):
                entry['keywords'].append(i.strip())
        entry['reference'] = self.render_references(elem.type, entry)
        # append to pubs
        pubs.append(entry)
        index_keys[x.hexdigest()] = len(pubs) - 1
        index_bibkeys[elem.key] = len(pubs) - 1
        # guarantee a 'year' field so downstream rendering never KeyErrors
        if 'year' not in entry:
            entry['year'] = ''
    # set at end -> less time for threading problems
    self.index_keys = index_keys
    self.index_bibkeys = index_bibkeys
    self.pubs = pubs
def load_bib(self, filename="IMSfull.bib"):
    """Parse a BibTeX file and rebuild the publication index (Python 2 code).

    Same contract as the sibling load_bib above: records raw BibTeX text, a
    content-derived sha1 'key', authors, keywords and a rendered reference
    per entry, then swaps the indices in at the end.

    NOTE: the sha1 key is computed over the entry's fields *at that point*;
    fields added afterwards are deliberately excluded — do not reorder.
    """
    parser = bibtex.Parser()
    bib_data = parser.parse_file(filename)
    # remember file mtime so callers can detect when a reload is needed
    self.lastload = os.path.getmtime(filename)
    self.filename = filename
    pubs = []
    index_keys = {}     # sha1 hexdigest -> position in pubs
    index_bibkeys = {}  # original BibTeX key -> position in pubs
    for key, elem in bib_data.entries.iteritems():
        # NOTE: 'entry' aliases elem.fields — assignments mutate in place
        entry = elem.fields
        # generate original bibtex
        # using StringIO and bibtex.writer
        a = BibliographyData()
        a.add_entry(key, elem)
        output = StringIO.StringIO()
        w = Writer()
        w.write_stream(a, output)
        entry["bibtex"] = output.getvalue()
        # sha1 for absolute unique keys
        x = hashlib.sha1(simplejson.dumps(entry))
        entry["key"] = x.hexdigest()
        entry["authors"] = self.parse_authors(elem.persons)
        entry["bibkey"] = elem.key
        # keywords: split the comma-separated 'keyword' field, if present
        entry["keywords"] = []
        if entry.get("keyword"):
            for i in entry["keyword"].split(","):
                entry["keywords"].append(i.strip())
        entry["reference"] = self.render_references(elem.type, entry)
        # append to pubs
        pubs.append(entry)
        index_keys[x.hexdigest()] = len(pubs) - 1
        index_bibkeys[elem.key] = len(pubs) - 1
        # guarantee a 'year' field so downstream rendering never KeyErrors
        if "year" not in entry:
            entry["year"] = ""
    # set at end -> less time for threading problems
    self.index_keys = index_keys
    self.index_bibkeys = index_bibkeys
    self.pubs = pubs
def createMaterialize(self, token, parent):
    """Render the citation list and attach a Materialize modal per item.

    Each list item gets a '[BibTeX]' link that opens a modal showing the
    raw BibTeX for that single entry.
    """
    ol = self.createHTML(token, parent)
    for item in ol.children:
        cite_key = item['id']
        # single-entry bibliography so to_string emits just this record
        single = BibliographyData()
        single.add_entry(cite_key, self.extension.database.entries[cite_key])
        bibtex_text = single.to_string("bibtex")

        modal_id = uuid.uuid4()  # unique anchor linking trigger to modal
        html.Tag(item, 'a',
                 style="padding-left:10px;",
                 class_='modal-trigger moose-bibtex-modal',
                 href="#{}".format(modal_id),
                 string=u'[BibTeX]')
        modal = html.Tag(item, 'div', class_='modal', id_=modal_id)
        body = html.Tag(modal, 'div', class_='modal-content')
        pre = html.Tag(body, 'pre', style="line-height:1.25;")
        html.Tag(pre, 'code', class_='language-latex', string=bibtex_text)
def make_bib_entry(info, style='bibtex'):
    """
    Makes a bibliography entry from the processed api info

    Uses pybtex to output a valid bibliography entry.
    style='bibtex' --> "standard" bibtex format
    style='yaml'   --> yaml format (easily convertible to bibtex)
    """
    # create instances
    bib_entry = BibliographyData()
    entry = Entry('article')
    fields = type(entry.fields)()  # pybtex.utils.OrderedCaseInsensitiveDict

    # helper: copy one field from info, joining list values with ', '
    def add_field(k):
        if k in info:
            v = info[k]
            if isinstance(v, list):
                v = ', '.join(v)
            fields[k] = str(v)

    # add fields in a fixed, readable order
    for field_name in ('year', 'title', 'author', 'arxivId', 'DOI',
                       'keywords', 'abstract', 'URL', 'pdf', 'filename'):
        add_field(field_name)

    # update instances
    entry.fields = fields
    bib_entry.add_entry(info.identifier, entry)

    # fix: '\_' was an invalid escape sequence (SyntaxWarning on modern
    # Python); a raw string spells the intended backslash-underscore.
    # Un-escape underscores in the serialized output.
    return bib_entry.to_string(style).replace(r'\_', '_')
def main_cli():
    """CLI entry point: filter a BibTeX file by keyword and print the result.

    Keeps only entries whose comma-separated 'keywords' field contains at
    least one of the requested keywords, then rewrites double-quoted field
    values into brace-delimited form.
    """
    import argparse
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('bib_path',
                        metavar='BIB_PATH',
                        type=arg_is_file,
                        help=('Path to bibtex-formatted file.'))
    parser.add_argument('-k', '--keywords',
                        nargs='+',
                        type=str,
                        default=["OaksPeerReviewed", "OaksCVPreprint"],
                        help=('Keywords for reference filter.'))
    args = parser.parse_args()

    bib_parser = bibtex.Parser()
    bib_data = bib_parser.parse_file(args.bib_path)
    filtered_bib_data = BibliographyData()
    for key, entry in bib_data.entries.items():
        kwords = [
            x.strip() for x in entry.fields.get('keywords', '').split(',')
        ]
        for kw in args.keywords:
            if kw in kwords:
                filtered_bib_data.add_entry(entry.key, entry)
                # fix: stop after the first match — adding the same key twice
                # (entry matches several keywords) raises an error in pybtex
                break

    # rewrite "..." field delimiters as {...}
    s = filtered_bib_data.to_string("bibtex")
    s = s.replace("= \"", "= {")
    s = s.replace("\",\n", "},\n")
    s = s.replace("\"\n", "}\n")
    sys.stdout.write(s)
class BibtexExtension(command.CommandExtension):
    """
    Extension for BibTeX citations and bibliography.
    """
    @staticmethod
    def defaultConfig():
        config = command.CommandExtension.defaultConfig()
        config['duplicate_warning'] = (
            True, "Show a warning when duplicate entries detected.")
        config['duplicates'] = (list(), "A list of duplicates that are allowed.")
        return config

    def __init__(self, *args, **kwargs):
        command.CommandExtension.__init__(self, *args, **kwargs)
        self.__database = None            # merged BibliographyData
        self.__bib_files = list()         # sources of all discovered .bib files
        self.__bib_file_database = dict() # per-file parsed databases

    def preExecute(self):
        """(Re)load every '.bib' page and merge entries into one database."""
        set_strict_mode(False)  # allow incorrectly formatted author/editor names

        # If this is invoked during a live serve, we need to recompile the list of '.bib' files and
        # read them again, otherwise there's no way to distinguish existing entries from duplicates
        self.__bib_files = []
        for node in self.translator.findPages(lambda p: p.source.endswith('.bib')):
            self.__bib_files.append(node.source)

        self.__database = BibliographyData()
        for bfile in self.__bib_files:
            try:
                db = parse_file(bfile)
                self.__bib_file_database[bfile] = db
            except UndefinedMacro as e:
                msg = "The BibTeX file %s has an undefined macro:\n%s"
                LOG.warning(msg, bfile, e)
                # fix: skip this file — without 'continue', 'db' below is
                # undefined (first file) or stale (from a previous iteration)
                continue

            #TODO: https://bitbucket.org/pybtex-devs/pybtex/issues/93/
            #      databaseadd_entries-method-not-considering
            for key in db.entries:
                if key in self.__database.entries:
                    if self.get('duplicate_warning') and (key not in self.get('duplicates')):
                        msg = "The BibTeX entry '%s' defined in %s already exists."
                        LOG.warning(msg, key, bfile)
                else:
                    self.__database.add_entry(key, db.entries[key])

    def preRead(self, page):
        """Initialize the page citations list."""
        page['citations'] = list()

    def postTokenize(self, page, ast):
        """Append a 'References' section when a page cites but lacks one."""
        if page['citations']:
            has_bib = False
            for node in moosetree.iterate(ast):
                if node.name == 'BibtexBibliography':
                    has_bib = True
                    break
            if not has_bib:
                core.Heading(ast, level=2, string='References')
                BibtexBibliography(ast, bib_style='plain')

    def database(self, bibfile=None):
        """Return the merged database, or the database of a single .bib file."""
        if bibfile is None:
            return self.__database
        else:
            return self.__bib_file_database[bibfile]

    def bibfiles(self):
        """Return the list of discovered '.bib' file sources."""
        return self.__bib_files

    def extend(self, reader, renderer):
        self.requires(core, command)
        self.addCommand(reader, BibtexCommand())
        self.addCommand(reader, BibtexListCommand())
        self.addCommand(reader, BibtexReferenceComponent())
        renderer.add('BibtexCite', RenderBibtexCite())
        renderer.add('BibtexList', RenderBibtexList())
        renderer.add('BibtexBibliography', RenderBibtexBibliography())

        if isinstance(renderer, LatexRenderer):
            renderer.addPackage('natbib', 'round')
class BibtexExtension(command.CommandExtension):
    """
    Extension for BibTeX citations and bibliography.
    """
    @staticmethod
    def defaultConfig():
        config = command.CommandExtension.defaultConfig()
        config['duplicate_warning'] = (True, "Show a warning when duplicate entries detected.")
        config['duplicates'] = (list(), "A list of duplicates that are allowed.")
        return config

    def __init__(self, *args, **kwargs):
        command.CommandExtension.__init__(self, *args, **kwargs)
        self.__database = None   # merged BibliographyData, built in preExecute
        self.__citations = set() # citation keys gathered while tokenizing

    def initMetaData(self, page, meta):
        meta.initData('citations', set())

    def addCitations(self, *args):
        """Record citation keys used on the current page."""
        self.__citations.update(args)

    def preExecute(self, content):
        """Collect all '.bib' files from the content and merge their entries."""
        duplicates = self.get('duplicates', list())

        self.__database = BibliographyData()
        bib_files = []
        for node in content:
            if node.source.endswith('.bib'):
                bib_files.append(node.source)

        for bfile in bib_files:
            try:
                db = parse_file(bfile)
            except UndefinedMacro as e:
                msg = "The BibTeX file %s has an undefined macro:\n%s"
                # fix: log the exception itself ('.message' does not exist on
                # Python 3 exceptions) and skip this file — otherwise 'db'
                # below is undefined (first file) or stale (later files)
                LOG.warning(msg, bfile, e)
                continue

            #TODO: https://bitbucket.org/pybtex-devs/pybtex/issues/93/
            #      databaseadd_entries-method-not-considering
            warn = self.get('duplicate_warning')
            for key in db.entries:
                duplicate_key = key in self.__database.entries
                duplicate_key_allowed = key in duplicates
                if duplicate_key and (not duplicate_key_allowed):
                    if warn:
                        msg = "The BibTeX entry '%s' defined in %s already exists."
                        LOG.warning(msg, key, bfile)
                elif not duplicate_key:
                    self.__database.add_entry(key, db.entries[key])

    def postTokenize(self, ast, page, meta, reader):
        """Flush gathered citations and append a bibliography when missing."""
        if self.__citations:
            meta.getData('citations').update(self.__citations)
            self.__citations.clear()

            has_bib = False
            for node in anytree.PreOrderIter(ast):
                if node.name == 'BibtexBibliography':
                    has_bib = True
                    break
            if not has_bib:
                BibtexBibliography(ast)

    @property
    def database(self):
        """The merged BibliographyData built during preExecute."""
        return self.__database

    def extend(self, reader, renderer):
        self.requires(core, command)
        self.addCommand(reader, BibtexCommand())
        self.addCommand(reader, BibtexReferenceComponent())
        reader.addInline(BibtexReferenceComponentDeprecated(), location='>FormatInline')
        renderer.add('BibtexCite', RenderBibtexCite())
        renderer.add('BibtexBibliography', RenderBibtexBibliography())

        if isinstance(renderer, LatexRenderer):
            renderer.addPackage('natbib', 'round')
# Render an HTML bibliography, presorted by citation type, using a custom
# pybtex style.  NOTE(review): 'MyStyle', 'HtmlBackend' and 'betterbibfile'
# are defined elsewhere in the original file — not visible in this view.
style = MyStyle()
back = HtmlBackend()
bib_data = parse_file(betterbibfile)
entries = bib_data.entries
keys = entries.keys()
cite_type = ["article", "book"]

#### presort
contents = []
for c in cite_type:
    # collect only the entries of this citation type
    bib = BibliographyData()
    for k in keys:
        etype = entries[k].original_type
        if etype == c:
            bib.add_entry(entry=entries[k], key=k)
    # format to HTML via a temp file, then read the markup back
    formatbib = style.format_bibliography(bib)
    outfile = "out.html"
    back.write_to_file(formatbib, outfile)
    with open(outfile, "r") as f:
        contents.append(f.read())

# restrict the working set to 'inproceedings' entries for the next pass
bib = BibliographyData()
for k in keys:
    etype = entries[k].original_type
    if etype == "inproceedings":
        bib.add_entry(entry=entries[k], key=k)
entries = bib.entries
keys = entries.keys()
for t in ["lecture comittee", "conference", "france"]:  # NOTE(review): loop body truncated in this view of the file
def filter_bibolamazifile(self, bibolamazifile):
    """Detect duplicate bibliography entries, merge them, and emit aliases.

    Python 2 code.  Walks every entry; the first occurrence is kept (in
    `newbibdata` or, when filtering by usage, in `unused`), later
    occurrences are merged into it and recorded as (alias, original) pairs.
    Aliases are written to the dupfile as \\bibalias definitions and/or
    reported as warnings.  Statement order matters throughout (caches,
    in-place merges) — do not reorder.
    """
    #
    # bibdata is a pybtex.database.BibliographyData object
    #
    if (not self.dupfile and not self.warn):
        logger.warning("duplicates filter: No action is being taken because neither "
                       "-sDupfile= nor -dWarn have been requested.")
        return

    bibdata = bibolamazifile.bibliographyData();

    # optionally restrict output to citations actually used in a LaTeX job
    used_citations = None
    if self.keep_only_used_in_jobname:
        if not self.dupfile:
            logger.warning("Option -sKeepOnlyUsedInJobname has no effect without -sDupfile=... !")
        else:
            logger.debug("Getting list of used citations from %s.aux." %(self.keep_only_used_in_jobname))
            used_citations = auxfile.get_all_auxfile_citations(
                self.keep_only_used_in_jobname, bibolamazifile,
                self.name(), self.jobname_search_dirs, return_set=True
                )

    duplicates = [];

    arxivaccess = arxivutil.setup_and_get_arxiv_accessor(bibolamazifile)

    dupl_entryinfo_cache_accessor = self.cacheAccessor(DuplicatesEntryInfoCacheAccessor)

    # prime the per-entry comparison cache before any pairwise comparison
    for (key, entry) in bibdata.entries.iteritems():
        #cache_entries[key] = {}
        dupl_entryinfo_cache_accessor.prepare_entry_cache(key, entry, arxivaccess)

    newbibdata = BibliographyData();
    unused = BibliographyData();
    #unused_respawned = set() # because del unused.entries[key] is not implemented ... :(

    # NOTE(review): helper appears unused in this view — kept for reference
    def copy_entry(entry):
        #return copy.deepcopy(entry) # too deep ...
        newpers = {}
        for role, plist in entry.persons.iteritems():
            newpers[role] = [copy.deepcopy(p) for p in plist]
        return Entry(type_=entry.type,
                     fields=entry.fields.items(), # will create own Fielddict
                     persons=newpers,
                     collection=entry.collection
                     )

    # Strategy: go through the list of entries, and each time keeping it if it is new,
    # or updating the original and registering the alias if it is a duplicate.
    #
    # With only_used, the situation is a little trickier as we cannot just discard the
    # entries as they are filtered: indeed, they might be duplicates of a used entry,
    # with which one should merge the bib information.
    #
    # So the full algorithm does not immediately discard the unused keys, but rather
    # keeps them in an `unused` list. If they are later required, they are respawned
    # into the actual new list.
    #
    for (key, entry) in bibdata.entries.iteritems():
        #
        # search the newbibdata object, in case this entry already exists.
        #
        #logger.longdebug('inspecting new entry %s ...', key);
        is_duplicate_of = None
        duplicate_original_is_unused = False
        for (nkey, nentry) in newbibdata.entries.iteritems():
            if self.compare_entries_same(entry, nentry,
                                         dupl_entryinfo_cache_accessor.get_entry_cache(key),
                                         dupl_entryinfo_cache_accessor.get_entry_cache(nkey)):
                logger.longdebug(' ... matches existing entry %s!', nkey);
                is_duplicate_of = nkey;
                break
        # NOTE(review): this second scan also runs when the loop above already
        # matched, and would overwrite is_duplicate_of — confirm intended
        for (nkey, nentry) in unused.entries.iteritems():
            #if nkey in unused_respawned:
            #    continue
            if self.compare_entries_same(entry, nentry,
                                         dupl_entryinfo_cache_accessor.get_entry_cache(key),
                                         dupl_entryinfo_cache_accessor.get_entry_cache(nkey)):
                logger.longdebug(' ... matches existing entry %s!', nkey);
                is_duplicate_of = nkey;
                duplicate_original_is_unused = True
                break

        #
        # if it's a duplicate
        #
        if is_duplicate_of is not None:
            dup = (key, is_duplicate_of)
            if duplicate_original_is_unused:
                self.update_entry_with_duplicate(is_duplicate_of, unused.entries[is_duplicate_of],
                                                 key, entry)
            else:
                # a duplicate of a key we have used. So update the original ...
                self.update_entry_with_duplicate(is_duplicate_of, newbibdata.entries[is_duplicate_of],
                                                 key, entry)
            # ... and register the alias.
            duplicates.append(dup);
            if duplicate_original_is_unused and used_citations and key in used_citations:
                # if we had set the original in the unused list, but we need the
                # alias, then respawn the original to the newbibdata so we can refer
                # to it. Bonus: use the name with which we have referred to it, so we
                # don't need to register any duplicate.
                newbibdata.add_entry(key, unused.entries[is_duplicate_of])
                #unused_respawned.add(is_duplicate_of)
                del unused.entries[is_duplicate_of]
        else:
            if used_citations is not None and key not in used_citations:
                # new entry, but we don't want it. So add it to the unused list.
                unused.add_entry(key, entry)
            else:
                # new entry and we want it. So add it to the main newbibdata list.
                newbibdata.add_entry(key, entry)

    # output duplicates to the duplicates file
    if (self.dupfile):
        # and write definitions to the dupfile
        dupfilepath = os.path.join(bibolamazifile.fdir(), self.dupfile);
        check_overwrite_dupfile(dupfilepath);
        dupstrlist = [];
        with codecs.open(dupfilepath, 'w', 'utf-8') as dupf:
            dupf.write(BIBALIAS_HEADER.replace('####DUP_FILE_NAME####', self.dupfile));
            if not self.custom_bibalias:
                dupf.write(BIBALIAS_LATEX_DEFINITIONS)
            # Note: Sort entries in some way (e.g. alphabetically according to
            # (alias, original)), to avoid diffs in VCS's
            for (dupalias, duporiginal) in sorted(duplicates, key=lambda x: (x[0],x[1])):
                dupf.write((r'\bibalias{%s}{%s}' % (dupalias, duporiginal)) + "\n");
                dupstrlist.append("\t%s is an alias of %s" % (dupalias,duporiginal)) ;
            dupf.write('\n\n');
        # issue debug message
        logger.debug("wrote duplicates to file: \n" + "\n".join(dupstrlist));

    if (self.warn and duplicates):
        # build a side-by-side, wrapped comparison of alias vs. original
        def warnline(dupalias, duporiginal):
            # one-line human-readable summary of an entry from its cache
            def fmt(key, entry, cache_entry):
                s = ", ".join(string.capwords('%s, %s' % (x[0], "".join(x[1])))
                              for x in cache_entry['pers']);
                if 'title_clean' in cache_entry and cache_entry['title_clean']:
                    s += ', "' + (cache_entry['title_clean']).capitalize() + '"'
                if 'j_abbrev' in cache_entry and cache_entry['j_abbrev']:
                    s += ', ' + cache_entry['j_abbrev']
                f = entry.fields
                if f.get('month',None) and f.get('year',None):
                    s += ', ' + f['month'] + ' ' + f['year']
                elif f.get('month', None):
                    s += ', ' + f['month'] + ' <unknown year>'
                elif f.get('year', None):
                    s += ', ' + f['year']
                if 'doi' in entry.fields and entry.fields['doi']:
                    s += ', doi:'+entry.fields['doi']
                if 'arxivinfo' in cache_entry and cache_entry['arxivinfo']:
                    s += ', arXiv:'+cache_entry['arxivinfo']['arxivid']
                if 'note_cleaned' in cache_entry and cache_entry['note_cleaned']:
                    s += '; ' + cache_entry['note_cleaned']
                return s

            tw = textwrap.TextWrapper(width=DUPL_WARN_ENTRY_COLWIDTH)
            fmtalias = fmt(dupalias, bibdata.entries[dupalias],
                           dupl_entryinfo_cache_accessor.get_entry_cache(dupalias))
            fmtorig = fmt(duporiginal, bibdata.entries[duporiginal],
                          dupl_entryinfo_cache_accessor.get_entry_cache(duporiginal))
            linesalias = tw.wrap(fmtalias)
            linesorig = tw.wrap(fmtorig)
            maxlines = max(len(linesalias), len(linesorig))
            return (DUPL_WARN_ENTRY % { 'alias': dupalias, 'orig': duporiginal }
                    + "\n".join(
                        ('%s%s%s%s' %(' '*DUPL_WARN_ENTRY_BEGCOL,
                                      linealias + ' '*(DUPL_WARN_ENTRY_COLWIDTH-len(linealias)),
                                      ' '*DUPL_WARN_ENTRY_COLSEP,
                                      lineorig)
                         for (linealias, lineorig) in zip(linesalias + ['']*(maxlines-len(linesalias)),
                                                          linesorig + ['']*(maxlines-len(linesorig))))
                        )
                    + "\n\n"
                    )

        logger.warning(DUPL_WARN_TOP
                       + "".join([ warnline(dupalias, duporiginal)
                                   for (dupalias, duporiginal) in duplicates ])
                       + DUPL_WARN_BOTTOM % {'num_dupl': len(duplicates)});

    # ### TODO: do this not only if we are given a dupfile?
    #if self.dupfile:
    # ### --> Bibolamazi v3: also set this if no dupfile was given. This is because we
    # ### are moving entries themselves around and modifying them anyway
    #
    # set the new bibdata, without the duplicates
    # DON'T DO THIS, BECAUSE CACHES MAY HAVE KEPT A POINTER TO THE BIBDATA.
    #bibolamazifile.setBibliographyData(newbibdata);
    #
    # Instead, update bibolamazifile's bibliographyData() object itself.
    #
    bibolamazifile.setEntries(newbibdata.entries.iteritems())

    return
# Merge per-subpackage references.bib files into one master bibliography.
# NOTE(review): 'tag_file' and 'packages' are defined earlier in the
# original file — not visible in this view.
tags = json.load(tag_file)
tagged = list(tags.keys())

from pybtex.database import BibliographyData, Entry

# seed the master database with a placeholder entry
master_data = BibliographyData({
    'article-minimal': Entry('article', [
        ('author', 'Leslie B. Lamport'),
        ('title', "blah blah blah"),
        ('journal', "Some outlet"),
        ('year', '1986'),
    ]),
})

# handle duplicates
for package in packages:
    subpackages = packages[package].split()
    for subpackage in subpackages:
        package_bib = "tmp/{subpackage}/doc/_static/references.bib".format(subpackage=subpackage)
        if os.path.isfile(package_bib):
            local = pybtex.database.parse_file(package_bib)
            # only add keys not already present in the master database
            for entry in local.entries:
                if entry not in master_data.entries:
                    master_data.add_entry(entry, local.entries[entry])
                    print('adding', entry)

# write the merged bibliography
with open("doc/_static/references.bib", 'w') as master_bib:
    master_bib.write(master_data.to_string('bibtex'))
def create_overleaf_files(overleaf):
    """Mirror figshare article files into an Overleaf project checkout.

    Downloads every file referenced by the `overleaf` mapping (figshare
    article_id/name -> overleaf name), converts pickled DataFrames to LaTeX
    tables, writes a figures_tables.bib bibliography, and generates a
    supplementary .tex document that includes every figure/table with its
    caption and citation.  Relies on module-level `head`, `repo`, `pd`,
    `ltx` and the figshare helpers — assumed defined elsewhere in the file.
    NOTE(review): nesting of the with/if blocks was reconstructed from a
    whitespace-mangled source — verify against the original layout.
    """
    files = []
    articles = get_project_articles(FIGSHARE_PROJECT_ID)
    #print(articles)
    for article in articles:
        #print(article['title'])
        newfiles = get_files_of_article(article['id'])
        # tag each file with its owning article
        for i, f in enumerate(newfiles):
            newfiles[i]['article_id'] = article['id']
            newfiles[i]['article_name'] = article['title']
        files += newfiles
    fdf = pd.DataFrame(files)
    #print("fdf",fdf)
    fdf.sort_values(by=['article_id', 'article_name', 'name'])
    fdfo = fdf[['article_id', 'article_name', 'name']]
    fdfo = fdfo.merge(overleaf[['article_id', 'name', 'overleaf']],
                      on=['article_id', 'name'],
                      how='outer')
    #print("fdfo", fdfo)
    fdfo = fdfo.where(pd.notnull(fdfo), None)
    # rows that have both an overleaf target and a figshare download URL
    for_download = overleaf.merge(fdf[['article_id', 'name', 'download_url']],
                                  on=['article_id', 'name'])
    #print("for_download",for_download)

    # create individual files
    for row in for_download.iterrows():
        if len(row[1]['overleaf']) > 0:
            download_url = row[1]['download_url']
            file = raw_issue_request('GET', download_url, binary=True)
            if '.pkl' in row[1]['name']:
                # pickled DataFrame: save, reload, and render as a LaTeX table
                with open(
                        '/mnt/labbook/output/untracked/tmp_overleaf-{}/{}'.
                        format(head, row[1]['name']), 'wb') as f:
                    f.write(file)
                df = pd.read_pickle(
                    '/mnt/labbook/output/untracked/tmp_overleaf-{}/{}'.format(
                        head, row[1]['name']))
                df.to_latex(
                    '/mnt/labbook/output/untracked/overleaf-{}/figshare/{}.tex'
                    .format(head, row[1]['overleaf']))
                repo.git.add('figshare/{}.tex'.format(row[1]['overleaf']))
            else:
                # any other asset: copy verbatim, keeping its extension
                extension = row[1]['name'].split('.')[-1]
                with open(
                        '/mnt/labbook/output/untracked/overleaf-{}/figshare/{}.{}'
                        .format(head, row[1]['overleaf'], extension),
                        'wb') as f:
                    f.write(file)
                repo.git.add('figshare/{}.{}'.format(
                    row[1]['overleaf'], extension))

    # create bibliography file
    adf = pd.DataFrame(articles)
    #print(adf)
    bib_data = BibliographyData()
    for row in for_download.iterrows():
        if len(row[1]['overleaf']) > 0:
            idx = adf[adf['id'] == row[1]['article_id']].index[0]
            bib_data.add_entry(key=row[1]['overleaf'],
                               entry=Entry('article', [
                                   ('title', adf.at[idx, 'title']),
                                   ('journal', "figshare"),
                                   ('doi', adf.at[idx, 'doi']),
                               ]))
    bib_data.to_file(
        '/mnt/labbook/output/untracked/overleaf-{}/figures_tables.bib'.format(
            head))
    repo.git.add('figures_tables.bib')

    # write supplementary tex
    geometry_options = {"tmargin": "1cm", "lmargin": "1cm"}
    doc = ltx.Document(geometry_options=geometry_options)
    doc.preamble.append(ltx.Package('biblatex', options=['sorting=none']))
    doc.preamble.append(
        ltx.Command('addbibresource',
                    arguments=[ltx.NoEscape("figures_tables.bib")]))
    doc.preamble.append(ltx.Package('booktabs'))
    doc.preamble.append(ltx.Package('longtable'))
    with doc.create(ltx.Subsection('images and tables supplementary file')):
        for row in for_download.iterrows():
            if len(row[1]['overleaf']) > 0:
                idx = adf[adf['id'] == row[1]['article_id']].index[0]
                #print("The name is...",row[1]['name'])
                if '.pkl' in row[1]['name']:
                    #print("I should be including something here")
                    # table asset: \input the generated .tex and caption it
                    with doc.create(ltx.Table(position='hbt')) as table_holder:
                        table_holder.append(
                            ltx.Command('input',
                                        arguments=[
                                            ltx.NoEscape(
                                                "figshare/{}.tex".format(
                                                    row[1]['overleaf']))
                                        ]))
                        if row[1]['caption'] is not None:
                            table_holder.add_caption(row[1]['caption'])
                            with open(
                                    "/mnt/labbook/output/untracked/overleaf-{}/figshare/{}_caption.tex"
                                    .format(head, row[1]['overleaf']),
                                    "w") as text_file:
                                text_file.write(row[1]['caption'])
                        else:
                            # no explicit caption: fall back to article title
                            table_holder.add_caption(adf.at[idx, 'title'])
                            with open(
                                    "/mnt/labbook/output/untracked/overleaf-{}/figshare/{}_caption.tex"
                                    .format(head, row[1]['overleaf']),
                                    "w") as text_file:
                                text_file.write(adf.at[idx, 'title'])
                        repo.git.add('figshare/{}_caption.tex'.format(
                            row[1]['overleaf']))
                        table_holder.append(
                            ltx.Command(
                                'cite',
                                arguments=[ltx.NoEscape(row[1]['overleaf'])]))
                else:
                    # image asset: embed the figure and caption it
                    with doc.create(
                            ltx.Figure(position='hbt')) as image_holder:
                        image_holder.add_image('figshare/{}'.format(
                            row[1]['overleaf']))
                        #print("THE CAPTION IS:", row[1]['caption'])
                        if row[1]['caption'] is not None:
                            image_holder.add_caption(row[1]['caption'])
                            with open(
                                    "/mnt/labbook/output/untracked/overleaf-{}/figshare/{}_caption.tex"
                                    .format(head, row[1]['overleaf']),
                                    "w") as text_file:
                                text_file.write(
                                    ltx.utils.escape_latex(row[1]['caption']))
                        else:
                            # no explicit caption: fall back to article title
                            image_holder.add_caption(
                                ltx.utils.escape_latex(adf.at[idx, 'title']))
                            with open(
                                    "/mnt/labbook/output/untracked/overleaf-{}/figshare/{}_caption.tex"
                                    .format(head, row[1]['overleaf']),
                                    "w") as text_file:
                                text_file.write(
                                    ltx.utils.escape_latex(adf.at[idx, 'title']))
                        repo.git.add('figshare/{}_caption.tex'.format(
                            row[1]['overleaf']))
                        image_holder.append(
                            ltx.Command(
                                'cite',
                                arguments=[ltx.NoEscape(row[1]['overleaf'])]))
    doc.append(ltx.Command('printbibliography'))
    doc.generate_tex(
        '/mnt/labbook/output/untracked/overleaf-{}/supplementary'.format(head))
    repo.git.add('supplementary.tex')
# Merge per-subpackage references.bib files into one master bibliography.
# NOTE(review): 'packages' is defined earlier in the original file — not
# visible in this view.
from pybtex.database import BibliographyData, Entry

# seed the master database with a placeholder entry
master_data = BibliographyData({
    "article-minimal": Entry(
        "article",
        [
            ("author", "Leslie B. Lamport"),
            ("title", "blah blah blah"),
            ("journal", "Some outlet"),
            ("year", "1986"),
        ],
    )
})

# handle duplicates
for package in packages:
    subpackages = packages[package].split()
    for subpackage in subpackages:
        package_bib = "tmp/{subpackage}/doc/_static/references.bib".format(
            subpackage=subpackage)
        if os.path.isfile(package_bib):
            local = pybtex.database.parse_file(package_bib)
            # only add keys not already present in the master database
            for entry in local.entries:
                if entry not in master_data.entries:
                    master_data.add_entry(entry, local.entries[entry])
                    print("adding", entry)

# write the merged bibliography
with open("doc/_static/references.bib", "w") as master_bib:
    master_bib.write(master_data.to_string("bibtex"))
class BibtexExtension(command.CommandExtension):
    """
    Extension for BibTeX citations and bibliography.

    Collects every ``.bib`` file known to the translator into a single
    pybtex BibliographyData during preExecute, tracks citations made on
    each page, and appends a bibliography node to pages that cite but do
    not already contain one.
    """

    @staticmethod
    def defaultConfig():
        config = command.CommandExtension.defaultConfig()
        config['duplicate_warning'] = (True, "Show a warning when duplicate entries detected.")
        config['duplicates'] = (list(), "A list of duplicates that are allowed.")
        return config

    def __init__(self, *args, **kwargs):
        command.CommandExtension.__init__(self, *args, **kwargs)
        self.__database = None      # merged pybtex BibliographyData, built in preExecute
        self.__citations = list()   # citation keys accumulated for the current page

    def addCitations(self, *args):
        """Record one or more citation keys for the page being processed."""
        self.__citations.extend(args)

    def preExecute(self):
        """Parse all ``.bib`` pages into a single database, handling duplicates."""
        duplicates = self.get('duplicates', list())
        self.__database = BibliographyData()

        bib_files = []
        for node in self.translator.getPages():
            if node.source.endswith('.bib'):
                bib_files.append(node.source)

        for bfile in bib_files:
            try:
                db = parse_file(bfile)
            except UndefinedMacro as e:
                msg = "The BibTeX file %s has an undefined macro:\n%s"
                LOG.warning(msg, bfile, e)
                # BUGFIX: without this 'continue' the loop fell through and
                # iterated 'db' from the previous file (or an unbound name on
                # the first iteration) after a parse failure.
                continue

            #TODO: https://bitbucket.org/pybtex-devs/pybtex/issues/93/
            #      databaseadd_entries-method-not-considering
            warn = self.get('duplicate_warning')
            for key in db.entries:
                duplicate_key = key in self.__database.entries
                duplicate_key_allowed = key in duplicates
                if duplicate_key and (not duplicate_key_allowed):
                    # Duplicate not whitelisted: keep the first definition,
                    # optionally warn about the repeat.
                    if warn:
                        msg = "The BibTeX entry '%s' defined in %s already exists."
                        LOG.warning(msg, key, bfile)
                elif not duplicate_key:
                    self.__database.add_entry(key, db.entries[key])

    def preTokenize(self, page, ast):
        page['citations'] = list()

    def postTokenize(self, page, ast):
        """Flush collected citations to the page and append a bibliography if absent."""
        if self.__citations:
            page['citations'].extend(self.__citations)
            self.__citations.clear()

            has_bib = False
            for node in moosetree.iterate(ast):
                if node.name == 'BibtexBibliography':
                    has_bib = True
                    break

            if not has_bib:
                BibtexBibliography(ast)

    @property
    def database(self):
        # Merged bibliography; None until preExecute has run.
        return self.__database

    def extend(self, reader, renderer):
        self.requires(core, command)

        self.addCommand(reader, BibtexCommand())
        self.addCommand(reader, BibtexReferenceComponent())

        renderer.add('BibtexCite', RenderBibtexCite())
        renderer.add('BibtexBibliography', RenderBibtexBibliography())

        if isinstance(renderer, LatexRenderer):
            renderer.addPackage('natbib', 'round')
def filter_bibolamazifile(self, bibolamazifile):
    """
    Rewrite every citation key in the bibolamazi file according to the
    ``self.fmt`` template (printf-style ``%(field)s`` placeholders), then
    replace the file's bibliography database with the re-keyed one.

    Entries excluded by the published/type filters keep their original key
    but are still copied into the new database (the ``finally`` block adds
    every entry unconditionally — skipping only skips the *renaming*).
    """
    #
    # bibdata is a pybtex.database.BibliographyData object
    #
    bibdata = bibolamazifile.bibliographyData()

    arxivaccess = arxivutil.setup_and_get_arxiv_accessor(bibolamazifile)

    # first, find required fields and apply possible "filters"
    _rx_short_journal_known = re.compile(
        r'\b(?P<word>' + r'|'.join(KNOWN_ABBREV.keys()) + r')\b',
        re.IGNORECASE)

    def abbreviate(x):
        # Truncate a word to three letters plus a dot, unless exempt.
        if x.lower() in NO_ABBREV:
            return x
        return x[0:3] + '.'

    def short_journal(x):
        # Produce a compact journal token: known abbreviations first, then
        # drop boring words, abbreviate the rest, strip non-word chars, and
        # hard-cap the length at 20 characters.
        if x.strip().lower() in KNOWN_JOURNALS:
            return KNOWN_JOURNALS[x.strip().lower()]
        x = _rx_short_journal_known.sub(
            lambda m: KNOWN_ABBREV[m.group('word').lower()], x)
        x = re.sub(r'\b(' + r'|'.join(BORING_WORDS) + r')\b(?!\s*($|[-:;\.]))',
                   '', x, flags=re.IGNORECASE)
        x = re.sub(r'\b(?P<word>\w+)\b([^\.]|$)',
                   lambda m: abbreviate(m.group('word')), x)
        x = re.sub(r'[^\w.]+', '', x)
        if len(x) > 20:
            x = x[0:18] + '..'
        return x

    def arxivInfo(entry, field):
        # Empty string when the entry has no arXiv record.
        inf = arxivaccess.getArXivInfo(entry.key)
        if inf is None:
            return ''
        return inf[field]

    # One extractor per template placeholder; each maps an entry to a string.
    fld_fn = {
        'author': lambda entry: getlast(entry.persons['author'][0], lower=False)[0],
        'authors': lambda entry: "".join(
            [getlast(a, lower=False)[0] for a in entry.persons['author']])[0:25],
        'year': lambda entry: entry.fields.get('year', ''),
        'year2': lambda entry: '%02d' % (int(entry.fields.get('year', '')) % 100),
        'journal_abb': lambda entry: fmtjournal(entry.fields.get('journal', '')),
        'journal': lambda entry: short_journal(
            normstr(delatex(entry.fields.get('journal', '')), lower=False)),
        'title_word': lambda entry: next(
            (word
             for word in re.sub(r'[^\w\s]', '',
                                delatex(entry.fields.get('title', ''))).split()
             if word.lower() not in BORING_TITLE_WORDS),
            ''),
        'doi': lambda entry: entry.fields.get('doi', ''),
        'arxivid': lambda entry: arxivInfo(entry, 'arxivid'),
        'primaryclass': lambda entry: arxivInfo(entry, 'primaryclass'),
    }

    # used fields: every %(name) placeholder in the format template
    fld = set([m.group('field')
               for m in re.finditer(r'(^|[^%])(%%)*%\((?P<field>\w+)\)', self.fmt)])

    # check all valid fields
    for f in fld:
        if f not in fld_fn:
            # BUGFIX: the message previously lacked the '% (f,)' argument, so
            # the error text contained a literal '%s' instead of the field name.
            raise BibFilterError(
                'citekey', "Invalid field `%s' for citekey filter" % (f,))

    logger.debug('Used fields are %r', fld)

    newbibdata = BibliographyData()

    # Control-flow marker: raised to abandon the renaming of one entry.
    # BUGFIX: must derive from Exception — raising an old-style/plain class
    # is a TypeError on Python 3 (works identically on Python 2).
    class Jump(Exception):
        pass

    # BUGFIX: .iteritems() does not exist on Python 3; .items() behaves the
    # same for iteration on both major versions.
    for (key, entry) in bibdata.entries.items():
        keyorig = key
        try:
            ainfo = arxivaccess.getArXivInfo(key)
            if self.if_published is not None:
                if (not self.if_published and (ainfo is None or ainfo['published'])):
                    logger.longdebug(
                        'Skipping published entry %s (filter: unpublished)', key)
                    raise Jump
                if (self.if_published and (ainfo is not None and not ainfo['published'])):
                    logger.longdebug(
                        'Skipping unpublished entry %s (filter: published)', key)
                    raise Jump
            if self.if_type is not None:
                if entry.type not in self.if_type:
                    logger.longdebug(
                        'Skipping entry %s of different type %s (filter: %r)',
                        key, entry.type, self.if_type)
                    raise Jump

            repldic = dict(zip(fld, [fld_fn[f](entry) for f in fld]))
            try:
                key = self.fmt % repldic
            except ValueError as e:
                raise BibFilterError('citekey', "Error replacing fields: %s" % (e))
        except Jump:
            # Entry excluded from renaming; fall through to finally with the
            # original key so it is still added to the new database.
            pass
        finally:
            # avoid duplicate keys
            newkey = key
            count = 0
            while newkey in newbibdata.entries:
                count += 1
                newkey = key + '.%d' % (count)
            if count:
                logger.warning(
                    "`%s': Citation key `%s' already used: using `%s' instead.",
                    keyorig, key, newkey)
            # add the entry
            newbibdata.add_entry(newkey, entry)

    bibolamazifile.setBibliographyData(newbibdata)

    return