def _process_one_file(key, f, info_this_key):
    f_pure = os.path.split(f)[1]
    info_this = _process_meta(f)

    # then let's construct a bib entry.
    entry_type = 'misc' if info_this['finished'] else 'unpublished'
    del info_this['finished']
    entry_this = Entry(entry_type,
                       [(x, str(y))
                        for x, y in info_this.items() if y is not None])

    bib_id, _ = os.path.splitext(f_pure)

    bib_data = BibliographyData({bib_id: entry_this})

    bib_cats = info_this['additional-categories']
    if bib_cats is None:
        bib_cats = []
    else:
        bib_cats = [
            tuple(cat.strip().split('/')) for cat in bib_cats.split(',')
        ]
    bib_cats.append(key)
    bib_cats = _additional_cats_closure(bib_cats)
    # '_' + bib_id is the key we should use for GitHub browsing.
    info_this_key.append(
        [bib_id, (bib_data.to_string('bibtex'), key, '_' + bib_id, bib_cats)])
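For readers new to pybtex, a minimal, self-contained sketch of the three pieces the snippet above combines (Entry, BibliographyData, and to_string('bibtex')); the key and field values below are illustrative only:

from pybtex.database import BibliographyData, Entry

# build one entry from (field, value) pairs, wrap it in a database, and serialize it
entry = Entry('misc', [('title', 'An Example Note'), ('year', '2024')])
bib_data = BibliographyData({'example2024': entry})
print(bib_data.to_string('bibtex'))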
Example 2
def main():
    doi = _extract_doi(args.identifier[0])

    if doi is None:
        print(args.identifier[0])  # no DOI found; echo the identifier as given
    elif args.bibtex:
        result = cn.content_negotiation(doi, format="bibtex")
        bibtex = parse_string(result, "bibtex")
        try:
            name = "".join(
                bibtex.entries.values()[0].persons.values()[0][0].last_names)
            name = name.replace("ä", "ae").replace("ö",
                                                   "oe").replace("ü", "ue")
            name = unidecode(name)
            shortdoi = _short_doi(doi)[3:]
            year = bibtex.entries.values()[0].fields["year"]
            key = "{}_{}_{}".format(name, year, shortdoi)
            new = BibliographyData()
            new.add_entry(key, bibtex.entries[bibtex.entries.keys()[0]])
            print(new.to_string("bibtex"))
        except KeyError:
            print(result)
    else:
        try:
            result = cn.content_negotiation(doi, format=args.format)
            print(result)
        except requests.exceptions.HTTPError:
            print(doi)
    print()
Example 3
    def createMaterialize(self, parent, token, page):
        ol = self.createHTML(parent, token, page)
        if ol is None:
            return

        for child in ol.children:
            key = child['id']
            db = BibliographyData()
            db.add_entry(key, self.extension.database().entries[key])
            btex = db.to_string("bibtex")

            m_id = uuid.uuid4()
            html.Tag(child,
                     'a',
                     style="padding-left:10px;",
                     class_='modal-trigger moose-bibtex-modal',
                     href="#{}".format(m_id),
                     string='[BibTeX]')

            modal = html.Tag(child, 'div', class_='modal', id_=m_id)
            content = html.Tag(modal, 'div', class_='modal-content')
            pre = html.Tag(content, 'pre', style="line-height:1.25;")
            html.Tag(pre, 'code', class_='language-latex', string=btex)

        return ol
Example 4
def to_bibtex(citations):
    import uuid
    from pybtex.database import BibliographyData
    entries = {str(uuid.uuid4())[:8]: b for b in citations}
    bib_data = BibliographyData(entries=entries)

    return bib_data.to_string('bibtex')
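A hedged usage sketch of the helper above, assuming citations is an iterable of pybtex Entry objects; the entry below is made up for illustration:

from pybtex.database import Entry

# each citation receives a random 8-character key in the generated BibTeX
citations = [Entry('article', {'title': 'An Example Paper', 'year': '2020'})]
print(to_bibtex(citations))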
Example 5
def main():

  if len(sys.argv) != 3:
    print('Filters BibTeX file preserving order and limiting to last 5 years')
    print('usage: %s <original-bib> <output-bib>' % \
        os.path.basename(sys.argv[0]))
    print('example: %s publications.bib filtered.bib' % \
        os.path.basename(sys.argv[0]))
    sys.exit(1)

  original = sys.argv[1]
  minyear = datetime.date.today().year - 5
  output = sys.argv[2]

  from pybtex.database import parse_file, BibliographyData

  bib_data = parse_file(original)

  filtered = BibliographyData()

  for key in bib_data.entries:
    entry = bib_data.entries[key]
    year = int(entry.fields['year'])
    if year > minyear:
      print('Selecting @%s[%s] from %s' % (entry.type, key, year))
      filtered.entries[key] = entry

  print('Saving to %s...' % output)
  s = filtered.to_string('bibtex')
  for f, t in FIX_STRINGS: s = s.replace(f, t)
  with open(output, 'wt') as f: f.write(s)
Example 6
    def preExecute(self, content):

        self.__database = BibliographyData()

        bib_files = []
        for node in content:
            if node.source.endswith('.bib'):
                bib_files.append(node.source)

        for bfile in bib_files:
            try:
                db = parse_file(bfile)
            except UndefinedMacro as e:
                msg = "The BibTeX file %s has an undefined macro:\n%s"
                LOG.warning(msg, bfile, e.message)

            #TODO: https://bitbucket.org/pybtex-devs/pybtex/issues/93/
            #      databaseadd_entries-method-not-considering
            warn = self.get('duplicate_warning')
            for key in db.entries:
                if key in self.__database.entries:
                    if warn:
                        msg = "The BibTeX entry '%s' defined in %s already exists."
                        LOG.warning(msg, key, bfile)
                else:
                    self.__database.add_entry(key, db.entries[key])
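The per-key check above works around the add_entries duplicate issue referenced in the TODO; as a standalone illustration, a minimal sketch of the same merge pattern outside the extension, with hypothetical file names:

from pybtex.database import BibliographyData, parse_file

merged = BibliographyData()
for bib_file in ('refs_a.bib', 'refs_b.bib'):  # hypothetical input files
    db = parse_file(bib_file)
    for key in db.entries:
        if key in merged.entries:
            print("skipping duplicate BibTeX key:", key)
        else:
            merged.add_entry(key, db.entries[key])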
Example 7
    def preExecute(self):

        duplicates = self.get('duplicates', list())
        self.__database = BibliographyData()

        bib_files = []
        for node in self.translator.getPages():
            if node.source.endswith('.bib'):
                bib_files.append(node.source)

        for bfile in bib_files:
            try:
                db = parse_file(bfile)
            except UndefinedMacro as e:
                msg = "The BibTeX file %s has an undefined macro:\n%s"
                LOG.warning(msg, bfile, e)

            #TODO: https://bitbucket.org/pybtex-devs/pybtex/issues/93/
            #      databaseadd_entries-method-not-considering
            warn = self.get('duplicate_warning')
            for key in db.entries:
                duplicate_key = key in self.__database.entries
                duplicate_key_allowed = key in duplicates
                if duplicate_key and (not duplicate_key_allowed):
                    if warn:
                        msg = "The BibTeX entry '%s' defined in %s already exists."
                        LOG.warning(msg, key, bfile)
                elif not duplicate_key:
                    self.__database.add_entry(key, db.entries[key])
Example 8
def download():
    querier = ScholarQuerier()
    settings = ScholarSettings()
    settings.set_citation_format(ScholarSettings.CITFORM_BIBTEX)
    querier.apply_settings(settings)
    query = SearchScholarQuery()
    query.set_phrase("eelbrain")
    query.set_timeframe(2012, None)
    query.set_include_patents(False)
    # download entries
    bib = BibliographyData()
    start = 0
    while True:
        querier.send_query(query)
        if len(querier.articles) == 0:
            break
        # extract citation data
        for article in querier.articles:
            querier.get_citation_data(article)
            # parse entry
            data = parse_bytes(article.citation_data, 'bibtex')
            assert len(data.entries) == 1
            for key, entry in data.entries.items():
                # make sure URL is present
                if 'url' not in entry.fields:
                    url = article.attrs['url'][0]
                    if url:
                        entry.fields['url'] = url
                # store
                bib.add_entry(key, entry)
        # next page
        start += 10
        query.set_start(start)
    # write to file
    CACHE.write_bytes(bib.to_bytes('bibtex').replace(br'\\&', br'\&'))
Example 9
def saveEntry(key, entry, fileName, appendFlag=False):
    """
    Add a found BibTeX entry into a file with the given name.

    :param key: The BibTeX key for the new entry.

    :param entry: The new BibTeX entry to be added.

    :param fileName: The name of the file into which the entry is to \
    be put.

    :param appendFlag: The entry should be appended to the file if the \
    value is ``True`` and replace all the existing entries in the file \
    otherwise.
    """
    try:
        new_data = BibliographyData(entries=OrderedCaseInsensitiveDict(),
                                    preamble=[])
        new_data.entries[key] = entry
        tempFileName = config.ROOT_DIR + '.temp.bib'
        new_data.to_file(tempFileName)
        command = 'cat {temp} {arrows} {real}; rm -f {temp}'. \
                  format(temp = tempFileName, arrows = ('>>' if appendFlag else '>'), real = fileName)
        os.system(command)
    except:
        tkMessageBox.showerror(
            'LiteRef Error',
            'Could not create the file: ' + fileName + '\nAbandoning the key.')
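For comparison, the same append-or-replace behavior can be sketched without the temporary file and shell cat by writing the serialized entry directly; this is only an illustrative alternative, not the code above:

from pybtex.database import BibliographyData

def save_entry_simple(key, entry, file_name, append=False):
    # serialize a single-entry database, then append to or overwrite the target file
    data = BibliographyData(entries={key: entry})
    with open(file_name, 'a' if append else 'w') as handle:
        handle.write(data.to_string('bibtex'))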
Example 10
    def preExecute(self):
        set_strict_mode(
            False)  # allow incorrectly formatted author/editor names

        # If this is invoked during a live serve, we need to recompile the list of '.bib' files and
        # read them again, otherwise there's no way to distinguish existing entries from duplicates
        self.__bib_files = []
        for node in self.translator.findPages(
                lambda p: p.source.endswith('.bib')):
            self.__bib_files.append(node.source)

        self.__database = BibliographyData()
        for bfile in self.__bib_files:
            try:
                db = parse_file(bfile)
                self.__bib_file_database[bfile] = db
            except UndefinedMacro as e:
                msg = "The BibTeX file %s has an undefined macro:\n%s"
                LOG.warning(msg, bfile, e)

            #TODO: https://bitbucket.org/pybtex-devs/pybtex/issues/93/
            #      databaseadd_entries-method-not-considering
            for key in db.entries:
                if key in self.__database.entries:
                    if self.get('duplicate_warning') and (
                            key not in self.get('duplicates')):
                        msg = "The BibTeX entry '%s' defined in %s already exists."
                        LOG.warning(msg, key, bfile)
                else:
                    self.__database.add_entry(key, db.entries[key])
Example 11
class BibtexExtension(command.CommandExtension):
    """
    Extension for BibTeX citations and bibliography.
    """

    @staticmethod
    def defaultConfig():
        config = command.CommandExtension.defaultConfig()
        config['duplicate_warning'] = (True, "Show a warning when duplicate entries detected.")
        return config

    def __init__(self, *args, **kwargs):
        command.CommandExtension.__init__(self, *args, **kwargs)

        self.__database = None
        self.__citations = set()

    def preExecute(self, content):

        self.__database = BibliographyData()

        bib_files = []
        for node in content:
            if node.source.endswith('.bib'):
                bib_files.append(node.source)

        for bfile in bib_files:
            try:
                db = parse_file(bfile)
            except UndefinedMacro as e:
                msg = "The BibTeX file %s has an undefined macro:\n%s"
                LOG.warning(msg, bfile, e.message)

            #TODO: https://bitbucket.org/pybtex-devs/pybtex/issues/93/
            #      databaseadd_entries-method-not-considering
            warn = self.get('duplicate_warning')
            for key in db.entries:
                if key in self.__database.entries:
                    if warn:
                        msg = "The BibTeX entry '%s' defined in %s already exists."
                        LOG.warning(msg, key, bfile)
                else:
                    self.__database.add_entry(key, db.entries[key])

    @property
    def database(self):
        return self.__database

    def extend(self, reader, renderer):
        self.requires(core, command)

        self.addCommand(reader, BibtexCommand())

        reader.addInline(BibtexReferenceComponent(), location='>FormatInline')

        renderer.add('BibtexCite', RenderBibtexCite())
        renderer.add('BibtexBiliography', RenderBibtexBibliography())
Example 12
class BibtexExtension(command.CommandExtension):
    """
    Extension for BibTeX citations and bibliography.
    """
    @staticmethod
    def defaultConfig():
        config = command.CommandExtension.defaultConfig()
        config['duplicate_warning'] = (
            True, "Show a warning when duplicate entries detected.")
        return config

    def __init__(self, *args, **kwargs):
        command.CommandExtension.__init__(self, *args, **kwargs)

        self.__database = None
        self.__citations = set()

    def preExecute(self, content):

        self.__database = BibliographyData()

        bib_files = []
        for node in content:
            if node.source.endswith('.bib'):
                bib_files.append(node.source)

        for bfile in bib_files:
            try:
                db = parse_file(bfile)
            except UndefinedMacro as e:
                msg = "The BibTeX file %s has an undefined macro:\n%s"
                LOG.warning(msg, bfile, e.message)

            #TODO: https://bitbucket.org/pybtex-devs/pybtex/issues/93/
            #      databaseadd_entries-method-not-considering
            warn = self.get('duplicate_warning')
            for key in db.entries:
                if key in self.__database.entries:
                    if warn:
                        msg = "The BibTeX entry '%s' defined in %s already exists."
                        LOG.warning(msg, key, bfile)
                else:
                    self.__database.add_entry(key, db.entries[key])

    @property
    def database(self):
        return self.__database

    def extend(self, reader, renderer):
        self.requires(core, command)

        self.addCommand(reader, BibtexCommand())

        reader.addInline(BibtexReferenceComponent(), location='>FormatInline')

        renderer.add('BibtexCite', RenderBibtexCite())
        renderer.add('BibtexBiliography', RenderBibtexBibliography())
Example 13
    def run(self):

        style = find_plugin('pybtex.style.formatting', self.options.get('style', 'unsrt'))()
        bibtex_dir = self.options.get('bibtex_dir', 'bibtex')
        highlight_author = self.options.get('highlight_author', None)

        parser = Parser()

        # Sort the publication entries by year reversed
        data = sorted(parser.parse_file(self.arguments[0]).entries.items(),
                      key=lambda e: e[1].fields['year'], reverse=True)

        print(type(data))
        html = '<div class = "publication-list">\n'
        cur_year = None

        if bibtex_dir:  # create the bibtex dir if the option is set
            try:
                os.mkdir(os.path.sep.join((self.output_folder, bibtex_dir)))
            except OSError:  # probably because the dir already exists
                pass

        for label, entry in data:
            # print a year title when year changes
            if entry.fields['year'] != cur_year:
                if cur_year is not None:  # not first year group
                    html += '</ul>'
                cur_year = entry.fields['year']
                html += '<h3>{}</h3>\n<ul>'.format(cur_year)

            pub_html = list(style.format_entries((entry,)))[0].text.render_as('html')
            if highlight_author:  # highlight an author (usually oneself)
                pub_html = pub_html.replace(highlight_author,
                                            '<strong>{}</strong>'.format(highlight_author), 1)
            html += '<li class = "publication">' + pub_html

            extra_links = ""
            if bibtex_dir:  # write bib files to bibtex_dir for downloading
                bib_link = '{}/{}.bib'.format(bibtex_dir, label)
                bib_data = BibliographyData(dict({label: entry}))
                bib_data.to_file('/'.join([self.output_folder, bib_link]), 'bibtex')
                extra_links += '[<a href="{}">bibtex</a>] '.format(bib_link)

            if 'pdf' in entry.fields:  # the link to the pdf file
                extra_links += '[<a href="{}">pdf</a>] '.format(entry.fields['pdf'])

            if extra_links:
                html += '<br/>' + extra_links

            html += '</li>'

        if len(data) != 0:  # publication list is nonempty
            html += '</ul>'

        html += '</div>'

        return [nodes.raw('', html, format='html'), ]
Example 14
def run(csvFileName, bibFileName):

    if not os.path.isfile(csvFileName):
        print("File not found: ", csvFileName)
        return

    # I don't know why, but pandas has trouble with this path, so copy the file to a local temporary path first
    tmpFile = tempfile.mktemp()
    copyfile(csvFileName, tmpFile)

    colnames = [
        'title', 'journal', 'book', 'volume', 'issue', 'doi', 'author', 'year',
        'url', 'type'
    ]
    pn = pd.read_csv(tmpFile, names=colnames, skiprows=1)

    bibData = BibliographyData()
    total = 0
    notAuthor = 0

    for row_index, row in pn.iterrows():
        total = total + 1
        fields = []
        if (not pd.isnull(row.title)):
            fields.append(('title', row.title))
        if (not pd.isnull(row.journal)):
            fields.append(('journal', row.journal))
        if (not pd.isnull(row.volume)):
            fields.append(('volume', str(row.volume)))
        if (not pd.isnull(row.issue)):
            fields.append(('issue', str(row.issue)))
        if (not pd.isnull(row.doi)):
            fields.append(('doi', row.doi))
        if (not pd.isnull(row.year)):
            fields.append(('year', str(row.year)))
        if (not pd.isnull(row.url)):
            fields.append(('url', row.url))
        if (not pd.isnull(row.author)):
            fields.append(('author', AuthorFix(row.author)))

        keyPaper = row.doi
        typePaper = TypePaperSelect(row.type)

        print("Chave " + keyPaper + "               \r", end="", flush=True)

        if (pd.isnull(row.author)):
            notAuthor = notAuthor + 1
        else:
            bibData.entries[keyPaper] = Entry(typePaper, fields)

    print("Processed ", total, "                             ")
    print("Removed without author ", notAuthor)
    print("Total Final", len(bibData.entries))

    bibData.to_file(bibFileName)
    print("Saved file ", bibFileName)
Example 15
def aggregate_snls(snls):
    """
    Aggregates a series of SNLs into the fields for a single SNL
    """
    # Choose earliest created_at
    created_at = sorted([snl["about"]["created_at"]["string"] for snl in snls])[0]

    # Choose earliest history
    history = sorted(snls, key=lambda snl: snl["about"]["created_at"]["string"])[0]["about"]["history"]

    # Aggregate all references into one dict to remove duplicates
    refs = {}
    for snl in snls:
        try:
            entries = parse_string(snl["about"]["references"], bib_format="bibtex")
            refs.update(entries.entries)
        except Exception:
            # report and skip references that fail to parse
            print("Failed parsing bibtex: {}".format(snl["about"]["references"]))

    entries = BibliographyData(entries=refs)
    references = entries.to_string("bibtex")

    # Aggregate all remarks
    remarks = list(set([remark for snl in snls for remark in snl["about"]["remarks"]]))

    # Aggregate all projects
    projects = list(set([projects for snl in snls for projects in snl["about"]["projects"]]))

    # Aggregate all authors - Converting a single dictionary first performs duplicate checking
    authors = {entry["name"].lower(): entry["email"] for snl in snls for entry in snl["about"]["authors"]}
    authors = [{"name": name.title(), "email": email} for name, email in authors.items()]

    # Aggregate all the database IDs
    db_ids = defaultdict(list)
    for snl in snls:
        if len(snl["about"]["history"]) == 1 and \
                snl["about"]["history"][0]["name"] in DB_indexes:
            db_name = snl["about"]["history"][0]["name"]
            db_id_key = DB_indexes[db_name]
            db_ids[db_id_key].append(snl["about"]["history"][0]["description"].get("id", None))
    # remove Nones and empty lists
    db_ids = {k: list(filter(None, v)) for k, v in db_ids.items() if len(list(filter(None, v))) > 0}

    snl_fields = {
        "created_at": created_at,
        "history": history,
        "references": references,
        "remarks": remarks,
        "projects": projects,
        "authors": authors,
        "data": {"_db_ids": db_ids}
    }

    return snl_fields
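The reference-merging step above relies on parse_string plus a plain dict to deduplicate keys; a minimal sketch of that step in isolation, using made-up BibTeX strings:

from pybtex.database import BibliographyData, parse_string

refs = {}
for source in ('@article{a, title={A}}', '@article{b, title={B}}'):  # illustrative strings
    refs.update(parse_string(source, bib_format='bibtex').entries)
print(BibliographyData(entries=refs).to_string('bibtex'))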
Example 16
class BibtexExtension(command.CommandExtension):
    """
    Extension for BibTeX citations and bibliography.
    """

    @staticmethod
    def defaultConfig():
        config = command.CommandExtension.defaultConfig()
        return config

    def __init__(self, *args, **kwargs):
        command.CommandExtension.__init__(self, *args, **kwargs)

        self.__database = BibliographyData()
        self.__citations = set()

    def init(self, translator):
        command.CommandExtension.init(self, translator)

        bib_files = []
        for node in anytree.PreOrderIter(self.translator.root):
            if node.source.endswith('.bib'):
                bib_files.append(node.source)

        for bfile in bib_files:
            try:
                db = parse_file(bfile)
            except UndefinedMacro as e:
                msg = "The BibTeX file %s has an undefined macro:\n%s"
                LOG.warning(msg, bfile, e.message)

            #TODO: https://bitbucket.org/pybtex-devs/pybtex/issues/93/
            #      databaseadd_entries-method-not-considering
            for key in db.entries:
                if key in self.__database.entries:
                    msg = "The BibTeX entry '%s' defined in %s already exists."
                    LOG.warning(msg, key, bfile)
                else:
                    self.__database.add_entry(key, db.entries[key])

    @property
    def database(self):
        return self.__database

    def extend(self, reader, renderer):
        self.requires(command)

        self.addCommand(BibtexCommand())

        reader.addInline(BibtexReferenceComponent(), location='>Format')

        renderer.add(BibtexCite, RenderBibtexCite())
        renderer.add(BibtexBibliography, RenderBibtexBibliography())
Example 17
class BibtexExtension(command.CommandExtension):
    """
    Extension for BibTeX citations and bibliography.
    """
    @staticmethod
    def defaultConfig():
        config = command.CommandExtension.defaultConfig()
        return config

    def __init__(self, *args, **kwargs):
        command.CommandExtension.__init__(self, *args, **kwargs)

        self.__database = BibliographyData()
        self.__citations = set()

    def init(self, translator):
        command.CommandExtension.init(self, translator)

        bib_files = []
        for node in anytree.PreOrderIter(self.translator.root):
            if node.source.endswith('.bib'):
                bib_files.append(node.source)

        for bfile in bib_files:
            try:
                db = parse_file(bfile)
            except UndefinedMacro as e:
                msg = "The BibTeX file %s has an undefined macro:\n%s"
                LOG.warning(msg, bfile, e.message)

            #TODO: https://bitbucket.org/pybtex-devs/pybtex/issues/93/
            #      databaseadd_entries-method-not-considering
            for key in db.entries:
                if key in self.__database.entries:
                    msg = "The BibTeX entry '%s' defined in %s already exists."
                    LOG.warning(msg, key, bfile)
                else:
                    self.__database.add_entry(key, db.entries[key])

    @property
    def database(self):
        return self.__database

    def extend(self, reader, renderer):
        self.requires(command)

        self.addCommand(BibtexCommand())

        reader.addInline(BibtexReferenceComponent(), location='>Format')

        renderer.add(BibtexCite, RenderBibtexCite())
        renderer.add(BibtexBibliography, RenderBibtexBibliography())
Example 18
def parse_bibtex(args, wanted):
    if wanted is not None:
        bibs = BibliographyData(wanted_entries=wanted)
    else:  # Because Ubuntu/Debian doesn't have a new enough pybtex for wanted_entries
        bibs = BibliographyData()
    parser = Parser()
    for filename in input_bibtex_filenames(args):
        filebibs = parser.parse_file(filename)
        bibs.add_entries(iter(filebibs.entries.items()))
    # Sort the entries to ensure a consistent ordering of the output so that adding
    # one new citation doesn't alter the whole file
    bibs.entries = OrderedDict(sorted(bibs.entries.items(),key=lambda x : x[0]))
    return bibs
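For clarity, a minimal sketch of the wanted_entries filtering used above, which restricts parsing to a whitelist of keys; the keys and source string are illustrative, and, as the comment in the snippet notes, older pybtex releases do not accept this keyword:

from pybtex.database.input.bibtex import Parser

source = '@article{keep_me, title={Kept}}\n@article{drop_me, title={Dropped}}'
parser = Parser(wanted_entries=['keep_me'])  # only whitelisted keys are retained
db = parser.parse_string(source)
print(list(db.entries.keys()))  # ['keep_me']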
Example 19
def bib(args):
    gbib = BibliographyData()

    def _harvest(ds, **kw):
        for bib in ds.cldf_dir.glob('*.bib'):
            bib = parse_file(str(bib))
            for id_, entry in bib.entries.items():
                id_ = '{0}:{1}'.format(ds.id, id_)
                if id_ not in gbib.entries:
                    gbib.add_entry(id_, entry)

    with_dataset(args, _harvest, default_to_all=True)
    gbib.to_file(
        str(Path(args.cfg['paths']['lexibank']).joinpath('lexibank.bib')))
Example 20
def make_bibliography(table):  # pragma: nocover
    db = BibliographyData()
    for row in table:
        try:
            entry = row_to_bibentry(row)
        except ValueError as e:
            print('Reference',
                  row.get('Reference_ID'),
                  'dropped:',
                  str(e),
                  file=sys.stderr)
            continue
        db.add_entry(entry.key, entry)
    return db
Example 21
def main(bibfile, template, save_path, save_individual=False):
    # Make sure save_path is a directory if save_individual, and a valid file path otherwise
    if save_individual and not os.path.isdir(save_path):
        print(
            'save_individual is true, but save_path is not a directory. Quitting'
        )
        return
    elif not save_individual and not os.path.isdir(
            os.path.abspath(os.path.dirname(save_path))):
        print(
            'save_individual is false, but save_path is not a valid file location. Quitting'
        )
        return

    # Load the template.
    tenv = jinja2.sandbox.SandboxedEnvironment()
    tenv.filters['author_fmt'] = _author_fmt
    tenv.filters['author_list'] = _author_list
    tenv.filters['title'] = _title
    tenv.filters['venue_type'] = _venue_type
    tenv.filters['venue'] = _venue
    tenv.filters['main_url'] = _main_url
    tenv.filters['extra_urls'] = _extra_urls
    tenv.filters['monthname'] = _month_name
    with open(template) as f:
        tmpl = tenv.from_string(f.read())

    # Parse the BibTeX file.
    with open(bibfile) as f:
        db = bibtex.Parser().parse_stream(f)

    for k, v in db.entries.items():
        # Include the bibliography key in each entry.
        v.fields['key'] = k
        # Include the full BibTeX in each entry, minus fields to ignore
        filtered_v_field_items = filter(
            lambda x: x[0] not in _ignore_fields_bibtex_source,
            v.fields.items())
        filtered_v = Entry(v.type,
                           fields=filtered_v_field_items,
                           persons=v.persons)
        v.fields['bibtex'] = BibliographyData({
            k: filtered_v
        }).to_string('bibtex').strip()
        # Replace ' = "XXX"' with '={XXX}'
        v.fields['bibtex'] = re.sub(r' = \"(.*)\"', r'={\1}',
                                    v.fields['bibtex'])

    # Render the template.
    bib_sorted = sorted(db.entries.values(), key=_sortkey, reverse=True)
    if save_individual:
        for bib in bib_sorted:
            out = tmpl.render(entry=bib)
            file_path = os.path.join(save_path, '%s.html' % bib.key)
            with open(file_path, 'w') as f:
                f.write(out)
    else:
        out = tmpl.render(entries=bib_sorted)
        with open(save_path, 'w') as f:
            f.write(out)
Example 22
def to_markdown_pandoc(entry, csl_path):
    """
    Converts the PyBtex entry into formatted markdown citation text
    """
    bibtex_string = BibliographyData(entries={
        entry.key: entry
    }).to_string("bibtex")
    citation_text = """
---
nocite: '@*'
---
"""

    with tempfile.TemporaryDirectory() as tmpdir:
        bib_path = os.path.join(tmpdir, "temp.bib")
        with open(bib_path, "w") as bibfile:
            bibfile.write(bibtex_string)

        # Call Pandoc.
        markdown = pypandoc.convert_text(
            source=citation_text,
            to="markdown_strict-citations",
            format="md",
            extra_args=["--csl", csl_path, "--bibliography", bib_path],
            filters=["pandoc-citeproc"],
        )

    # TODO: Perform this extraction better
    markdown = markdown.split("\n")[0][2:]

    return str(markdown)
Example 23
class MacrosTest(ParserTest, TestCase):
    input_string = u"""
        @String{and = { and }}
        @String{etal = and # { {et al.}}}
        @Article(
            unknown,
            author = nobody,
        )
        @Article(
            gsl,
            author = "Gough, Brian"#etal,
        )
    """
    correct_result = BibliographyData([
        ('unknown', Entry('article')),
        ('gsl',
         Entry('article',
               persons={
                   u'author': [Person(u'Gough, Brian'),
                               Person(u'{et al.}')]
               })),
    ])
    errors = [
        'undefined string in line 6: nobody',
    ]
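The test above exercises @String macro definitions and # concatenation; a minimal sketch of the same macro expansion through parse_string, with an illustrative macro name and entry key:

from pybtex.database import parse_string

source = '''
@String{jp = {Journal of Pybtex Examples}}
@article{demo, journal = jp, title = {Macro Demo}}
'''
db = parse_string(source, 'bibtex')
print(db.entries['demo'].fields['journal'])  # Journal of Pybtex Examples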
Example 24
class KeylessEntriesTest(ParserTest, TestCase):
    parser_options = {'keyless_entries': True}
    input_string = u"""
        @BOOK(
            title="I Am Jackie Chan: My Life in Action",
            year=1999
        )
        @BOOK()
        @BOOK{}

        @BOOK{
            title = "Der deutsche Jackie Chan Filmführer",
        }

    """
    correct_result = BibliographyData({
        'unnamed-1':
        Entry('book', {
            'title': 'I Am Jackie Chan: My Life in Action',
            'year': '1999'
        }),
        'unnamed-2':
        Entry('book'),
        'unnamed-3':
        Entry('book'),
        'unnamed-4':
        Entry('book', {'title': u'Der deutsche Jackie Chan Filmführer'}),
    })
Example 25
class EntryTypesTest(ParserTest, TestCase):
    input_string = u"""
        Testing what are allowed for entry types

        These are OK
        @somename{an_id,}
        @t2{another_id,}
        @t@{again_id,}
        @t+{aa1_id,}
        @_t{aa2_id,}

        These ones not
        @2thou{further_id,}
        @some name{id3,}
        @some#{id4,}
        @some%{id4,}
    """
    correct_result = BibliographyData([
        ('an_id', Entry('somename')),
        ('another_id', Entry('t2')),
        ('again_id', Entry('t@')),
        ('aa1_id', Entry('t+')),
        ('aa2_id', Entry('_t')),
    ])
    errors = [
        "syntax error in line 12: a valid name expected",
        "syntax error in line 13: '(' or '{' expected",
        "syntax error in line 14: '(' or '{' expected",
        "syntax error in line 15: '(' or '{' expected",
    ]
Example 26
class BracesAndQuotesTest(ParserTest, TestCase):
    input_string = '''@ARTICLE{
                test,
                title="Nested braces  and {"quotes"}",
        }'''
    correct_result = BibliographyData(
        {'test': Entry('article', {'title': 'Nested braces and {"quotes"}'})})
Example 27
    def preExecute(self, content):

        duplicates = self.get('duplicates', list())
        self.__database = BibliographyData()

        bib_files = []
        for node in content:
            if node.source.endswith('.bib'):
                bib_files.append(node.source)

        for bfile in bib_files:
            try:
                db = parse_file(bfile)
            except UndefinedMacro as e:
                msg = "The BibTeX file %s has an undefined macro:\n%s"
                LOG.warning(msg, bfile, e.message)

            #TODO: https://bitbucket.org/pybtex-devs/pybtex/issues/93/
            #      databaseadd_entries-method-not-considering
            warn = self.get('duplicate_warning')
            for key in db.entries:
                duplicate_key = key in self.__database.entries
                duplicate_key_allowed = key in duplicates
                if duplicate_key and (not duplicate_key_allowed):
                    if warn:
                        msg = "The BibTeX entry '%s' defined in %s already exists."
                        LOG.warning(msg, key, bfile)
                elif not duplicate_key:
                    self.__database.add_entry(key, db.entries[key])
Example 28
class InlineCommentTest(ParserTest, TestCase):
    input_string = u"""
        "some text" causes an error like this
        ``You're missing a field name---line 6 of file bibs/inline_comment.bib``
        for all 3 of the % some text occurences below; in each case the parser keeps
        what it has up till that point and skips, so that it correctly gets the last
        entry.
        @article{Me2010,}
        @article{Me2011,
            author="Brett-like, Matthew",
        % some text
            title="Another article"}
        @article{Me2012, % some text
            author="Real Brett"}
        This one correctly read
        @article{Me2013,}
    """
    correct_result = BibliographyData([
        ('Me2010', Entry('article')),
        ('Me2011',
         Entry('article',
               persons={
                   'author': [
                       Person(first='Matthew', last='Brett-like'),
                   ]
               })),
        ('Me2012', Entry('article')),
        ('Me2013', Entry('article')),
    ])
    errors = [
        "syntax error in line 10: '}' expected",
        "syntax error in line 12: '}' expected",
    ]
Example 29
class DuplicateFieldTest(ParserTest, TestCase):
    input_strings = [
        r"""
            @MASTERSTHESIS{
                Mastering,
                year = 1364,
                title = "Mastering Thesis Writing",
                school = "Charles University in Prague",
                TITLE = "No One Reads Master's Theses Anyway LOL",
                TiTlE = "Well seriously, lol.",
            }
        """
    ]
    correct_result = BibliographyData({
        'Mastering':
        Entry(
            'mastersthesis',
            fields=[
                ('year', '1364'),
                ('title', 'Mastering Thesis Writing'),
                ('school', 'Charles University in Prague'),
            ],
        ),
    })
    errors = [
        'entry with key Mastering has a duplicate TITLE field',
        'entry with key Mastering has a duplicate TiTlE field',
    ]
Example 30
    def create_bibliography(self, record):
        texkey, entries = self.create_bibliography_entry(record)
        data = {texkey: entries}

        bib_data = BibliographyData(data)
        writer = BibtexWriter()
        return writer.to_string(bib_data)
Example 31
class BracesTest(ParserTest, TestCase):
    input_string = u"""@ARTICLE{
                test,
                title={Polluted
                    with {DDT}.
            },
    }"""
    correct_result = BibliographyData([(u'test', Entry('article', [(u'title', 'Polluted with {DDT}.')]))])
Example 32
class BracesTest(ParserTest, TestCase):
    input_string = """@ARTICLE{
                test,
                title={Polluted
                    with {DDT}.
            },
    }"""
    correct_result = BibliographyData(
        {'test': Entry('article', {'title': 'Polluted with {DDT}.'})})
Example 33
    def create_bibliography(self, record_list):
        bib_dict = {}
        for record in record_list:
            texkey, entries = self.create_bibliography_entry(record)
            bib_dict[texkey] = entries

        bib_data = BibliographyData(bib_dict)
        writer = BibtexWriter()
        return writer.to_string(bib_data)
Example 34
class UnusedEntryTest(ParserTest, TestCase):
    parser_options = {'wanted_entries': []}
    input_string = u"""
        @Article(
            gsl,
            author = nobody,
        )
    """
    correct_result = BibliographyData()
Example 35
class BracesAndQuotesTest(ParserTest, TestCase):
    input_string = u'''@ARTICLE{
                test,
                title="Nested braces  and {"quotes"}",
        }'''
    correct_result = BibliographyData([
        (u'test', Entry('article',
                        [(u'title', 'Nested braces and {"quotes"}')]))
    ])
Example 36
    def on_config(self, config):
        """
        Loads bibliography on load of config
        """

        bibfiles = []

        # Set bib_file from either url or path
        if self.config.get("bib_file", None) is not None:
            is_url = validators.url(self.config["bib_file"])
            # if bib_file is a valid URL, cache it with tempfile
            if is_url:
                bibfiles.append(
                    tempfile_from_url(self.config["bib_file"], '.bib'))
            else:
                bibfiles.append(self.config["bib_file"])
        elif self.config.get("bib_dir", None) is not None:
            bibfiles.extend(Path(self.config["bib_dir"]).glob("*.bib"))
        else:
            raise Exception(
                "Must supply a bibtex file or directory for bibtex files")

        # load bibliography data
        refs = {}
        for bibfile in bibfiles:
            bibdata = parse_file(bibfile)
            refs.update(bibdata.entries)

        self.bib_data = BibliographyData(entries=refs)

        # Set CSL from either url or path (or empty)
        is_url = validators.url(self.config["csl_file"])
        if is_url:
            self.csl_file = tempfile_from_url(self.config["csl_file"], '.csl')
        else:
            self.csl_file = self.config.get("csl_file", None)

        # Toggle whether or not to render citations inline (Requires CSL)
        self.cite_inline = self.config.get("cite_inline", False)
        if self.cite_inline and not self.csl_file:
            raise Exception(
                "Must supply a CSL file in order to use cite_inline")

        return config
Example 37
File: bibmod.py Project: mfa/webbib
    def load_bib(self, filename="IMSfull.bib"):
        parser = bibtex.Parser()
        bib_data = parser.parse_file(filename)
        self.lastload = os.path.getmtime(filename)
        self.filename = filename
        pubs = []
        index_keys = {}
        index_bibkeys = {}
        for key, elem in bib_data.entries.iteritems():
            entry = elem.fields

            # generate original bibtex
            # using StringIO and bibtex.writer
            a = BibliographyData()
            a.add_entry(key, elem)
            output = StringIO.StringIO()
            w = Writer()
            w.write_stream(a, output)
            entry["bibtex"] = output.getvalue()

            # sha1 for absolute unique keys
            x = hashlib.sha1(simplejson.dumps(entry))
            entry["key"] = x.hexdigest()
            entry["authors"] = self.parse_authors(elem.persons)
            entry["bibkey"] = elem.key

            # keywords
            entry["keywords"] = []
            if entry.get("keyword"):
                for i in entry["keyword"].split(","):
                    entry["keywords"].append(i.strip())
            entry["reference"] = self.render_references(elem.type, entry)

            # append to pubs
            pubs.append(entry)
            index_keys[x.hexdigest()] = len(pubs) - 1
            index_bibkeys[elem.key] = len(pubs) - 1
            if "year" not in entry:
                entry["year"] = ""
        # set at end -> less time for threading problems
        self.index_keys = index_keys
        self.index_bibkeys = index_bibkeys
        self.pubs = pubs
Example 38
    def createMaterialize(self, token, parent):
        ol = self.createHTML(token, parent)

        for child in ol.children:
            key = child['id']
            db = BibliographyData()
            db.add_entry(key, self.extension.database.entries[key])
            btex = db.to_string("bibtex")

            m_id = uuid.uuid4()
            html.Tag(child, 'a',
                     style="padding-left:10px;",
                     class_='modal-trigger moose-bibtex-modal',
                     href="#{}".format(m_id),
                     string=u'[BibTeX]')

            modal = html.Tag(child, 'div', class_='modal', id_=m_id)
            content = html.Tag(modal, 'div', class_='modal-content')
            pre = html.Tag(content, 'pre', style="line-height:1.25;")
            html.Tag(pre, 'code', class_='language-latex', string=btex)
Example 39
    def modifiedKeys(self, keyPattern):
        newBib = BibliographyData()

        for bib_id in self._bibdata.entries:
            persons = self._bibdata.entries[bib_id].persons
            fields = self._bibdata.entries[bib_id].fields

            new_bib_id = ''

            for kp in keyPattern:
                if kp:
                    if kp in persons.keys():
                        new_bib_id += ''.join(a.last()[0] for a in persons[kp])
                    elif kp in fields.keys():
                        new_bib_id += ''.join(a for a in fields[kp])

            if new_bib_id:
                new_bib_id = self._cleanLaTeXFromKey(new_bib_id)
                newBib.entries[new_bib_id] = self._bibdata.entries[bib_id]
            else:
                newBib.entries[bib_id] = self._bibdata.entries[bib_id]

        return newBib
Example 40
    def run(self):

        style = find_plugin('pybtex.style.formatting', self.options.get('style', 'unsrt'))()
        bibtex_dir = self.options.get('bibtex_dir', 'bibtex')
        detail_page_dir = self.options.get('detail_page_dir', 'papers')
        highlight_author = self.options.get('highlight_author', None)
        self.state.document.settings.record_dependencies.add(self.arguments[0])

        parser = Parser()

        # Sort the publication entries by year reversed
        data = sorted(parser.parse_file(self.arguments[0]).entries.items(),
                      key=lambda e: e[1].fields['year'], reverse=True)

        print(type(data))
        html = '<div class = "publication-list">\n'
        cur_year = None

        if bibtex_dir:  # create the bibtex dir if the option is set
            try:
                os.mkdir(os.path.sep.join((self.output_folder, bibtex_dir)))
            except OSError:  # probably because the dir already exists
                pass

        if detail_page_dir:  # create the detail page dir if the option is set
            try:
                os.mkdir(os.path.sep.join((self.output_folder, detail_page_dir)))
            except OSError:  # probably because the dir already exists
                pass

        for label, entry in data:
            # print a year title when year changes
            if entry.fields['year'] != cur_year:
                if cur_year is not None:  # not first year group
                    html += '</ul>'
                cur_year = entry.fields['year']
                html += '<h3>{}</h3>\n<ul>'.format(cur_year)

            pub_html = list(style.format_entries((entry,)))[0].text.render_as('html')
            if highlight_author:  # highlight an author (usually oneself)
                pub_html = pub_html.replace(highlight_author,
                                            '<strong>{}</strong>'.format(highlight_author), 1)
            html += '<li class = "publication">' + pub_html

            extra_links = ""
            bib_data = BibliographyData(dict({label: entry}))  # detail_page_dir may need it later
            if bibtex_dir:  # write bib files to bibtex_dir for downloading
                bib_link = '{}/{}.bib'.format(bibtex_dir, label)
                bib_data.to_file('/'.join([self.output_folder, bib_link]), 'bibtex')
                extra_links += '[<a href="{}">BibTeX</a>] '.format(
                    self.site.config['BASE_URL'] + bib_link)

            if 'fulltext' in entry.fields:  # the link to the full text, usually a link to the pdf file
                extra_links += '[<a href="{}">full text</a>] '.format(entry.fields['fulltext'])

            if extra_links or detail_page_dir:
                html += '<br>'
            html += extra_links

            if detail_page_dir:  # render the details page of a paper
                page_url = '/'.join((detail_page_dir, label + '.html'))
                html += ' [<a href="{}">abstract and details</a>]'.format(
                    self.site.config['BASE_URL'] + page_url)
                context = {
                    'title': process_bibtex_string(entry.fields['title']),
                    'abstract': process_bibtex_string(entry.fields['abstract']) if 'abstract' in entry.fields else '',
                    'bibtex': bib_data.to_string('bibtex'),
                    'bibtex_link': '/' + bib_link if bibtex_dir else '',
                    'default_lang': self.site.config['DEFAULT_LANG'],
                    'label': label,
                    'lang': self.site.config['DEFAULT_LANG'],
                    'permalink': self.site.config['SITE_URL'] + page_url,
                    'reference': pub_html,
                    'extra_links': extra_links
                }

                if 'fulltext' in entry.fields and entry.fields['fulltext'].endswith('.pdf'):
                    context['pdf'] = entry.fields['fulltext']

                self.site.render_template(
                    'publication.tmpl',
                    os.path.sep.join((self.output_folder, detail_page_dir, label + '.html')),
                    context,
                )

            html += '</li>'

        if len(data) != 0:  # publication list is nonempty
            html += '</ul>'

        html += '</div>'

        return [nodes.raw('', html, format='html'), ]
Example 41
    def filter_bibolamazifile(self, bibolamazifile):
        #
        # bibdata is a pybtex.database.BibliographyData object
        #
        bibdata = bibolamazifile.bibliographyData();

        arxivaccess = arxivutil.setup_and_get_arxiv_accessor(bibolamazifile)

        # first, find required fields and apply possible "filters"

        _rx_short_journal_known = re.compile(r'\b(?P<word>' + r'|'.join(KNOWN_ABBREV.keys()) + r')\b',
                                             re.IGNORECASE);
        def abbreviate(x):
            if x.lower() in NO_ABBREV:
                return x
            return x[0:3]+'.'

        def short_journal(x):
            if x.strip().lower() in KNOWN_JOURNALS:
                return KNOWN_JOURNALS[x.strip().lower()]
            x = _rx_short_journal_known.sub(lambda m: KNOWN_ABBREV[m.group('word').lower()], x);
            x = re.sub(r'\b(' + r'|'.join(BORING_WORDS) + r')\b(?!\s*($|[-:;\.]))', '', x, flags=re.IGNORECASE);
            x = re.sub(r'\b(?P<word>\w+)\b([^\.]|$)',
                       lambda m: abbreviate(m.group('word')), x);
            x = re.sub(r'[^\w.]+', '', x)
            if (len(x)>20):
                x = x[0:18]+'..'
            return x;

        def arxivInfo(entry, field):
            inf = arxivaccess.getArXivInfo(entry.key);
            if inf is None:
                return ''
            return inf[field]
        
        fld_fn = {
            'author': lambda entry: getlast(entry.persons['author'][0], lower=False)[0],
            'authors': lambda entry: "".join([getlast(a, lower=False)[0] for a in entry.persons['author']])[0:25],
            'year': lambda entry: entry.fields.get('year', ''),
            'year2': lambda entry: '%02d' % (int(entry.fields.get('year', '')) % 100),
            'journal_abb': lambda entry: fmtjournal(entry.fields.get('journal', '')),
            'journal': lambda entry: short_journal(normstr(delatex(entry.fields.get('journal', '')),lower=False)),
            'title_word': lambda entry: next(
                (word for word in re.sub(r'[^\w\s]', '', delatex(entry.fields.get('title', ''))).split()
                 if word.lower() not in BORING_TITLE_WORDS),
                ''
                 ),
            'doi': lambda entry: entry.fields.get('doi', ''),
            'arxivid': lambda entry: arxivInfo(entry, 'arxivid'),
            'primaryclass': lambda entry: arxivInfo(entry, 'primaryclass'),
            };
        # used fields
        fld = set([m.group('field') for m in re.finditer(r'(^|[^%])(%%)*%\((?P<field>\w+)\)', self.fmt)])
        # check all valid fields
        for f in fld:
            if f not in fld_fn:
                raise BibFilterError('citekey', "Invalid field `%s\' for citekey filter")

        logger.debug('Used fields are %r', fld)

        newbibdata = BibliographyData()
        
        class Jump: pass
        
        for (key, entry) in bibdata.entries.iteritems():

            keyorig = key
            
            try:
                ainfo = arxivaccess.getArXivInfo(key);
                if (self.if_published is not None):
                    if (not self.if_published and (ainfo is None or ainfo['published'])):
                        logger.longdebug('Skipping published entry %s (filter: unpublished)', key)
                        raise Jump
                    if (self.if_published and (ainfo is not None and not ainfo['published'])):
                        logger.longdebug('Skipping unpublished entry %s (filter: published)', key)
                        raise Jump
                if self.if_type is not None:
                    if entry.type not in self.if_type:
                        logger.longdebug('Skipping entry %s of different type %s (filter: %r)',
                                         key, entry.type, self.if_type)
                        raise Jump

                repldic = dict(zip(fld, [fld_fn[f](entry) for f in fld]));

                try:
                    key =  self.fmt % repldic;
                except ValueError as e:
                    raise BibFilterError('citekey', "Error replacing fields: %s" % (e))
                
            except Jump:
                pass
            finally:
                # avoid duplicate keys
                newkey = key
                count = 0
                while newkey in newbibdata.entries:
                    count += 1;
                    newkey = key + '.%d'%(count)
                if count:
                    logger.warning("`%s': Citation key `%s' already used: using `%s' instead.",
                                   keyorig, key, newkey)
                # add the entry
                newbibdata.add_entry(newkey, entry);

        bibolamazifile.setBibliographyData(newbibdata);

        return
Example 42
class BibtexExtension(command.CommandExtension):
    """
    Extension for BibTeX citations and bibliography.
    """

    @staticmethod
    def defaultConfig():
        config = command.CommandExtension.defaultConfig()
        config['duplicate_warning'] = (True, "Show a warning when duplicate entries detected.")
        config['duplicates'] = (list(), "A list of duplicates that are allowed.")
        return config

    def __init__(self, *args, **kwargs):
        command.CommandExtension.__init__(self, *args, **kwargs)

        self.__database = None
        self.__citations = set()

    def initMetaData(self, page, meta):
        meta.initData('citations', set())

    def addCitations(self, *args):
        self.__citations.update(args)

    def preExecute(self, content):

        duplicates = self.get('duplicates', list())
        self.__database = BibliographyData()

        bib_files = []
        for node in content:
            if node.source.endswith('.bib'):
                bib_files.append(node.source)

        for bfile in bib_files:
            try:
                db = parse_file(bfile)
            except UndefinedMacro as e:
                msg = "The BibTeX file %s has an undefined macro:\n%s"
                LOG.warning(msg, bfile, e.message)

            #TODO: https://bitbucket.org/pybtex-devs/pybtex/issues/93/
            #      databaseadd_entries-method-not-considering
            warn = self.get('duplicate_warning')
            for key in db.entries:
                duplicate_key = key in self.__database.entries
                duplicate_key_allowed = key in duplicates
                if duplicate_key and (not duplicate_key_allowed):
                    if warn:
                        msg = "The BibTeX entry '%s' defined in %s already exists."
                        LOG.warning(msg, key, bfile)
                elif not duplicate_key:
                    self.__database.add_entry(key, db.entries[key])

    def postTokenize(self, ast, page, meta, reader):
        if self.__citations:
            meta.getData('citations').update(self.__citations)
            self.__citations.clear()

            has_bib = False
            for node in anytree.PreOrderIter(ast):
                if node.name == 'BibtexBibliography':
                    has_bib = True
                    break

            if not has_bib:
                BibtexBibliography(ast)

    @property
    def database(self):
        return self.__database

    def extend(self, reader, renderer):
        self.requires(core, command)

        self.addCommand(reader, BibtexCommand())
        self.addCommand(reader, BibtexReferenceComponent())
        reader.addInline(BibtexReferenceComponentDeprecated(), location='>FormatInline')

        renderer.add('BibtexCite', RenderBibtexCite())
        renderer.add('BibtexBibliography', RenderBibtexBibliography())

        if isinstance(renderer, LatexRenderer):
            renderer.addPackage('natbib', 'round')
Example 43
class MooseBibtex(MooseCommonExtension, Preprocessor):
  """
  Creates per-page bibliographies using latex syntax.
  """

  RE_BIBLIOGRAPHY = r'(?<!`)\\bibliography\{(.*?)\}'
  RE_STYLE = r'(?<!`)\\bibliographystyle\{(.*?)\}'
  RE_CITE = r'(?<!`)\\(?P<cmd>cite|citet|citep)\{(?P<key>.*?)\}'

  def __init__(self, markdown_instance=None, **kwargs):
    MooseCommonExtension.__init__(self, **kwargs)
    Preprocessor.__init__(self, markdown_instance)

  def run(self, lines):
    """
    Create a bibliography from cite commands.
    """

    # Join the content to enable regex searches throughout entire text
    content = '\n'.join(lines)

    # Build the database of bibtex data
    self._citations = []              # member b/c it is used in substitution function
    self._bibtex = BibliographyData() # ""
    bibfiles = []
    match = re.search(self.RE_BIBLIOGRAPHY, content)
    if match:
      bib_string = match.group(0)
      for bfile in match.group(1).split(','):
        try:
          bibfiles.append(os.path.join(self._docs_dir, bfile.strip()))
          data = parse_file(bibfiles[-1])
        except Exception as e:
          log.error('Failed to parse bibtex file: {}'.format(bfile.strip()))
          traceback.print_exc()
          return lines
        self._bibtex.add_entries(data.entries.iteritems())
    else:
      return lines

    # Determine the style
    match = re.search(self.RE_STYLE, content)
    if match:
      content = content.replace(match.group(0), '')
      try:
        style = find_plugin('pybtex.style.formatting', match.group(1))
      except:
        log.error('Unknown bibliography style "{}"'.format(match.group(1)))
        return lines

    else:
      style = find_plugin('pybtex.style.formatting', 'plain')

    # Replace citations with author date, as an anchor
    content = re.sub(self.RE_CITE, self.authors, content)

    # Create html bibliography
    if self._citations:

      # Generate formatted html using pybtex
      formatted_bibliography = style().format_bibliography(self._bibtex, self._citations)
      backend = find_plugin('pybtex.backends', 'html')
      stream = io.StringIO()
      backend().write_to_stream(formatted_bibliography, stream)

      # Strip the bib items from the formatted html
      html = re.findall(r'\<dd\>(.*?)\</dd\>', stream.getvalue(), flags=re.MULTILINE|re.DOTALL)

      # Produces an ordered list with anchors to the citations
      output = u'<ol class="moose-bibliography" data-moose-bibfiles="{}">\n'.format(str(bibfiles))
      for i, item in enumerate(html):
        output += u'<li name="{}">{}</li>\n'.format(self._citations[i], item)
      output += u'</ol>\n'
      content = re.sub(self.RE_BIBLIOGRAPHY, self.markdown.htmlStash.store(output, safe=True), content)

    return content.split('\n')

  def authors(self, match):
    """
    Return the author(s) citation for text, linked to bibliography.
    """
    cmd = match.group('cmd')
    key = match.group('key')
    tex = '\\%s{%s}' % (cmd, key)

    if key in self._bibtex.entries:
      self._citations.append(key)
      entry = self._bibtex.entries[key]
      a = entry.persons['author']
      n = len(a)
      if n > 2:
        author = '{} et al.'.format(' '.join(a[0].last_names))
      elif n == 2:
        a0 = ' '.join(a[0].last_names)
        a1 = ' '.join(a[1].last_names)
        author = '{} and {}'.format(a0, a1)
      else:
        author = ' '.join(a[0].last_names)

      if cmd == 'citep':
        a = '<a href="#{}" data-moose-cite="{}">{}, {}</a>'.format(key, tex, author, entry.fields['year'])
        return '({})'.format(self.markdown.htmlStash.store(a, safe=True))
      else:
        a = '<a href="#{}" data-moose-cite="{}">{} ({})</a>'.format(key, tex, author, entry.fields['year'])
        return self.markdown.htmlStash.store(a, safe=True)
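
The run() method above chains several pybtex pieces together. As a reference, here is a minimal standalone sketch of that rendering path: parse a .bib file, format selected citation keys with a style plugin, and keep only the HTML entry bodies. The file name 'refs.bib' and the key in the usage comment are illustrative assumptions, not part of the original code.

import io
import re

from pybtex.database import parse_file
from pybtex.plugin import find_plugin


def render_citations_to_html(bibfile, citations, style_name='plain'):
    """Return the HTML bodies pybtex produces for the given citation keys."""
    bib_data = parse_file(bibfile)                               # BibliographyData
    style = find_plugin('pybtex.style.formatting', style_name)   # e.g. 'plain'
    backend = find_plugin('pybtex.backends', 'html')

    formatted = style().format_bibliography(bib_data, citations)
    stream = io.StringIO()
    backend().write_to_stream(formatted, stream)

    # The html backend emits a <dl>; keep only the <dd> bodies, exactly as the
    # preprocessor above does before wrapping them in an ordered list.
    return re.findall(r'<dd>(.*?)</dd>', stream.getvalue(), flags=re.DOTALL)

# Example usage (assumes 'refs.bib' defines an entry with key 'slaughter2015'):
# items = render_citations_to_html('refs.bib', ['slaughter2015'])
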
Example 44
class BibtexPreprocessor(MooseMarkdownCommon, Preprocessor):
    """
    Creates per-page bibliographies using latex syntax.
    """

    RE_BIBLIOGRAPHY = r'(?<!`)\\bibliography\{(.*?)\}'
    RE_STYLE = r'(?<!`)\\bibliographystyle\{(.*?)\}'
    RE_CITE = r'(?<!`)\\(?P<cmd>cite|citet|citep)\{(?P<keys>.*?)\}'

    @staticmethod
    def defaultSettings():
        """BibtexPreprocessor configure options."""
        return dict() # this extension doesn't have settings

    def __init__(self, markdown_instance=None, **kwargs):
        MooseMarkdownCommon.__init__(self, **kwargs)
        Preprocessor.__init__(self, markdown_instance)
        self._macro_files = kwargs.pop('macro_files', None)
        self._bibtex = None
        self._citations = []

    def parseBibtexFile(self, bibfile):
        """
        Return the parsed bibtex file. If "macro_files" are supplied in the
        configuration file, a temporary file is made containing the supplied macros
        above the original bib file; this combined file can then be parsed by pybtex
        (a standalone sketch of this pattern follows this example).
        """

        if self._macro_files:
            t_bib_path = MooseDocs.abspath("tBib.bib")
            with open(t_bib_path, "wb") as t_bib:
                for t_file in self._macro_files:
                    with open(MooseDocs.abspath(t_file.strip()), "rb") as in_file:
                        shutil.copyfileobj(in_file, t_bib)
                with open(bibfile, "rb") as in_file:
                    shutil.copyfileobj(in_file, t_bib)
            data = parse_file(t_bib_path)
            if os.path.isfile(t_bib_path):
                os.remove(t_bib_path)
        else:
            data = parse_file(bibfile)

        return data

    def run(self, lines):
        """
        Create a bibliography from cite commands.
        """

        # Join the content to enable regex searches throughout entire text
        content = '\n'.join(lines)

        # Build the database of bibtex data
        self._citations = []              # member b/c it is used in substitution function
        self._bibtex = BibliographyData() # ""
        bibfiles = []
        match = re.search(self.RE_BIBLIOGRAPHY, content)
        if match:
            for bfile in match.group(1).split(','):
                try:
                    bibfiles.append(MooseDocs.abspath(bfile.strip()))
                    data = self.parseBibtexFile(bibfiles[-1])
                    self._bibtex.add_entries(data.entries.iteritems())
                except UndefinedMacro:
                    LOG.error('Undefined macro in bibtex file: %s, specify macro_files arguments ' \
                              'in configuration file (e.g. website.yml)', bfile.strip())
        else:
            return lines

        # Determine the style
        match = re.search(self.RE_STYLE, content)
        if match:
            content = content.replace(match.group(0), '')
            try:
                style = find_plugin('pybtex.style.formatting', match.group(1))
            except PluginNotFound:
                LOG.error('Unknown bibliography style "%s"', match.group(1))
                return lines

        else:
            style = find_plugin('pybtex.style.formatting', 'plain')

        # Replace citations with author date, as an anchor
        content = re.sub(self.RE_CITE, self.authors, content)

        # Create html bibliography
        if self._citations:

            # Generate formatted html using pybtex
            formatted_bibliography = style().format_bibliography(self._bibtex, self._citations)
            backend = find_plugin('pybtex.backends', 'html')
            stream = io.StringIO()
            backend().write_to_stream(formatted_bibliography, stream)

            # Strip the bib items from the formatted html
            html = re.findall(r'\<dd\>(.*?)\</dd\>', stream.getvalue(),
                              flags=re.MULTILINE|re.DOTALL)

            # Produces an ordered list with anchors to the citations
            output = u'<ol class="moose-bibliography" data-moose-bibfiles="{}">\n'
            output = output.format(str(bibfiles))
            for i, item in enumerate(html):
                output += u'<li name="{}">{}</li>\n'.format(self._citations[i], item)
            output += u'</ol>\n'
            content = re.sub(self.RE_BIBLIOGRAPHY,
                             self.markdown.htmlStash.store(output, safe=True),
                             content)

        return content.split('\n')

    def authors(self, match):
        """
        Return the author(s) citation for text, linked to bibliography.
        """
        cmd = match.group('cmd')
        keys = match.group('keys')
        tex = '\\%s{%s}' % (cmd, keys)

        cite_list = []

        # Loop over all keys in the cite command
        for key in [k.strip() for k in keys.split(',')]:

            # Error if the key is not found and move on
            if key not in self._bibtex.entries:
                LOG.error('Unknown bibtex key: %s', key)
                continue

            # Build the author list
            self._citations.append(key)
            entry = self._bibtex.entries[key]
            a = entry.persons['author']
            n = len(a)
            if n > 2:
                author = '{} et al.'.format(' '.join(a[0].last_names))
            elif n == 2:
                a0 = ' '.join(a[0].last_names)
                a1 = ' '.join(a[1].last_names)
                author = '{} and {}'.format(a0, a1)
            else:
                author = ' '.join(a[0].last_names)

            if cmd == 'citep':
                a = '<a href="#{}">{}, {}</a>'.format(key, author, entry.fields['year'])
            else:
                a = '<a href="#{}">{} ({})</a>'.format(key, author, entry.fields['year'])

            cite_list.append(a)

        # Create the correct text for list of keys in the cite command
        if len(cite_list) == 2:
            cite_list = [' and '.join(cite_list)]
        elif len(cite_list) > 2:
            cite_list[-1] = 'and ' + cite_list[-1]

        # Write the html
        if cmd == 'citep':
            html = '(<span data-moose-cite="{}">{}</span>)'.format(tex, '; '.join(cite_list))
        else:
            html = '<span data-moose-cite="{}">{}</span>'.format(tex, ', '.join(cite_list))

        # substitute Umlauts
        umlaut_re = re.compile(r"\{\\\"([aouAOU])\}")
        html = umlaut_re.sub('&\\1uml;', html)

        return self.markdown.htmlStash.store(html, safe=True)
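
parseBibtexFile() above works around pybtex's lack of a separate macro argument by concatenating the macro files above the .bib file before parsing. Below is a minimal sketch of that pattern using a temporary file so nothing is left behind; the file names in the usage comment are assumptions.

import os
import shutil
import tempfile

from pybtex.database import parse_file


def parse_with_macros(bibfile, macro_files=None):
    """Parse bibfile, prepending the @string macros found in macro_files."""
    if not macro_files:
        return parse_file(bibfile)

    # Write the macro files followed by the original .bib file into one
    # temporary file so pybtex can resolve the @string abbreviations.
    fd, tmp_path = tempfile.mkstemp(suffix='.bib')
    try:
        with os.fdopen(fd, 'wb') as tmp:
            for fname in list(macro_files) + [bibfile]:
                with open(fname, 'rb') as src:
                    shutil.copyfileobj(src, tmp)
        return parse_file(tmp_path)
    finally:
        os.remove(tmp_path)

# Example usage (file names are assumptions):
# data = parse_with_macros('references.bib', ['journal_macros.bib'])
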
Example 45
    def run(self):

        bibtex_dir = self.options.get('bibtex_dir', 'bibtex')
        detail_page_dir = self.options.get('detail_page_dir', 'papers')
        highlight_authors = self.options.get('highlight_author', None)
        if highlight_authors:
            highlight_authors = highlight_authors.split(';')
        style = Style(self.site.config['BASE_URL'] + detail_page_dir if detail_page_dir else None)
        self.state.document.settings.record_dependencies.add(self.arguments[0])

        all_entries = []
        labels = set()
        for a in self.arguments:
            parser = Parser()
            for item in parser.parse_file(a).entries.items():
                if item[0] in labels:  # duplicated entries
                    LOGGER.warning(
                        ("publication_list: BibTeX entries with duplicated labels are found. "
                         "Only the first occurrence will be used."))
                    continue
                labels.add(item[0])
                all_entries.append(item)
        # Sort the publication entries by year reversed
        data = sorted(all_entries, key=lambda e: e[1].fields['year'], reverse=True)

        html = '<div class="publication-list">\n'
        cur_year = None

        if bibtex_dir:  # create the bibtex dir if the option is set
            try:
                os.makedirs(os.path.sep.join((self.output_folder, bibtex_dir)))
            except OSError:  # probably because the dir already exists
                pass

        if detail_page_dir:  # create the detail page dir if the option is set
            try:
                os.makedirs(os.path.sep.join((self.output_folder, detail_page_dir)))
            except OSError:  # probably because the dir already exists
                pass

        for label, entry in data:
            # print a year title when year changes
            if entry.fields['year'] != cur_year:
                if cur_year is not None:  # not first year group
                    html += '</ul>'
                cur_year = entry.fields['year']
                html += '<h3>{}</h3>\n<ul>'.format(cur_year)

            entry.label = label  # Pass label to the style.
            pub_html = list(style.format_entries((entry,)))[0].text.render_as('html')
            if highlight_authors:  # highlight one of several authors (usually oneself)
                for highlight_author in highlight_authors:
                    # We need to replace all occurrences of space except for the last one with
                    # &nbsp;, since pybtex does it for all authors
                    count = highlight_author.count(' ') - 1
                    pub_html = pub_html.replace(
                        highlight_author.strip().replace(' ', '&nbsp;', count),
                        '<strong>{}</strong>'.format(highlight_author), 1)
            html += '<li class="publication" style="padding-bottom: 1em;">' + pub_html

            extra_links = ""

            if 'fulltext' in entry.fields:  # the link to the full text, usually a link to the pdf file
                extra_links += '[<a href="{}">full text</a>] '.format(entry.fields['fulltext'])

            bibtex_fields = dict(entry.fields)
            # Collect and remove custom links (fields starting with "customlink")
            custom_links = dict()
            for key, value in bibtex_fields.items():
                if key.startswith('customlink'):
                    custom_links[key[len('customlink'):]] = value
            # custom fields (custom links)
            for key, value in custom_links.items():
                extra_links += '[<a href="{}">{}</a>] '.format(value, key)

            # Remove some fields for the publicly available BibTeX file since they are mostly only
            # used by this plugin.
            for field_to_remove in ('abstract', 'fulltext'):
                if field_to_remove in bibtex_fields:
                    del bibtex_fields[field_to_remove]
            # Prepare for the bib file. Note detail_page_dir may need bib_data later.
            bibtex_entry = Entry(entry.type, bibtex_fields, entry.persons)
            bib_data = BibliographyData(dict({label: bibtex_entry}))
            bib_string = bib_data.to_string('bibtex')
            extra_links += '''
            [<a href="javascript:void(0)" onclick="
            (function(target, id) {{
              if ($('#' + id).css('display') == 'block')
              {{
                $('#' + id).hide('fast');
                $(target).text('BibTeX&#x25BC;')
              }}
              else
              {{
                $('#' + id).show('fast');
                $(target).text('BibTeX&#x25B2;')
              }}
            }})(this, '{}');">BibTeX&#x25BC;</a>]
            '''.format('bibtex-' + label)
            if bibtex_dir:  # write bib files to bibtex_dir for downloading
                bib_link = '{}/{}.bib'.format(bibtex_dir, label)
                bib_data.to_file('/'.join([self.output_folder, bib_link]), 'bibtex')

            if extra_links or detail_page_dir or 'abstract' in entry.fields:
                html += '<br>'

            # Add the abstract link.
            if 'abstract' in entry.fields:
                html += '''
                [<a href="javascript:void(0)" onclick="
                (function(target, id) {{
                  if ($('#' + id).css('display') == 'block')
                {{
                  $('#' + id).hide('fast');
                  $(target).text('abstract&#x25BC;')
                }}
                else
                {{
                  $('#' + id).show('fast');
                  $(target).text('abstract&#x25B2;')
                }}
                }})(this, '{}');">abstract&#x25BC;</a>] '''.format('abstract-' + label)

            display_none = '<div id="{}" style="display:none"><pre>{}</pre></div>'
            bibtex_display = display_none.format(
                'bibtex-' + label, bib_string)

            abstract_text = str(
                LaTeXParser(entry.fields['abstract']).parse()) if 'abstract' in entry.fields else ''
            if detail_page_dir:  # render the details page of a paper
                page_url = '/'.join((detail_page_dir, label + '.html'))
                html += '[<a href="{}">details</a>] '.format(
                    self.site.config['BASE_URL'] + page_url)
                context = {
                    'title': str(LaTeXParser(entry.fields['title']).parse()),
                    'abstract': abstract_text,
                    'bibtex': bib_data.to_string('bibtex'),
                    'bibtex_link': '/' + bib_link if bibtex_dir else '',
                    'default_lang': self.site.config['DEFAULT_LANG'],
                    'label': label,
                    'lang': self.site.config['DEFAULT_LANG'],
                    'permalink': self.site.config['SITE_URL'] + page_url,
                    'reference': pub_html,
                    'extra_links': extra_links + bibtex_display
                }

                if 'fulltext' in entry.fields:
                    context['pdf'] = entry.fields['fulltext']

                self.site.render_template(
                    'publication.tmpl',
                    os.path.sep.join((self.output_folder, detail_page_dir, label + '.html')),
                    context,
                )

            html += extra_links

            # Add the hidden abstract and bibtex.
            if 'abstract' in entry.fields:
                html += '''
                <div id="{}" class="publication-abstract" style="display:none">
                <blockquote>{}</blockquote></div>
                '''.format('abstract-' + label, abstract_text)
            html += bibtex_display
            html += '</li>'

        if len(data) != 0:  # publication list is nonempty
            html += '</ul>'

        html += '</div>'

        return [nodes.raw('', html, format='html'), ]
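
The directive above groups the sorted publications by year by tracking cur_year and closing each <ul> manually. The same grouping step can be expressed with itertools.groupby; this sketch only restates that step and assumes (label, entry) pairs as produced by pybtex's Parser, like the all_entries list above.

from itertools import groupby


def group_by_year(entries):
    """Yield (year, [(label, entry), ...]) groups, newest year first."""
    def by_year(item):
        return item[1].fields['year']

    for year, group in groupby(sorted(entries, key=by_year, reverse=True), key=by_year):
        yield year, list(group)

# Each yielded group corresponds to one <h3>{year}</h3><ul>...</ul> block above.
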
Example 46
    def filter_bibolamazifile(self, bibolamazifile):
        #
        # bibdata is a pybtex.database.BibliographyData object
        #

        if (not self.dupfile and not self.warn):
            logger.warning("duplicates filter: No action is being taken because neither "
                           "-sDupfile= nor -dWarn have been requested.")
            return

        bibdata = bibolamazifile.bibliographyData();

        used_citations = None
        
        if self.keep_only_used_in_jobname:
            if not self.dupfile:
                logger.warning("Option -sKeepOnlyUsedInJobname has no effect without -sDupfile=... !")
            else:
                logger.debug("Getting list of used citations from %s.aux." %(self.keep_only_used_in_jobname))
                used_citations = auxfile.get_all_auxfile_citations(
                    self.keep_only_used_in_jobname, bibolamazifile, self.name(),
                    self.jobname_search_dirs, return_set=True
                )

        duplicates = [];

        arxivaccess = arxivutil.setup_and_get_arxiv_accessor(bibolamazifile)

        dupl_entryinfo_cache_accessor = self.cacheAccessor(DuplicatesEntryInfoCacheAccessor)

        for (key, entry) in bibdata.entries.iteritems():
            #cache_entries[key] = {}
            dupl_entryinfo_cache_accessor.prepare_entry_cache(key, entry, arxivaccess)


        newbibdata = BibliographyData();
        unused = BibliographyData();
        #unused_respawned = set() # because del unused.entries[key] is not implemented ... :(

        def copy_entry(entry):
            #return copy.deepcopy(entry) # too deep ...
            newpers = {}
            for role, plist in entry.persons.iteritems():
                newpers[role] = [copy.deepcopy(p) for p in plist]
            return Entry(type_=entry.type,
                         fields=entry.fields.items(), # will create own Fielddict
                         persons=newpers,
                         collection=entry.collection
                         )

        # Strategy: go through the list of entries, and each time keeping it if it is new,
        # or updating the original and registering the alias if it is a duplicate.
        #
        # With only_used, the situation is a little trickier as we cannot just discard the
        # entries as they are filtered: indeed, they might be duplicates of a used entry,
        # with which one should merge the bib information.
        #
        # So the full algorithm does not immediately discard the unused keys, but rather
        # keeps them in an `unused` list. If they are later required, they are respawned
        # into the actual new list.
        #

        for (key, entry) in bibdata.entries.iteritems():
            #
            # search the newbibdata object, in case this entry already exists.
            #
            #logger.longdebug('inspecting new entry %s ...', key);
            is_duplicate_of = None
            duplicate_original_is_unused = False
            for (nkey, nentry) in newbibdata.entries.iteritems():
                if self.compare_entries_same(entry, nentry, dupl_entryinfo_cache_accessor.get_entry_cache(key),
                                             dupl_entryinfo_cache_accessor.get_entry_cache(nkey)):
                    logger.longdebug('    ... matches existing entry %s!', nkey);
                    is_duplicate_of = nkey;
                    break
            for (nkey, nentry) in unused.entries.iteritems():
                #if nkey in unused_respawned:
                #    continue
                if self.compare_entries_same(entry, nentry, dupl_entryinfo_cache_accessor.get_entry_cache(key),
                                             dupl_entryinfo_cache_accessor.get_entry_cache(nkey)):
                    logger.longdebug('    ... matches existing entry %s!', nkey);
                    is_duplicate_of = nkey;
                    duplicate_original_is_unused = True
                    break

            #
            # if it's a duplicate
            #
            if is_duplicate_of is not None:
                dup = (key, is_duplicate_of)
                if duplicate_original_is_unused:
                    self.update_entry_with_duplicate(is_duplicate_of, unused.entries[is_duplicate_of],
                                                     key, entry)
                else:
                    # a duplicate of a key we have used. So update the original ...
                    self.update_entry_with_duplicate(is_duplicate_of, newbibdata.entries[is_duplicate_of],
                                                     key, entry)
                    # ... and register the alias.
                    duplicates.append(dup);

                if duplicate_original_is_unused and used_citations and key in used_citations:
                    # if we had set the original in the unused list, but we need the
                    # alias, then respawn the original to the newbibdata so we can refer
                    # to it. Bonus: use the name with which we have referred to it, so we
                    # don't need to register any duplicate.
                    newbibdata.add_entry(key, unused.entries[is_duplicate_of])
                    #unused_respawned.add(is_duplicate_of)
                    del unused.entries[is_duplicate_of]
            else:
                if used_citations is not None and key not in used_citations:
                    # new entry, but we don't want it. So add it to the unused list.
                    unused.add_entry(key, entry)
                else:
                    # new entry and we want it. So add it to the main newbibdata list.
                    newbibdata.add_entry(key, entry)

        # output duplicates to the duplicates file

        if (self.dupfile):
            # and write definitions to the dupfile
            dupfilepath = os.path.join(bibolamazifile.fdir(), self.dupfile);
            check_overwrite_dupfile(dupfilepath);
            dupstrlist = [];
            
            with codecs.open(dupfilepath, 'w', 'utf-8') as dupf:
                
                dupf.write(BIBALIAS_HEADER.replace('####DUP_FILE_NAME####', self.dupfile));
                
                if not self.custom_bibalias:
                    dupf.write(BIBALIAS_LATEX_DEFINITIONS)
                    
                # Note: Sort entries in some way (e.g. alphabetically according to
                # (alias, original)), to avoid diffs in VCS's
                for (dupalias, duporiginal) in sorted(duplicates, key=lambda x: (x[0],x[1])):
                    dupf.write((r'\bibalias{%s}{%s}' % (dupalias, duporiginal)) + "\n");
                    dupstrlist.append("\t%s is an alias of %s" % (dupalias,duporiginal)) ;

                dupf.write('\n\n');

            # issue debug message
            logger.debug("wrote duplicates to file: \n" + "\n".join(dupstrlist));

        if (self.warn and duplicates):
            def warnline(dupalias, duporiginal):
                def fmt(key, entry, cache_entry):
                    s = ", ".join(string.capwords('%s, %s' % (x[0], "".join(x[1]))) for x in cache_entry['pers']);
                    if 'title_clean' in cache_entry and cache_entry['title_clean']:
                        s += ', "' + (cache_entry['title_clean']).capitalize() + '"'
                    if 'j_abbrev' in cache_entry and cache_entry['j_abbrev']:
                        s += ', ' + cache_entry['j_abbrev']

                    f = entry.fields
                    if f.get('month',None) and f.get('year',None):
                        s += ', ' + f['month'] + ' ' + f['year']
                    elif f.get('month', None):
                        s += ', ' + f['month'] + ' <unknown year>'
                    elif f.get('year', None):
                        s += ', ' + f['year']
                        
                    if 'doi' in entry.fields and entry.fields['doi']:
                        s += ', doi:'+entry.fields['doi']
                    if 'arxivinfo' in cache_entry and cache_entry['arxivinfo']:
                        s += ', arXiv:'+cache_entry['arxivinfo']['arxivid']
                    if 'note_cleaned' in cache_entry and cache_entry['note_cleaned']:
                        s += '; ' + cache_entry['note_cleaned']

                    return s

                tw = textwrap.TextWrapper(width=DUPL_WARN_ENTRY_COLWIDTH)

                fmtalias = fmt(dupalias, bibdata.entries[dupalias],
                               dupl_entryinfo_cache_accessor.get_entry_cache(dupalias))
                fmtorig = fmt(duporiginal, bibdata.entries[duporiginal],
                              dupl_entryinfo_cache_accessor.get_entry_cache(duporiginal))
                linesalias = tw.wrap(fmtalias)
                linesorig = tw.wrap(fmtorig)
                maxlines = max(len(linesalias), len(linesorig))
                return (DUPL_WARN_ENTRY % { 'alias': dupalias,
                                            'orig': duporiginal
                                            }
                        + "\n".join( ('%s%s%s%s' %(' '*DUPL_WARN_ENTRY_BEGCOL,
                                                   linealias + ' '*(DUPL_WARN_ENTRY_COLWIDTH-len(linealias)),
                                                   ' '*DUPL_WARN_ENTRY_COLSEP,
                                                   lineorig)
                                      for (linealias, lineorig) in
                                      zip(linesalias + ['']*(maxlines-len(linesalias)),
                                          linesorig + ['']*(maxlines-len(linesorig)))) )
                        + "\n\n"
                        )
            logger.warning(DUPL_WARN_TOP  +
                           "".join([ warnline(dupalias, duporiginal)
                                     for (dupalias, duporiginal) in duplicates
                                     ])  +
                           DUPL_WARN_BOTTOM % {'num_dupl': len(duplicates)});

        # ### TODO: do this not only if we are given a dupfile?
        #if self.dupfile:
        # ### --> Bibolamazi v3: also set this if no dupfile was given. This is because we
        # ###     are moving entries themselves around and modifying them anyway
        #
        # set the new bibdata, without the duplicates
        # DON'T DO THIS, BECAUSE CACHES MAY HAVE KEPT A POINTER TO THE BIBDATA.
        #bibolamazifile.setBibliographyData(newbibdata);
        #
        # Instead, update bibolamazifile's bibliographyData() object itself.
        #
        bibolamazifile.setEntries(newbibdata.entries.iteritems())
        
        return
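
The filter above interleaves duplicate detection with the aux-file bookkeeping described in the strategy comment. Stripped of the unused/respawn handling and the cache machinery, the core keep-or-alias loop looks roughly like this sketch; same_entry is a placeholder predicate standing in for compare_entries_same().

from pybtex.database import BibliographyData


def deduplicate(bibdata, same_entry):
    """Return (kept, aliases): kept entries plus a duplicate-key -> kept-key map."""
    kept = BibliographyData()
    aliases = {}
    for key, entry in bibdata.entries.items():
        original = next((nkey for nkey, nentry in kept.entries.items()
                         if same_entry(entry, nentry)), None)
        if original is None:
            kept.add_entry(key, entry)    # first occurrence: keep it
        else:
            aliases[key] = original       # duplicate: record the alias only
    return kept, aliases

# The aliases map is what the filter writes out as \bibalias{alias}{original} lines.
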
Example 47
    packages = yaml.load(package_file)


# only tagged packages go in release
with open('tags.json') as tag_file:
    tags = json.load(tag_file)

tagged = list(tags.keys())


from pybtex.database import BibliographyData, Entry

master_data = BibliographyData( {
    'article-minimal': Entry('article', [
        ('author', 'Leslie B. Lamport'),
        ('title', "blah blah blah"),
        ('journal', "Some outlet"),
        ('year', '1986'),
    ]),
})

# handle duplicates
for package in packages:
    subpackages = packages[package].split()
    for subpackage in subpackages:
        package_bib = "tmp/{subpackage}/doc/_static/references.bib".format(subpackage=subpackage)
        if os.path.isfile(package_bib):
            local = pybtex.database.parse_file(package_bib)
            for entry in local.entries:
                if entry not in master_data.entries:
                    master_data.add_entry(entry, local.entries[entry])
                    print('adding', entry)
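
The loop above merges per-subpackage references.bib files into master_data, skipping keys that are already present. The same first-key-wins rule as a small reusable function; the paths in the usage comment are assumptions.

from pybtex.database import BibliographyData, parse_file


def merge_bib_files(paths):
    """Merge several .bib files, keeping the first occurrence of each key."""
    merged = BibliographyData()
    for path in paths:
        local = parse_file(path)
        for key, entry in local.entries.items():
            if key not in merged.entries:
                merged.add_entry(key, entry)
    return merged

# Example usage (paths are assumptions):
# merge_bib_files(['pkg_a/references.bib', 'pkg_b/references.bib']).to_file(
#     'master_references.bib', 'bibtex')
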
Example 48
    parser.add_argument('-y', help="Earliest year to report conflict (default={})".format(earlyyear), default=earlyyear, type=int)
    args = parser.parse_args()

    entries = set()
    dupentries=False
    with open(args.f, 'r') as bin:
        for l in bin:
            if l.startswith('@'):
                l = l.replace('@misc', '')
                l = l.replace('@article', '')
                l = l.replace('@inproceedings', '')
                if l in entries:
                    sys.stderr.write("Duplicate entry " + l.replace('{', '').replace(',', ''))
                    dupentries=True
                entries.add(l)

    if dupentries:
        sys.stderr.write("FATAL: The bibtex file has duplicate entries in it. Please remove them before trying to continue\n")
        sys.stderr.write("(It is an issue with Google Scholar, but pybtex breaks with duplicate entries. Sorry)\n")
        sys.exit(-1)

    bib = parse_file(args.f, 'bibtex')

    for e in bib.entries:
        if 'year' in bib.entries[e].fields:
            if int(bib.entries[e].fields['year']) >= args.y:
                bib_data = BibliographyData({e : bib.entries[e]})
                print(bib_data.to_string('bibtex'))
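
The duplicate check above strips a fixed set of entry-type prefixes (@misc, @article, @inproceedings) and compares the remaining lines. Here is a sketch of an equivalent check that covers any entry type by pulling the key with a regex; the file name in the usage comment is an assumption.

import re
from collections import Counter

KEY_RE = re.compile(r'^@\w+\s*\{\s*([^,\s]+)\s*,')


def duplicate_keys(bibfile):
    """Return citation keys that appear more than once in bibfile."""
    counts = Counter()
    with open(bibfile) as handle:
        for line in handle:
            match = KEY_RE.match(line)
            if match:
                counts[match.group(1)] += 1
    return sorted(key for key, n in counts.items() if n > 1)

# Example usage (file name is an assumption):
# if duplicate_keys('library.bib'):
#     raise SystemExit('Duplicate BibTeX keys found; pybtex breaks on duplicate entries.')
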


Example 49
    def __init__(self, *args, **kwargs):
        command.CommandExtension.__init__(self, *args, **kwargs)

        self.__database = BibliographyData()
        self.__citations = set()
Example 50
  def __init__(self, root=None, **kwargs):
    Preprocessor.__init__(self, **kwargs)

    self._citations = []
    self._bibtex = BibliographyData()
    self._root = root
Example 52
    def run(self, lines):
        """
        Create a bibliography from cite commands.
        """

        # Join the content to enable regex searches throughout entire text
        content = '\n'.join(lines)

        # Build the database of bibtex data
        self._citations = []              # member b/c it is used in substitution function
        self._bibtex = BibliographyData() # ""
        bibfiles = []
        match = re.search(self.RE_BIBLIOGRAPHY, content)
        if match:
            for bfile in match.group(1).split(','):
                try:
                    filename, _ = self.getFilename(bfile.strip())
                    bibfiles.append(filename)
                    data = self.parseBibtexFile(bibfiles[-1])
                    self._bibtex.add_entries(data.entries.iteritems())
                except UndefinedMacro:
                    LOG.error('Undefined macro in bibtex file: %s, specify macro_files arguments ' \
                              'in configuration file (e.g. website.yml)', bfile.strip())
                except TypeError:
                    LOG.error('Unable to locate bibtex file in %s', self.markdown.current.filename)
                except BibliographyDataError as e:
                    LOG.error('%s in %s', str(e), self.markdown.current.filename)
                except Exception as e: #pylint: disable=broad-except
                    LOG.error('Unknown error when parsing bibtex file in %s: %s',
                              self.markdown.current.filename, str(e))
        else:
            return lines

        # Determine the style
        match = re.search(self.RE_STYLE, content)
        if match:
            content = content.replace(match.group(0), '')
            try:
                style = find_plugin('pybtex.style.formatting', match.group(1))
            except PluginNotFound:
                LOG.error('Unknown bibliography style "%s"', match.group(1))
                return lines

        else:
            style = find_plugin('pybtex.style.formatting', 'plain')

        # Replace citations with author date, as an anchor
        content = re.sub(self.RE_CITE, self.authors, content)

        # Create html bibliography
        if self._citations:

            # Generate formatted html using pybtex
            formatted_bibliography = style().format_bibliography(self._bibtex, self._citations)
            backend = find_plugin('pybtex.backends', 'html')
            stream = io.StringIO()
            backend().write_to_stream(formatted_bibliography, stream)

            # Strip the bib items from the formatted html
            html = re.findall(r'\<dd\>(.*?)\</dd\>', stream.getvalue(),
                              flags=re.MULTILINE|re.DOTALL)

            # Produces an ordered list with anchors to the citations
            output = u'<ol class="moose-bibliography" data-moose-bibfiles="{}">\n'
            output = output.format(str(bibfiles))
            for i, item in enumerate(html):
                output += u'<li name="{}">{}</li>\n'.format(self._citations[i], item)
            output += u'</ol>\n'
            content = re.sub(self.RE_BIBLIOGRAPHY,
                             self.markdown.htmlStash.store(output, safe=True),
                             content)

        return content.split('\n')
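
The authors() callbacks in the preprocessors above turn \cite/\citet/\citep commands into author-year text wrapped in HTML anchors. Below is a self-contained sketch of just the substitution step, without the anchors or htmlStash; the entry key and its fields are invented for illustration.

import re

from pybtex.database import parse_string

RE_CITE = r'(?<!`)\\(?P<cmd>cite|citet|citep)\{(?P<keys>.*?)\}'

BIB = parse_string(r"""
@article{smith2016,
  author  = {Smith, Jane and Doe, John},
  title   = {An assumed example entry},
  journal = {Imaginary Letters},
  year    = {2016},
}
""", 'bibtex')


def replace_cite(match):
    """Render a cite command as plain 'Author (Year)' or '(Author, Year)' text."""
    cmd = match.group('cmd')
    texts = []
    for key in (k.strip() for k in match.group('keys').split(',')):
        entry = BIB.entries[key]
        authors = entry.persons['author']
        name = ' '.join(authors[0].last_names)
        if len(authors) == 2:
            name += ' and ' + ' '.join(authors[1].last_names)
        elif len(authors) > 2:
            name += ' et al.'
        year = entry.fields['year']
        texts.append('{}, {}'.format(name, year) if cmd == 'citep'
                     else '{} ({})'.format(name, year))
    return '({})'.format('; '.join(texts)) if cmd == 'citep' else ', '.join(texts)


# Example usage:
# re.sub(RE_CITE, replace_cite, r'As shown by \citet{smith2016}.')
# -> 'As shown by Smith and Doe (2016).'
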