Beispiel #1
0
def parse_bibfile(bibfilename: str, encoding: str) -> "BibliographyData":
    """Read the BibTeX file *bibfilename* using *encoding*.

    One message is logged before parsing and another, carrying the number
    of entries found, afterwards.  The parser's ``data`` attribute is
    returned.
    """
    bib_parser = Parser(encoding)
    start_message = "parsing bibtex file {0}... ".format(bibfilename)
    logger.info(start_message, nonl=True)
    bib_parser.parse_file(bibfilename)
    parsed = bib_parser.data
    logger.info("parsed {0} entries".format(len(parsed.entries)))
    return parsed
Beispiel #2
0
def references_to_markdown(references):
    """Convert a BibTeX string containing references into Markdown.

    The entries are formatted with pybtex's ``plain`` style, rendered by
    the ``markdown`` backend without their labels, and returned as a
    Markdown blockquote (``> `` at the start and after every newline).

    Args:
      references: BibTeX string

    Returns:
      Markdown string

    """

    pybtex_style = find_plugin('pybtex.style.formatting', 'plain')()
    markdown_backend_cls = find_plugin('pybtex.backends', 'markdown')

    # hack to not print labels (may remove this later); done in a local
    # subclass so the shared backend class is not patched for other users
    class _UnlabelledBackend(markdown_backend_cls):
        def write_entry(self, key, label, text):
            self.output(u'%s  \n' % text)

    data = Parser().parse_stream(StringIO(references))
    # .values() instead of the Python 2 only .itervalues()
    data_formatted = pybtex_style.format_entries(data.entries.values())
    output = StringIO()
    _UnlabelledBackend().write_to_stream(data_formatted, output)

    # add blockquote style; str.replace returns a new string, so its
    # result has to be kept rather than discarded
    return '> {}'.format(output.getvalue()).replace('\n', '\n> ')
Beispiel #3
0
    def run(self):
        """Render the BibTeX file named by the first argument as HTML.

        Entries are sorted by their ``year`` field in descending order and
        grouped under one ``<h3>`` heading per year.  The formatting style
        comes from the ``style`` option (default ``unsrt``).

        Returns:
          A one-element list holding a raw node with the generated HTML.

        Raises:
          KeyError: if an entry has no ``year`` field.
        """
        style = find_plugin('pybtex.style.formatting', self.options.get('style', 'unsrt'))()
        parser = Parser()

        # Sort the publication entries by year reversed
        data = sorted(parser.parse_file(self.arguments[0]).entries.items(),
                      key=lambda e: e[1].fields['year'], reverse=True)
        # ``_, entries = zip(*data)`` cannot be unpacked when the
        # bibliography is empty, so the entries are collected explicitly.
        entries = [entry for _, entry in data]

        parts = ['<div class = "publication-list">\n']
        cur_year = None

        for entry in entries:
            year = entry.fields['year']
            # print a year title when year changes
            if year != cur_year:
                if cur_year is not None:  # not first year group
                    parts.append('</ul>')
                cur_year = year
                parts.append('<h3>{}</h3>\n<ul>'.format(cur_year))

            parts.append('<li class = "publication">{}</li>'.format(
                list(style.format_entries((entry,)))[0].text.render_as('html')))

        if entries:  # publication list is nonempty
            parts.append('</ul>')

        parts.append('</div>')

        return [nodes.raw('', ''.join(parts), format='html'), ]
Beispiel #4
0
def parse_bibtex(app):
    """Parse the configured BibTeX file and store the result on ``app.env``.

    ``app.config.bibtex_path`` is resolved against ``app.confdir`` unless
    it is absolute.  The resolved path goes to ``app.env.bibtex_path`` and
    the parsed entries to ``app.env.bibtex_entries``.  Finally a
    ``\\bibliography{...}`` command naming the file (without extension) is
    appended to the ``footer`` item of ``app.config.latex_elements``.

    Raises:
      SphinxWarning: if no path is configured, the file does not exist,
        or parsing fails with a ``PybtexError``.
    """
    configured = app.config.bibtex_path
    if not configured:
        raise SphinxWarning('No BibTeX path specified.')

    # relative paths are taken relative to the configuration directory
    filepath = (configured if os.path.isabs(configured)
                else os.path.join(app.confdir, configured))
    if not os.path.exists(filepath):
        raise SphinxWarning("BibTeX file (%s) does not exists." % filepath)
    app.env.bibtex_path = filepath

    bib_parser = BibtexParser()
    try:
        parsed = bib_parser.parse_file(filepath)
    except PybtexError as ex:
        raise SphinxWarning(ex)
    app.env.bibtex_entries = parsed.entries

    # latex reconfiguration
    stem = os.path.splitext(os.path.basename(filepath))[0]
    latex_elements = app.config.latex_elements
    latex_elements.setdefault('footer', '')
    latex_elements['footer'] += "\\bibliography{%s}" % stem
Beispiel #5
0
def parse_bibtex(app):
    """Load the BibTeX file named in the configuration into ``app.env``.

    A relative ``app.config.bibtex_path`` is joined onto ``app.confdir``.
    On success ``app.env.bibtex_path`` holds the resolved path,
    ``app.env.bibtex_entries`` the parsed entries, and the ``footer`` item
    of ``app.config.latex_elements`` gains a ``\\bibliography{...}``
    command for the file's base name.

    Raises:
      SphinxWarning: when the path is unset, the file is missing, or the
        parser reports a ``PybtexError``.
    """
    bibtex_path = app.config.bibtex_path
    if not bibtex_path:
        raise SphinxWarning('No BibTeX path specified.')

    if os.path.isabs(bibtex_path):
        resolved = bibtex_path
    else:
        resolved = os.path.join(app.confdir, bibtex_path)

    if not os.path.exists(resolved):
        raise SphinxWarning("BibTeX file (%s) does not exists." % resolved)

    app.env.bibtex_path = resolved

    reader = BibtexParser()
    try:
        bibliography = reader.parse_file(resolved)
    except PybtexError as ex:
        raise SphinxWarning(ex)

    app.env.bibtex_entries = bibliography.entries

    # latex reconfiguration: reference the file by name without extension
    base_name, _extension = os.path.splitext(os.path.basename(resolved))
    elements = app.config.latex_elements
    elements.setdefault('footer', '')
    elements['footer'] += "\\bibliography{%s}" % base_name
Beispiel #6
0
def load_citekey(citekey, bibfile=BIBFILE):
    """Return the entry stored under *citekey* in *bibfile*.

    The file is parsed anew on every call; a missing key surfaces as
    whatever the entries mapping raises on lookup.
    """
    return Parser().parse_file(bibfile).entries[citekey]
Beispiel #7
0
def load_citekey(citekey, bibfile=BIBFILE):
    """Parse *bibfile* and look up the entry for *citekey*.

    Each call re-reads the bibliography file before doing the lookup in
    its ``entries`` mapping.
    """
    entries = Parser().parse_file(bibfile).entries
    return entries[citekey]
    def run(self):
        """Render the BibTeX file named by the first argument as HTML.

        Entries are sorted by ``year`` (descending) and grouped under one
        ``<h3>`` heading per year.  Options read:

        * ``style`` -- pybtex formatting style (default ``unsrt``).
        * ``bibtex_dir`` -- directory below ``self.output_folder`` that
          receives one ``.bib`` file per entry (default ``bibtex``); a
          falsy value disables the export and the download link.
        * ``highlight_author`` -- text whose first occurrence in each
          rendered entry is wrapped in ``<strong>``.

        An entry with a ``pdf`` field additionally gets a link to it.

        Returns:
          A one-element list holding a raw node with the generated HTML.

        Raises:
          KeyError: if an entry has no ``year`` field.
          OSError: if the export directory is missing and cannot be created.
        """
        style = find_plugin('pybtex.style.formatting', self.options.get('style', 'unsrt'))()
        bibtex_dir = self.options.get('bibtex_dir', 'bibtex')
        highlight_author = self.options.get('highlight_author', None)

        parser = Parser()

        # Sort the publication entries by year reversed
        data = sorted(parser.parse_file(self.arguments[0]).entries.items(),
                      key=lambda e: e[1].fields['year'], reverse=True)

        if bibtex_dir:  # create the bibtex dir if the option is set
            export_dir = os.path.sep.join((self.output_folder, bibtex_dir))
            try:
                os.mkdir(export_dir)
            except OSError:
                # An already existing directory is fine; any other failure
                # (permissions, missing parent, ...) must not be hidden.
                if not os.path.isdir(export_dir):
                    raise

        parts = ['<div class = "publication-list">\n']
        cur_year = None

        for label, entry in data:
            year = entry.fields['year']
            # print a year title when year changes
            if year != cur_year:
                if cur_year is not None:  # not first year group
                    parts.append('</ul>')
                cur_year = year
                parts.append('<h3>{}</h3>\n<ul>'.format(cur_year))

            pub_html = list(style.format_entries((entry,)))[0].text.render_as('html')
            if highlight_author:  # highlight an author (usually oneself)
                pub_html = pub_html.replace(highlight_author,
                                            '<strong>{}</strong>'.format(highlight_author), 1)
            parts.append('<li class = "publication">' + pub_html)

            extra_links = ""
            if bibtex_dir:  # write bib files to bibtex_dir for downloading
                bib_link = '{}/{}.bib'.format(bibtex_dir, label)
                bib_data = BibliographyData({label: entry})
                bib_data.to_file('/'.join([self.output_folder, bib_link]), 'bibtex')
                extra_links += '[<a href="{}">bibtex</a>] '.format(bib_link)

            if 'pdf' in entry.fields:  # the link to the pdf file
                extra_links += '[<a href="{}">pdf</a>] '.format(entry.fields['pdf'])

            if extra_links:
                parts.append('<br/>' + extra_links)

            parts.append('</li>')

        if data:  # publication list is nonempty
            parts.append('</ul>')

        parts.append('</div>')

        return [nodes.raw('', ''.join(parts), format='html'), ]
Beispiel #9
0
    def setUp(self):
        """Load the test bibliography and the ``apa`` style for each test.

        Both are looked up relative to the directory containing this file:
        the entries from ``testdata/bibtex.bib`` and the style from the
        ``styles`` directory.
        """
        unittest.TestCase.setUp(self)

        here = os.path.dirname(os.path.abspath(__file__))
        bib_path = os.path.join(here, 'testdata', 'bibtex.bib')
        styles_dir = os.path.join(here, 'styles')

        self.entries = BibtexParser().parse_file(bib_path).entries
        self.style = Style(styles_dir, 'apa')
Beispiel #10
0
def doi2Entry(doi):
    """Return the parsed bibliography data for *doi*, or ``None``.

    The BibTeX text is obtained from ``doi2bibtex``.  When that yields
    ``None`` nothing is parsed and ``None`` is returned; otherwise the
    text is decoded as UTF-8 and parsed with a fresh ``Parser``.
    """
    bibtext = doi2bibtex(doi)
    # identity test: ``None`` is a singleton, ``!= None`` can be overridden
    if bibtext is None:
        return None
    return Parser().parse_stream(StringIO(bibtext.decode('utf8')))
Beispiel #11
0
def add_citations(generators):
    """Load the global bibliography and process every article and page.

    The settings of the first generator are consulted: ``PATH`` updates
    the module-level ``content_path`` and ``PUBLICATIONS_SRC`` (joined
    onto ``content_path``) names the BibTeX file parsed into the
    module-level ``global_bib``.  A parse failure is logged and leaves
    ``global_bib`` unchanged.  Afterwards ``process_content`` is called
    for each article of every ``ArticlesGenerator`` and each page of every
    ``PagesGenerator``.

    Nothing is done (apart from a warning) when pybtex is unavailable.
    """
    global global_bib
    global content_path
    if not pyb_imported:
        logger.warning('`pelican-cite` failed to load dependency `pybtex`')
        return

    settings = generators[0].settings

    # The content path has to be taken from the settings *before* it is
    # used to locate the bibliography file below.
    if 'PATH' in settings:
        content_path = settings['PATH']
    else:
        logger.warning('`pelican-cite` failed to obtain content path')

    if 'PUBLICATIONS_SRC' in settings:
        refs_file = os.path.join(content_path, settings['PUBLICATIONS_SRC'])
        try:
            global_bib = Parser().parse_file(refs_file)
        except PybtexError as e:
            logger.warning('`pelican_bibtex` failed to parse file %s: %s' %
                           (refs_file, str(e)))

    # Process the articles and pages
    for generator in generators:
        if isinstance(generator, ArticlesGenerator):
            for article in generator.articles:
                process_content(article)
        elif isinstance(generator, PagesGenerator):
            for page in generator.pages:
                process_content(page)
Beispiel #12
0
def make_bibliography(aux_filename,
                      bib_format=None,
                      bib_encoding=None,
                      output_encoding=None,
                      bst_encoding=None,
                      min_crossrefs=2,
                      **kwargs):
    """Run the BibTeX style named in an ``.aux`` file and write the ``.bbl``.

    Args:
      aux_filename: path of the ``.aux`` file; the ``.bbl`` file is written
        under the same base name.
      bib_format: bibliography parser class; defaults to the pybtex BibTeX
        ``Parser``.  Its ``get_default_suffix()`` is appended to every
        bibliography name listed in the ``.aux`` data.
      bib_encoding: handed to the interpreter together with *bib_format*.
      output_encoding: used both to parse the ``.aux`` file and to open
        the ``.bbl`` file for writing.
      bst_encoding: encoding used to parse the ``.bst`` style file.
      min_crossrefs: forwarded unchanged to the interpreter.
      **kwargs: accepted and ignored.
    """

    from os import path

    import pybtex.io
    from pybtex.bibtex import bst
    from pybtex.bibtex.interpreter import Interpreter
    from pybtex import auxfile

    if bib_format is None:
        from pybtex.database.input.bibtex import Parser as bib_format
    aux_data = auxfile.parse_file(aux_filename, output_encoding)
    bst_filename = aux_data.style + path.extsep + 'bst'
    bst_script = bst.parse_file(bst_filename, bst_encoding)
    base_filename = path.splitext(aux_filename)[0]
    bbl_filename = base_filename + path.extsep + 'bbl'
    suffix = bib_format.get_default_suffix()
    bib_filenames = [filename + suffix for filename in aux_data.data]
    interpreter = Interpreter(bib_format, bib_encoding)
    # The context manager guarantees the output file is closed even when
    # the interpreter raises part-way through.
    with pybtex.io.open_unicode(bbl_filename,
                                'w',
                                encoding=output_encoding) as bbl_file:
        interpreter.run(bst_script,
                        aux_data.citations,
                        bib_filenames,
                        bbl_file,
                        min_crossrefs=min_crossrefs)
Beispiel #13
0
def make_bibliography(aux_filename,
        bib_format=None,
        bib_encoding=None,
        output_encoding=None,
        bst_encoding=None,
        min_crossrefs=2,
        **kwargs
    ):
    """Interpret the ``.bst`` style referenced by *aux_filename*.

    The ``.aux`` file supplies the style name, the citations and the
    bibliography names; the formatted result is written to a ``.bbl`` file
    that shares the base name of *aux_filename*.

    Args:
      aux_filename: path of the ``.aux`` file.
      bib_format: bibliography parser class; defaults to the pybtex BibTeX
        ``Parser``.  Its ``get_default_suffix()`` is appended to every
        bibliography name.
      bib_encoding: handed to the interpreter together with *bib_format*.
      output_encoding: used both to parse the ``.aux`` file and to open
        the ``.bbl`` file for writing.
      bst_encoding: encoding used to parse the ``.bst`` style file.
      min_crossrefs: forwarded unchanged to the interpreter.
      **kwargs: accepted and ignored.
    """

    from os import path

    import pybtex.io
    from pybtex.bibtex import bst
    from pybtex.bibtex.interpreter import Interpreter
    from pybtex import auxfile

    if bib_format is None:
        from pybtex.database.input.bibtex import Parser as bib_format
    aux_data = auxfile.parse_file(aux_filename, output_encoding)
    bst_filename = aux_data.style + path.extsep + 'bst'
    bst_script = bst.parse_file(bst_filename, bst_encoding)
    base_filename = path.splitext(aux_filename)[0]
    bbl_filename = base_filename + path.extsep + 'bbl'
    suffix = bib_format.get_default_suffix()
    bib_filenames = [filename + suffix for filename in aux_data.data]
    interpreter = Interpreter(bib_format, bib_encoding)
    # Use a context manager so the output file is closed on every exit
    # path, including an exception raised while the style is running.
    with pybtex.io.open_unicode(bbl_filename, 'w', encoding=output_encoding) as bbl_file:
        interpreter.run(bst_script, aux_data.citations, bib_filenames, bbl_file,
                        min_crossrefs=min_crossrefs)
Beispiel #14
0
def add_citations(generators):
    """Load the global bibliography and process all generated content.

    When the first generator's settings contain ``PUBLICATIONS_SRC`` that
    file is parsed into the module-level ``global_bib``; a parse failure
    is logged and leaves ``global_bib`` unchanged.  ``process_content`` is
    then called for the articles, translations and drafts of every
    ``ArticlesGenerator`` and for the pages of every ``PagesGenerator``.

    Nothing is done (apart from a warning) when pybtex is unavailable.
    """
    global global_bib
    if not pyb_imported:
        # Logger.warning replaces the deprecated Logger.warn alias
        logger.warning("`pelican-cite` failed to load dependency `pybtex`")
        return

    settings = generators[0].settings
    if "PUBLICATIONS_SRC" in settings:
        refs_file = settings["PUBLICATIONS_SRC"]
        try:
            global_bib = Parser().parse_file(refs_file)
        except PybtexError as e:
            logger.warning(
                "`pelican_bibtex` failed to parse file %s: %s" % (refs_file, str(e))
            )

    # Process the articles and pages
    for generator in generators:
        if isinstance(generator, ArticlesGenerator):
            for article in (
                generator.articles + generator.translations + generator.drafts
            ):
                process_content(article)
        elif isinstance(generator, PagesGenerator):
            for page in generator.pages:
                process_content(page)
Beispiel #15
0
def groomBib(bibfile, groomedfile=""):
    """Clean a BibTeX file and warn about inconsistently written names.

    The file is read as UTF-8, parsed, and passed through
    ``cleanBibliographyData``.  In the cleaned data every journal field is
    compared with its title-cased form (a difference is resolved through
    ``changeThisforThat``), and a warning is issued whenever two persons
    share a last name but differ in any other name part.  The cleaned data
    is finally handed to ``savebib``.

    Args:
      bibfile: path of the BibTeX file to read.
      groomedfile: output path; defaults to ``bibfile + ".groomed.bib"``.

    Returns:
      The string ``"Done"``.
    """
    import io  # local import: only needed for the encoding-aware open()

    if groomedfile == "":
        groomedfile = bibfile + ".groomed.bib"

    # Assume the input is utf8 encoded.  Decoding happens while reading;
    # calling .decode() on the result only works for Python 2 byte strings.
    with io.open(bibfile, "r", encoding="utf8") as f:
        bstr = f.read()

    # Parse the bibtex file
    bib_parser = Parser()
    bib_data = bib_parser.parse_stream(StringIO(bstr))

    # first groom the original bib file
    bib_cleandata = cleanBibliographyData(bib_data)
    part_names = ("last", "first", "middle", "prelast", "lineage")
    seen_persons = []  # one (last, first, middle, prelast, lineage) per person
    if bib_cleandata is not None:
        for (key, entry) in bib_cleandata.entries.items():
            if _journal_field in entry.fields:
                journal = entry.fields[_journal_field]
                journaltitlecase = titlecase.titlecase(journal)
                if journal != journaltitlecase:
                    entry.fields[_journal_field] = changeThisforThat(
                        journal, journaltitlecase, "Change for %s." % _journal_field)

            # check inconsistencies in names
            for persontype in entry.persons:  # authors for example
                for person in entry.persons[persontype]:
                    parts = tuple(person.get_part_as_text(name) for name in part_names)
                    # same last name but any other part different
                    conflicts = [known for known in seen_persons
                                 if known[0] == parts[0] and known[1:] != parts[1:]]
                    if conflicts:  # warns regarding conflict with names
                        oneversion = Person(last=parts[0], first=parts[1], middle=parts[2],
                                            prelast=parts[3], lineage=parts[4])
                        for known in conflicts:
                            otherversion = Person(last=known[0], first=known[1], middle=known[2],
                                                  prelast=known[3], lineage=known[4])
                            warnings.warn("Possible problem with authors: %s vs. %s" % (oneversion, otherversion))
                    # append in place instead of rebuilding the list each time
                    seen_persons.append(parts)

    savebib(bib_cleandata, groomedfile)
    return "Done"
Beispiel #16
0
def remfieldsBib(bibfile, cleanfile=""):
    """Remove the unwanted fields from every entry of a BibTeX file.

    The file is read as UTF-8 and parsed; each field named in
    ``_unwanted_fields`` is deleted from every entry that has it, and the
    result is handed to ``savebib``.

    Args:
      bibfile: path of the BibTeX file to read.
      cleanfile: output path; defaults to ``bibfile + ".clean.bib"``.

    Returns:
      The string ``"Done"``.
    """
    import io  # local import: only needed for the encoding-aware open()

    if cleanfile == "":
        cleanfile = bibfile + ".clean.bib"

    # Assume the input is utf8 encoded.  Decoding happens while reading;
    # calling .decode() on the result only works for Python 2 byte strings.
    with io.open(bibfile, "r", encoding="utf8") as f:
        bstr = f.read()

    # Parse the bibtex file
    bib_parser = Parser()
    bib_data = bib_parser.parse_stream(StringIO(bstr))
    for (key, entry) in bib_data.entries.items():
        for field_name in _unwanted_fields:
            if field_name in entry.fields:
                del entry.fields[field_name]

    savebib(bib_data, cleanfile)
    return "Done"
    def run(self):
        """Render the publications of a BibTeX file through a template.

        The first argument names the BibTeX file.  Every entry is formatted
        with ``jo.Style`` and rendered to HTML, and is collected as a tuple
        ``(key, year, text, bibtex, pdf, slides, poster)``.  The list of
        tuples is passed as ``publications`` to the template given by the
        ``template`` option or, without that option, to the
        ``publications`` template of ``pelican_generator``.

        Returns:
          A one-element list holding a raw node with the rendered HTML, or
          an empty list when the BibTeX file cannot be parsed.
        """
        refs_file = self.arguments[0].strip()

        try:
            bibdata_all = Parser().parse_file(refs_file)
        except PybtexError as e:
            logger.warning('`pelican_bibtex` failed to parse file %s: %s' %
                           (refs_file, str(e)))
            # keep the return type a list, like the success path below
            return []

        # format entries
        jo_style = jo.Style()
        jo_style.strong = 'Razik'
        html_backend = html.Backend()
        formatted_entries = jo_style.format_entries(
            bibdata_all.entries.values())

        publications = []

        for formatted_entry in formatted_entries:
            key = formatted_entry.key
            entry = bibdata_all.entries[key]
            fields = entry.fields
            year = fields.get('year')
            # This shouldn't really stay in the field dict
            # but new versions of pybtex don't support pop
            pdf = fields.get('pdf', None)
            slides = fields.get('slides', None)
            poster = fields.get('poster', None)

            # render the bibtex string for the entry
            bib_buf = StringIO()
            bibdata_this = BibliographyData(entries={key: entry})
            Writer().write_stream(bibdata_this, bib_buf)
            text = formatted_entry.text.render(html_backend)

            publications.append(
                (key, year, text, bib_buf.getvalue(), pdf, slides, poster))

        # Load the publications template
        if 'template' in self.options:
            template_path = self.options['template']
            template_dir, template_name = os.path.split(template_path)
            env = Environment(loader=FileSystemLoader(template_dir))
            template = env.get_template(template_name)
        else:
            # Use template from the Pelican theme
            template = pelican_generator.get_template('publications')

        rendered_template = template.render(publications=publications)
        return [nodes.raw('', rendered_template, format='html')]
Beispiel #18
0
def get_bib_file(article):
    """
    Return the bibliography to use for *article*.

    If the article metadata names a bibliography file under
    ``publications_src``, that file is parsed and the parsed object is
    returned.  Without such metadata, or when parsing fails (a warning is
    logged), the module-level ``global_bib`` is returned instead.
    """
    if 'publications_src' not in article.metadata:
        return global_bib

    refs_file = article.metadata['publications_src']
    try:
        return Parser().parse_file(refs_file)
    except PybtexError as e:
        # Logger.warning replaces the deprecated Logger.warn alias
        logger.warning('`pelican_bibtex` failed to parse file %s: %s' %
                       (refs_file, str(e)))
        return global_bib
Beispiel #19
0
def init(pelican_instance):
    """Initialise the module-level state from the Pelican settings.

    ``BIBLIOGRAPHY_START`` and ``BIBLIOGRAPHY_END`` overwrite the globals
    ``bibliography_start`` and ``bibliography_end`` when present.  If
    ``PUBLICATIONS_SRC`` is present, that file is parsed into
    ``global_bib``; a parse failure is only logged.  Without pybtex a
    warning is logged and nothing else happens.
    """
    global global_bib, bibliography_start, bibliography_end
    if not pyb_imported:
        logger.warning('`pelican-cite` failed to load dependency `pybtex`')
        return

    settings = pelican_instance.settings

    if 'BIBLIOGRAPHY_START' in settings:
        bibliography_start = settings['BIBLIOGRAPHY_START']
    if 'BIBLIOGRAPHY_END' in settings:
        bibliography_end = settings['BIBLIOGRAPHY_END']

    if 'PUBLICATIONS_SRC' not in settings:
        return

    refs_file = settings['PUBLICATIONS_SRC']
    try:
        global_bib = Parser().parse_file(refs_file)
    except PybtexError as e:
        logger.warning('`pelican_bibtex` failed to parse file %s: %s' %
                       (refs_file, str(e)))
Beispiel #20
0
def get_bib_file(article):
    """
    Return the bibliography to use for *article*.

    When the metadata has a ``publications_src`` item, the file of that
    name in the directory of ``article.source_path`` is parsed and the
    result returned; the looked-up path is logged first.  If there is no
    such item, or parsing raises ``PybtexError`` (logged as a warning),
    the module-level ``global_bib`` is returned.
    """
    if 'publications_src' not in article.metadata:
        return global_bib

    refs_file = article.metadata['publications_src']
    try:
        bib_path = os.path.dirname(article.source_path) + os.path.sep + refs_file
        logger.warning(f"Looking for bib file at {bib_path}")
        return Parser().parse_file(bib_path)
    except PybtexError as e:
        logger.warning('`pelican_bibtex` failed to parse file %s: %s' %
                       (refs_file, str(e)))
        return global_bib
Beispiel #21
0
        return fields[x]
    else:
        return ""


# Bibliography file parsed below.
BIBFILE = 'cs.bib'

# Output files, opened for writing (truncating any existing content).
# NOTE(review): none of these handles is closed in the visible part of
# the script -- confirm they are closed further down.
jfile = open('journal.text', 'w')
cfile = open('conf.text', 'w')

cfile_selected = open('conf_selected.text', 'w')
cfile_selected2 = open('conf_selected2.text', 'w')

cfile_other = open('conf_other.text', 'w')

# Parse the bibliography and order its (key, entry) pairs with the
# comparison function sort_list_cmp, which is defined elsewhere.
parser = Parser()
bib_data = parser.parse_file(BIBFILE)
bib_sorted = sorted(bib_data.entries.items(), key=cmp_to_key(sort_list_cmp))

# Running state, one group of three per output category.
# NOTE(review): presumably a counter, the last year seen and a
# "print the year heading" flag -- verify against the code that uses them.
jnumber = 0
jyear = 9999
print_jyear = 0

cnumber = 0
cyear = 9999
print_cyear = 0

cnumber_select = 0
cyear_select = 9999
print_cyear_select = 0
Beispiel #22
0
def meta_str_format(val, meta):
    """Return *val* unchanged; *meta* is accepted but not used."""
    return val

# Maps a format name to the function that handles values of that format.
# Apart from meta_str_format the handlers are defined elsewhere.
META_FORMATS = {
    'dir':meta_dir_format,
    'jinja':meta_jinja_format,
    'csv':meta_csv_format,
    'jsonfile':meta_jsonfile_format,
    'json':meta_json_format,
    'str':meta_str_format
}

try:
    from pybtex.database.input.bibtex import Parser as BibParser
    P = BibParser()
    P.macros['true']=True

    CONVERT_KEYS=['title','pages']
    def fromBTeX(t):
        """Strip braces from *t* and replace a few TeX sequences.

        The accent sequences become the accented letters; the dash
        sequences become HTML entities.
        """
        substitutions = (
            ('{', ''),
            ('}', ''),
            ("\\'\\i", u'í'),
            ('\\vc', u'č'),
            ('---', '&mdash;'),
            ('--', '&ndash;'),
            ('\\textendash', '&ndash;'),
        )
        # applied strictly in this order: '---' has to be handled before '--'
        for old, new in substitutions:
            t = t.replace(old, new)
        return t

    def meta_bib_format(val,meta):
        bib = P.parse_file(val)
        ret = []
        for entry in bib.entries.values():
            pub = {'type':entry.type}
            pub.update(entry.fields)
            authors = entry.persons.get('author',[])
            editors = entry.persons.get('editor',[])
Beispiel #23
0
def add_publications(generator):
    """
    Populates context with a list of BibTeX publications.

    Configuration
    -------------
    generator.settings['PUBLICATIONS_SRC']:
        Local path to the BibTeX file to read.

    generator.settings['PUBLICATIONS_SPLIT_BY']:
        The name of the bibtex field used for splitting the publications.
        The field is read as a comma-separated list of tags.
        No splitting if the setting is not provided.

    generator.settings['PUBLICATIONS_UNTAGGED_TITLE']:
        The title of the header for all untagged entries.
        No such list if title is not provided.

    Output
    ------
    generator.context['publications_lists']:
        A map with keys retrieved from the field named in
        PUBLICATIONS_SPLIT_BY.  Values are lists of dicts with the keys
        'key', 'year', 'text', 'bibtex', 'pdf', 'slides' and 'poster'.

    generator.context['publications']:
        Contains all publications as a list of dicts with the same keys.

    Nothing is written to the context when PUBLICATIONS_SRC is not set,
    pybtex cannot be imported, or the BibTeX file cannot be parsed.
    """
    if 'PUBLICATIONS_SRC' not in generator.settings:
        return
    try:
        from StringIO import StringIO
    except ImportError:
        from io import StringIO
    try:
        from pybtex.database.input.bibtex import Parser
        from pybtex.database.output.bibtex import Writer
        from pybtex.database import BibliographyData, PybtexError
        from pybtex.backends import html
        from pybtex.style.formatting import plain
    except ImportError:
        # Logger.warning replaces the deprecated Logger.warn alias
        logger.warning('`pelican_bib` failed to load dependency `pybtex`')
        return

    refs_file = generator.settings['PUBLICATIONS_SRC']
    try:
        bibdata_all = Parser().parse_file(refs_file)
    except PybtexError as e:
        logger.warning('`pelican_bib` failed to parse file %s: %s' %
                       (refs_file, str(e)))
        return

    publications = []
    publications_lists = {}
    publications_untagged = []

    split_by = generator.settings.get('PUBLICATIONS_SPLIT_BY')
    untagged_title = generator.settings.get('PUBLICATIONS_UNTAGGED_TITLE')

    # format entries
    plain_style = plain.Style()
    html_backend = html.Backend()
    formatted_entries = plain_style.format_entries(
        bibdata_all.entries.values())

    for formatted_entry in formatted_entries:
        key = formatted_entry.key
        entry = bibdata_all.entries[key]
        year = entry.fields.get('year')
        # This shouldn't really stay in the field dict
        # but new versions of pybtex don't support pop
        pdf = entry.fields.get('pdf', None)
        slides = entry.fields.get('slides', None)
        poster = entry.fields.get('poster', None)

        # parse the tag field to a list, trimming each tag
        tags = []
        if split_by:
            raw_tags = entry.fields.get(split_by, '')
            if raw_tags:
                tags = [tag.strip() for tag in raw_tags.split(',')]

        # render the bibtex string for the entry
        bib_buf = StringIO()
        bibdata_this = BibliographyData(entries={key: entry})
        Writer().write_stream(bibdata_this, bib_buf)
        text = formatted_entry.text.render(html_backend)

        entry_info = {
            'key': key,
            'year': year,
            'text': text,
            'bibtex': bib_buf.getvalue(),
            'pdf': pdf,
            'slides': slides,
            'poster': poster
        }

        publications.append(entry_info)

        # a list is created for a tag the first time the tag is seen
        for tag in tags:
            publications_lists.setdefault(tag, []).append(entry_info)

        if not tags and untagged_title:
            publications_untagged.append(entry_info)

    # append untagged list if title is given
    if untagged_title and publications_untagged:
        publications_lists[untagged_title] = publications_untagged

    # output
    generator.context['publications'] = publications
    generator.context['publications_lists'] = publications_lists
def add_publications(generator):
    """
    Populates context with a list of BibTeX publications.

    Configuration
    -------------
    generator.settings['PUBLICATIONS_SRC']:
        local path to the BibTeX file to read.

    Output
    ------
    generator.context['publications']:
        List of tuples (key, year, text, bibtex, pdf, slides, poster).
        See Readme.md for more details.

    Nothing is written to the context when PUBLICATIONS_SRC is not set,
    pybtex cannot be imported, or the BibTeX file cannot be parsed.
    """
    if 'PUBLICATIONS_SRC' not in generator.settings:
        return
    try:
        from StringIO import StringIO
    except ImportError:
        from io import StringIO
    try:
        from pybtex.database.input.bibtex import Parser
        from pybtex.database.output.bibtex import Writer
        from pybtex.database import BibliographyData, PybtexError
        from pybtex.backends import html
        from pybtex.style.formatting import plain
    except ImportError:
        # Logger.warning replaces the deprecated Logger.warn alias
        logger.warning('`pelican_bibtex` failed to load dependency `pybtex`')
        return

    refs_file = generator.settings['PUBLICATIONS_SRC']
    try:
        bibdata_all = Parser().parse_file(refs_file)
    except PybtexError as e:
        logger.warning('`pelican_bibtex` failed to parse file %s: %s' % (
            refs_file,
            str(e)))
        return

    publications = []

    # format entries
    plain_style = plain.Style()
    html_backend = html.Backend()
    formatted_entries = plain_style.format_entries(bibdata_all.entries.values())

    for formatted_entry in formatted_entries:
        key = formatted_entry.key
        entry = bibdata_all.entries[key]
        fields = entry.fields
        year = fields.get('year')
        # This shouldn't really stay in the field dict
        # but new versions of pybtex don't support pop
        pdf = fields.get('pdf', None)
        slides = fields.get('slides', None)
        poster = fields.get('poster', None)

        # render the bibtex string for the entry
        bib_buf = StringIO()
        bibdata_this = BibliographyData(entries={key: entry})
        Writer().write_stream(bibdata_this, bib_buf)
        text = formatted_entry.text.render(html_backend)

        publications.append((key,
                             year,
                             text,
                             bib_buf.getvalue(),
                             pdf,
                             slides,
                             poster))

    generator.context['publications'] = publications
Beispiel #25
0
def add_publications(generator):
    """
    Populates context with a list of BibTeX publications.

    Configuration
    -------------
    generator.settings['PUBLICATIONS_SRC']:
        local path to the BibTeX file to read.

    Output
    ------
    generator.context['publications']:
        List of tuples
        (key, year, text, bibtex, pdf, slides, poster, entrytype),
        sorted by year in descending order (entries without a year last).
        `entrytype` is the value of the entry's `type` *field*, or None.
        See Readme.md for more details.

    The function returns without touching the context when the setting is
    missing, when `pybtex` cannot be imported, or when the file fails to
    parse (the latter two cases are logged as warnings).
    """
    if 'PUBLICATIONS_SRC' not in generator.settings:
        return
    try:
        from StringIO import StringIO
    except ImportError:
        from io import StringIO
    try:
        from pybtex.database.input.bibtex import Parser
        from pybtex.database.output.bibtex import Writer
        from pybtex.database import BibliographyData, PybtexError
        from pybtex.backends import html
        from pybtex.style.formatting import plain, toplevel
        # `date` and `pages` are the shared template fragments used by the
        # stock styles; the custom templates below reuse them.
        from pybtex.style.formatting.unsrt import date, pages
        from pybtex.style.template import (sentence, words, first_of, join,
                                           optional, optional_field, field, tag)
        from pybtex.richtext import Symbol
    except ImportError:
        logger.warning('`pelican_bibtex` failed to load dependency `pybtex`')
        return

    refs_file = generator.settings['PUBLICATIONS_SRC']
    try:
        bibdata_all = Parser().parse_file(refs_file)
    except PybtexError as e:
        logger.warning('`pelican_bibtex` failed to parse file %s: %s' % (
            refs_file,
            str(e)))
        return

    class CustomStyle(plain.Style):
        """Plain style variant: bold title first, then authors, then venue,
        each on its own line (separated by the 'newline' symbol)."""

        def format_bold_title(self, e, which_field, as_sentence=True):
            """Return a template rendering *which_field* inside <strong>."""
            formatted_title = tag('strong')[field(which_field)]
            if as_sentence:
                return sentence[formatted_title]
            else:
                return formatted_title

        def get_inproceedings_template(self, e):
            template = toplevel[
                self.format_bold_title(e, 'title'),
                Symbol('newline'),
                sentence[self.format_names('author')],
                Symbol('newline'),
                words[
                    'In',
                    sentence[
                        optional[self.format_editor(e, as_sentence=False)],
                        self.format_btitle(e, 'booktitle', as_sentence=False),
                        self.format_volume_and_series(e, as_sentence=False),
                    ],
                    self.format_address_organization_publisher_date(e),
                ],
                sentence[optional_field('note')],
                self.format_web_refs(e),
            ]
            return template

        def get_article_template(self, e):
            volume_and_pages = first_of[
                # volume and pages, with optional issue number
                optional[
                    join[
                        field('volume'),
                        optional['(', field('number'), ')'],
                        ':', pages
                    ],
                ],
                # pages only
                words['pages', pages],
            ]
            template = toplevel[
                self.format_bold_title(e, 'title'),
                Symbol('newline'),
                self.format_names('author'),
                Symbol('newline'),
                sentence[
                    tag('em')[field('journal')],
                    optional[volume_and_pages],
                    date],
                sentence[optional_field('note')],
                self.format_web_refs(e),
            ]
            return template

        def get_techreport_template(self, e):
            template = toplevel[
                self.format_bold_title(e, 'title'),
                Symbol('newline'),
                sentence[self.format_names('author')],
                Symbol('newline'),
                sentence[
                    words[
                        first_of[
                            optional_field('type'),
                            'Technical Report',
                        ],
                        optional_field('number'),
                    ],
                    field('institution'),
                    optional_field('address'),
                    date,
                ],
                sentence[optional_field('note')],
                self.format_web_refs(e),
            ]
            return template

    publications = []

    # format entries
    my_style = CustomStyle()
    html_backend = html.Backend()
    # teach the HTML backend how to render Symbol('newline')
    html_backend.symbols.update({'newline': '<br>'})
    formatted_entries = my_style.format_entries(bibdata_all.entries.values())

    for formatted_entry in formatted_entries:
        key = formatted_entry.key
        entry = bibdata_all.entries[key]
        year = entry.fields.get('year')
        # This shouldn't really stay in the field dict
        # but new versions of pybtex don't support pop
        pdf = entry.fields.get('pdf', None)
        slides = entry.fields.get('slides', None)
        poster = entry.fields.get('poster', None)
        entrytype = entry.fields.get('type', None)

        # render the bibtex string for the entry
        bib_buf = StringIO()
        bibdata_this = BibliographyData(entries={key: entry})
        Writer().write_stream(bibdata_this, bib_buf)
        text = formatted_entry.text.render(html_backend)

        publications.append((key,
                             year,
                             text,
                             bib_buf.getvalue(),
                             pdf,
                             slides,
                             poster,
                             entrytype))

    # Newest first. A missing year (None) is mapped to '' so it can be
    # compared with the string years and ends up last.
    publications.sort(key=lambda publication: publication[1] or '',
                      reverse=True)

    generator.context['publications'] = publications
Beispiel #26
0
    def run(self):
        """Render the BibTeX file named by the first argument as HTML.

        Options read from ``self.options``:
          * ``style``: pybtex formatting style plugin name (default 'unsrt')
          * ``bibtex_dir``: sub-directory of ``self.output_folder`` that
            receives one ``<label>.bib`` file per entry (default 'bibtex';
            a falsy value disables it)
          * ``detail_page_dir``: sub-directory that receives one rendered
            ``<label>.html`` detail page per entry (default 'papers'; a
            falsy value disables it)
          * ``highlight_author``: text whose first occurrence in each entry
            is wrapped in ``<strong>``

        Entries are grouped under a ``<h3>`` heading per year, newest first.
        Every entry must have a ``year`` field (a missing one raises
        KeyError).

        Returns a one-element list holding a raw HTML docutils node.
        """

        style = find_plugin('pybtex.style.formatting', self.options.get('style', 'unsrt'))()
        bibtex_dir = self.options.get('bibtex_dir', 'bibtex')
        detail_page_dir = self.options.get('detail_page_dir', 'papers')
        highlight_author = self.options.get('highlight_author', None)
        self.state.document.settings.record_dependencies.add(self.arguments[0])

        parser = Parser()

        # Sort the publication entries by year reversed
        data = sorted(parser.parse_file(self.arguments[0]).entries.items(),
                      key=lambda e: e[1].fields['year'], reverse=True)

        html = '<div class = "publication-list">\n'
        cur_year = None

        # Create the output directories up front. exist_ok tolerates an
        # already existing directory while still surfacing real failures
        # (e.g. permissions) instead of hiding them until the first write.
        if bibtex_dir:
            os.makedirs(os.path.sep.join((self.output_folder, bibtex_dir)),
                        exist_ok=True)

        if detail_page_dir:
            os.makedirs(os.path.sep.join((self.output_folder, detail_page_dir)),
                        exist_ok=True)

        for label, entry in data:
            # print a year title when year changes
            if entry.fields['year'] != cur_year:
                if cur_year is not None:  # not first year group
                    html += '</ul>'
                cur_year = entry.fields['year']
                html += '<h3>{}</h3>\n<ul>'.format(cur_year)

            pub_html = list(style.format_entries((entry,)))[0].text.render_as('html')
            if highlight_author:  # highlight an author (usually oneself)
                pub_html = pub_html.replace(highlight_author,
                                            '<strong>{}</strong>'.format(highlight_author), 1)
            html += '<li class = "publication">' + pub_html

            extra_links = ""
            bib_data = BibliographyData(dict({label: entry}))  # detail_page_dir may need it later
            if bibtex_dir:  # write bib files to bibtex_dir for downloading
                bib_link = '{}/{}.bib'.format(bibtex_dir, label)
                bib_data.to_file('/'.join([self.output_folder, bib_link]), 'bibtex')
                extra_links += '[<a href="{}">BibTeX</a>] '.format(
                    self.site.config['BASE_URL'] + bib_link)

            if 'fulltext' in entry.fields:  # the link to the full text, usually a link to the pdf file
                extra_links += '[<a href="{}">full text</a>] '.format(entry.fields['fulltext'])

            if extra_links or detail_page_dir:
                html += '<br>'
            html += extra_links

            if detail_page_dir:  # render the details page of a paper
                page_url = '/'.join((detail_page_dir, label + '.html'))
                html += ' [<a href="{}">abstract and details</a>]'.format(
                    self.site.config['BASE_URL'] + page_url)
                context = {
                    'title': process_bibtex_string(entry.fields['title']),
                    'abstract': process_bibtex_string(entry.fields['abstract']) if 'abstract' in entry.fields else '',
                    'bibtex': bib_data.to_string('bibtex'),
                    # bib_link only exists when bibtex_dir is set
                    'bibtex_link': '/' + bib_link if bibtex_dir else '',
                    'default_lang': self.site.config['DEFAULT_LANG'],
                    'label': label,
                    'lang': self.site.config['DEFAULT_LANG'],
                    'permalink': self.site.config['SITE_URL'] + page_url,
                    'reference': pub_html,
                    'extra_links': extra_links
                }

                if 'fulltext' in entry.fields and entry.fields['fulltext'].endswith('.pdf'):
                    context['pdf'] = entry.fields['fulltext']

                self.site.render_template(
                    'publication.tmpl',
                    os.path.sep.join((self.output_folder, detail_page_dir, label + '.html')),
                    context,
                )

            html += '</li>'

        if data:  # publication list is nonempty: close the last year group
            html += '</ul>'

        html += '</div>'

        return [nodes.raw('', html, format='html'), ]
def add_publications(generator):
    """
    Populates context with lists of BibTeX publications, presentations
    and posters.

    Configuration
    -------------
    generator.settings['PUBLICATIONS_SRC'],
    generator.settings['PRESENTATIONS_SRC'],
    generator.settings['POSTERS_SRC']:
        local paths to the BibTeX files to read. Each one is optional;
        if none is set the function does nothing.

    Output
    ------
    generator.context['publications'],
    generator.context['presentations'],
    generator.context['posters']:
        One list per configured (and parseable) source, holding tuples
        (key, year, text, bibtex, doi, url, pdf, slides, poster).
        See Readme.md for more details.

    """
    # (settings key, context key) pairs handled by this plugin
    sources = (('PUBLICATIONS_SRC', 'publications'),
               ('PRESENTATIONS_SRC', 'presentations'),
               ('POSTERS_SRC', 'posters'))

    # check if settings are provided via pelicanconf.py
    if not any(setting in generator.settings for setting, _ in sources):
        return

    try:
        from StringIO import StringIO
    except ImportError:
        from io import StringIO
    try:
        from pybtex.database.input.bibtex import Parser
        from pybtex.database.output.bibtex import Writer
        from pybtex.database import BibliographyData, PybtexError
        from pybtex.backends import html
        from pybtex.style.formatting import plain
    except ImportError:
        logger.warning('`pelican_bibtex` failed to load dependency `pybtex`')
        return

    for setting, context_key in sources:
        if setting not in generator.settings:
            continue
        refs_file = generator.settings[setting]
        try:
            bibdata_all = Parser().parse_file(refs_file)
        except PybtexError as e:
            # skip this source only; the other sources are still processed
            logger.warning('`pelican_bibtex` failed to parse file %s: %s' %
                           (refs_file, str(e)))
            continue

        publications = []

        # format entries
        plain_style = plain.Style()
        html_backend = html.Backend()
        all_entries = bibdata_all.entries.values()

        # blank the URL field if DOI is present; the resulting empty link
        # is stripped from the rendered text further down
        # NOTE(review): this writes to the private `_dict` of the pybtex
        # field mapping - confirm this is still needed/valid for the
        # pybtex version in use.
        for entry in all_entries:
            if "doi" in entry.fields.keys():
                entry.fields._dict["url"] = ""

        formatted_entries = plain_style.format_entries(all_entries)
        for formatted_entry in formatted_entries:
            key = formatted_entry.key
            entry = bibdata_all.entries[key]
            year = entry.fields.get('year')
            slides = entry.fields.pop('slides', None)
            poster = entry.fields.pop('poster', None)

            # add PDF link if file is present
            # Zotero exports a 'file' field, which contains the 'Zotero' and
            # 'Filesystem' filenames, separated by ':'
            try:
                filename = entry.fields['file'].split(':')[0]
                if os.access(os.path.join('content', 'download', filename),
                             os.R_OK):
                    pdf = os.path.join('download', filename)
                else:
                    pdf = None
            except KeyError:
                pdf = None

            # render the bibtex string for the entry
            bib_buf = StringIO()
            bibdata_this = BibliographyData(entries={key: entry})
            Writer().write_stream(bibdata_this, bib_buf)
            text = formatted_entry.text.render(html_backend)
            doi = (entry.fields.get('doi')
                   if 'doi' in entry.fields.keys() else "")
            url = (entry.fields.get('url')
                   if 'url' in entry.fields.keys() else "")

            # prettify entries
            # remove BibTeX's {} (escaped and plain)
            text = text.replace("\\{", "")
            text = text.replace("{", "")
            text = text.replace("\\}", "")
            text = text.replace("}", "")
            # subscript 2 in NO2, CO2, SO2
            text = text.replace("NO2", "NO<sub>2</sub>")
            text = text.replace("CO2", "CO<sub>2</sub>")
            text = text.replace("SO2", "SO<sub>2</sub>")
            # for posters and presentations, make for nicer printing
            text = text.replace("In <em>", "Presented at <em>")
            # remove empty URL link
            text = text.replace("<a href=\"\">URL:</a>, ", "")

            publications.append((key, year, text, bib_buf.getvalue(), doi, url,
                                 pdf, slides, poster))

        # store the list of artifacts in the generator context; done once
        # per source so that an empty bibliography still yields an empty list
        generator.context[context_key] = publications
Beispiel #28
0
    def run(self):
        """Render the BibTeX file named by the first argument as HTML.

        Options read from ``self.options``:
          * ``bibtex_dir``: sub-directory of ``self.output_folder`` that
            receives one ``<label>.bib`` file per entry (default 'bibtex';
            a falsy value disables it)
          * ``detail_page_dir``: sub-directory that receives one rendered
            ``<label>.html`` detail page per entry (default 'papers'; a
            falsy value disables it)
          * ``highlight_author``: one or more names separated by ';'; the
            first occurrence of each name in an entry is wrapped in
            ``<strong>``

        Entry fields with special handling: ``fulltext`` (link to the full
        text), ``abstract`` (collapsible abstract) and any field whose name
        starts with ``customlink`` (extra link labelled with the rest of
        the field name). ``abstract`` and ``fulltext`` are left out of the
        published BibTeX. Every entry must have a ``year`` field.

        Returns a one-element list holding a raw HTML docutils node.
        """

        bibtex_dir = self.options.get('bibtex_dir', 'bibtex')
        detail_page_dir = self.options.get('detail_page_dir', 'papers')
        highlight_authors = self.options.get('highlight_author', None)
        if highlight_authors:
            # Strip each name once so the text searched for and the text
            # wrapped in <strong> are identical, and drop empty items
            # (e.g. from a trailing ';') which would otherwise insert an
            # empty <strong></strong> at the start of the entry.
            highlight_authors = [name.strip()
                                 for name in highlight_authors.split(';')
                                 if name.strip()]
        style = Style((self.site.config['BASE_URL'] + detail_page_dir)
                      if detail_page_dir else None)
        self.state.document.settings.record_dependencies.add(self.arguments[0])

        parser = Parser()

        # Sort the publication entries by year reversed
        data = sorted(parser.parse_file(self.arguments[0]).entries.items(),
                      key=lambda e: e[1].fields['year'],
                      reverse=True)

        html = '<div class="publication-list">\n'
        cur_year = None

        # Create the output directories up front. exist_ok tolerates an
        # already existing directory while still surfacing real failures
        # (e.g. permissions) instead of hiding them until the first write.
        if bibtex_dir:
            os.makedirs(os.path.sep.join((self.output_folder, bibtex_dir)),
                        exist_ok=True)

        if detail_page_dir:
            os.makedirs(
                os.path.sep.join((self.output_folder, detail_page_dir)),
                exist_ok=True)

        for label, entry in data:
            # print a year title when year changes
            if entry.fields['year'] != cur_year:
                if cur_year is not None:  # not first year group
                    html += '</ul>'
                cur_year = entry.fields['year']
                html += '<h3>{}</h3>\n<ul>'.format(cur_year)

            entry.label = label  # Pass label to the style.
            pub_html = list(style.format_entries(
                (entry, )))[0].text.render_as('html')
            if highlight_authors:  # highlight one of several authors (usually oneself)
                for highlight_author in highlight_authors:
                    pub_html = pub_html.replace(
                        highlight_author,
                        '<strong>{}</strong>'.format(highlight_author), 1)
            html += '<li class="publication" style="padding-bottom: 1em;">' + pub_html

            extra_links = ""

            if 'fulltext' in entry.fields:  # the link to the full text, usually a link to the pdf file
                extra_links += '[<a href="{}">full text</a>] '.format(
                    entry.fields['fulltext'])

            bibtex_fields = dict(entry.fields)
            # Collect custom links (fields starting with "customlink"); the
            # remainder of the field name becomes the link text.
            custom_links = dict()
            for key, value in bibtex_fields.items():
                if key.startswith('customlink'):
                    custom_links[key[len('customlink'):]] = value
            # custom fields (custom links)
            for key, value in custom_links.items():
                extra_links += '[<a href="{}">{}</a>] '.format(value, key)

            # Remove some fields for the publicly available BibTeX file since they are mostly only
            # used by this plugin.
            for field_to_remove in ('abstract', 'fulltext'):
                if field_to_remove in bibtex_fields:
                    del bibtex_fields[field_to_remove]
            # Prepare for the bib file. Note detail_page_dir may need bib_data later.
            bibtex_entry = Entry(entry.type, bibtex_fields, entry.persons)
            bib_data = BibliographyData(dict({label: bibtex_entry}))
            bib_string = bib_data.to_string('bibtex')
            # Toggle link for the hidden BibTeX block (relies on jQuery's `$`).
            extra_links += '''
            [<a href="javascript:void(0)" onclick="
            (function(target, id) {{
              if ($('#' + id).css('display') == 'block')
              {{
                $('#' + id).hide('fast');
                $(target).text('BibTeX&#x25BC;')
              }}
              else
              {{
                $('#' + id).show('fast');
                $(target).text('BibTeX&#x25B2;')
              }}
            }})(this, '{}');">BibTeX&#x25BC;</a>]
            '''.format('bibtex-' + label)
            if bibtex_dir:  # write bib files to bibtex_dir for downloading
                bib_link = '{}/{}.bib'.format(bibtex_dir, label)
                bib_data.to_file('/'.join([self.output_folder, bib_link]),
                                 'bibtex')

            if extra_links or detail_page_dir or 'abstract' in entry.fields:
                html += '<br>'

            # Add the abstract link.
            if 'abstract' in entry.fields:
                html += '''
                [<a href="javascript:void(0)" onclick="
                (function(target, id) {{
                  if ($('#' + id).css('display') == 'block')
                {{
                  $('#' + id).hide('fast');
                  $(target).text('abstract&#x25BC;')
                }}
                else
                {{
                  $('#' + id).show('fast');
                  $(target).text('abstract&#x25B2;')
                }}
                }})(this, '{}');">abstract&#x25BC;</a>] '''.format(
                    'abstract-' + label)

            display_none = '<div id="{}" style="display:none"><pre>{}</pre></div>'
            bibtex_display = display_none.format('bibtex-' + label, bib_string)

            abstract_text = str(LaTeXParser(entry.fields['abstract']).parse()
                                ) if 'abstract' in entry.fields else ''
            if detail_page_dir:  # render the details page of a paper
                page_url = '/'.join((detail_page_dir, label + '.html'))
                html += '[<a href="{}">details</a>] '.format(
                    self.site.config['BASE_URL'] + page_url)
                context = {
                    'title': str(LaTeXParser(entry.fields['title']).parse()),
                    'abstract': abstract_text,
                    # reuse the string serialized above
                    'bibtex': bib_string,
                    # bib_link only exists when bibtex_dir is set
                    'bibtex_link': '/' + bib_link if bibtex_dir else '',
                    'default_lang': self.site.config['DEFAULT_LANG'],
                    'label': label,
                    'lang': self.site.config['DEFAULT_LANG'],
                    'permalink': self.site.config['SITE_URL'] + page_url,
                    'reference': pub_html,
                    'extra_links': extra_links + bibtex_display
                }

                if 'fulltext' in entry.fields and entry.fields[
                        'fulltext'].endswith('.pdf'):
                    context['pdf'] = entry.fields['fulltext']

                self.site.render_template(
                    'publication.tmpl',
                    os.path.sep.join((self.output_folder, detail_page_dir,
                                      label + '.html')),
                    context,
                )

            html += extra_links

            # Add the hidden abstract and bibtex.
            if 'abstract' in entry.fields:
                html += '''
                <div id="{}" class="publication-abstract" style="display:none">
                <blockquote>{}</blockquote></div>
                '''.format('abstract-' + label, abstract_text)
            html += bibtex_display
            html += '</li>'

        if data:  # publication list is nonempty: close the last year group
            html += '</ul>'

        html += '</div>'

        return [
            nodes.raw('', html, format='html'),
        ]
Beispiel #29
0
    def update_contents(self, new_store_contents):
        """
        Structure -> mpid -> BibTeX references from MP -> (optional doi lookup
        via Crossref) -> formatting.
        Formatting is very messy right now.
        DOI lookup and (possibly) formatting should be cached in a builder.

        Three rendering paths exist, selected by instance flags:
          * ``self.use_crossref`` and ``self.use_crossref_formatting``:
            Crossref supplies pre-formatted reference text (Markdown output)
          * ``self.use_crossref`` only: Crossref supplies BibTeX and
            metadata, formatting is done here (HTML components)
          * neither: the stored BibTeX is formatted with pybtex (Markdown)

        Raises PreventUpdate when the stored object is not a Structure or
        when no Materials Project entry matches it.

        Returns an ``html.Div`` with a reference-count label followed by
        the formatted references.
        """

        struct = self.from_data(new_store_contents)

        if not isinstance(struct, Structure):
            raise PreventUpdate(
                "Literature mentions can only be retrieved for crystallographic "
                "structures at present and not molecules. Please make a feature "
                "request if this would be useful for you, and it will be "
                "prioritized."
            )

        with MPRester() as mpr:
            mpids = mpr.find_structure(struct)

            if len(mpids) == 0:
                raise PreventUpdate(
                    "No structures in the Materials Project database match this "
                    "crystal structure, so literature mentions cannot be retrieved. "
                    "Please submit this structure to Materials Project if you'd "
                    "like it to be added to the Materials Project database."
                )

            # one BibTeX string per matching material id
            all_references = []
            for mpid in mpids:
                all_references.append(mpr.get_materials_id_references(mpid))
                self.logger.debug(f"Retrieved references for {mpid}.")

        if self.use_crossref:

            cr = Crossref(mailto=CROSSREF_MAILTO)
            # split the BibTeX blobs into single entries and de-duplicate
            individual_references = set()
            for references in all_references:
                individual_references.update(references.split("\n\n"))

            # exclude Materials Project references (these are intended to be
            # references for the structure specifically)
            individual_references = {
                ref for ref in individual_references if "Jain2013" not in ref
            }

            # best Crossref match for each reference
            works = [cr.works(query=ref, limit=1) for ref in individual_references]
            self.logger.debug(f"Retrieved {len(works)} works from Crossref.")

            items = [
                work["message"]["items"][0]
                for work in works
                if len(work["message"]["items"]) > 0
            ]

            # keep only sufficiently confident matches, keyed by DOI
            dois_to_item = {
                item["DOI"]: {
                    "cited-by": item.get("is-referenced-by-count", 0),
                    "score": item["score"],
                    "title": item.get("title", None),
                    "authors": item.get("author", []),
                    # `or [None]` also covers a present-but-empty list,
                    # which would otherwise raise IndexError
                    "journal": (item.get("container-title") or [None])[0],
                    "issue": item.get("issue", None),
                    "volume": item.get("volume", None),
                    "pages": item.get("page", None),
                    "date-parts": item.get("issued", {}).get("date-parts", [[None]]),
                }
                for item in items
                if item["score"] > 40
            }

            num_refs = len(dois_to_item)
            # most-cited first
            sorted_dois = sorted(
                dois_to_item,
                key=lambda doi: -dois_to_item[doi]["cited-by"],
            )

            if self.use_crossref_formatting:
                # use Crossref to retrieve pre-formatted text

                # remove leading "1. " from Science CSL style
                refs = {
                    doi: content_negotiation(ids=doi, format="text", style="science")[
                        3:
                    ]
                    for doi in dois_to_item
                }
                self.logger.debug(
                    f"Retrieved {len(refs)} formatted references from Crossref."
                )
                md = "  \n\n".join(
                    f"> [{refs[doi]}](https://dx.doi.org/{doi}) "
                    f"Cited by {dois_to_item[doi]['cited-by']}."
                    for doi in sorted_dois
                )
                formatted_references = dcc.Markdown(
                    md, className="mpc-markdown"
                )

            else:
                # else retrieve BibTeX entries to extract a nice author list
                # and perform our own formatting

                entries = {
                    doi: content_negotiation(ids=doi, format="bibtex")
                    for doi in sorted_dois
                }

                formatted_entries = []
                for doi, entry in entries.items():
                    author_string = self._bibtex_entry_to_author_text(entry)
                    journal_div = self._item_to_journal_div(dois_to_item[doi])

                    formatted_entries.append(
                        html.Blockquote(
                            [
                                html.A(
                                    [
                                        html.Div(
                                            [
                                                html.I(
                                                    # necessary since titles can contain HTML for superscripts etc.
                                                    dcc.Markdown(
                                                        dois_to_item[doi]["title"],
                                                        dangerously_allow_html=True
                                                    )
                                                )
                                            ]
                                        ),
                                        html.Div([author_string]),
                                        html.Div(
                                            [
                                                journal_div,
                                                html.Span(
                                                    f" Cited by {dois_to_item[doi]['cited-by']}."
                                                ),
                                            ]
                                        ),
                                    ],
                                    href=f"https://dx.doi.org/{doi}",
                                )
                            ],
                            className="mpc",
                            style={"padding-left": "1rem", "margin-bottom": "1rem"}
                        )
                    )

                formatted_references = html.Div(formatted_entries)
        else:
            # this uses pybtex directly on stored BibTeX entries from MP
            # most-accurate references and faster since no Crossref lookup
            # is required but no dois/hyperlinks available
            all_entries = {}
            for references in all_references:
                all_entries.update(Parser().parse_string(references).entries)
            md = self._pybtex_entries_to_markdown(all_entries)
            formatted_references = dcc.Markdown(md, className="mpc-markdown")
            num_refs = len(all_entries)

        return html.Div(
            [
                Label(f"{num_refs} references found{':' if num_refs>0 else '.'}"),
                formatted_references,
            ],
            style={"max-height": "20rem", "overflow-y": "scroll"},
        )
Beispiel #30
0
def add_publications(generator):
    """
    Populates context with a list of BibTeX publications.

    Configuration
    -------------
    generator.settings['PUBLICATIONS']:
        Dictionary that contains bibliographies:
          The key denotes the bibliography's name (also used as its title
          unless a 'title' is given).
          The value is a dictionary describing the bibliography. 'file'
          (BibTeX file, joined onto generator.settings['PATH']) is
          mandatory. Optional keys and their defaults: 'title' (the key),
          'header' (True), 'split' (True), 'split_link' (True),
          'bottom_link' (True), 'all_bibtex' (False), 'highlight' ([]),
          'group_type' (False).
        Mandatory for this plugin.
    generator.settings['PUBLICATIONS_NAVBAR']:
        When absent, generator.context['PUBLICATIONS_NAVBAR'] is set to True.

    Output
    ------
    generator.context['publications']:
        Ordered dictionary mapping each bibliography name to a dictionary
        with the keys 'title', 'path', 'header', 'split', 'bottom_link',
        'split_link', 'all_bibtex' and 'data'.
        'data' is a list of tuples
        (key, type, year, text, tags, display_tags, bibtex, pdf, slides,
        poster, doi, url), newest year first ("in press" counts as 9999);
        with 'group_type' enabled, ties are ordered by entry type.
        Entries whose year is missing or not a number are sorted last.

    Nothing is returned. The function returns early (leaving the context as
    populated so far) when pybtex is unavailable or a file fails to parse.
    """

    if 'PUBLICATIONS' not in generator.settings:
        return
    if 'PUBLICATIONS_NAVBAR' not in generator.settings:
        generator.context['PUBLICATIONS_NAVBAR'] = True

    try:
        from StringIO import StringIO
    except ImportError:
        from io import StringIO
    try:
        from pybtex.database.input.bibtex import Parser
        from pybtex.database.output.bibtex import Writer
        from pybtex.database import BibliographyData, PybtexError
        from pybtex.backends import html
        from pybtex.style.formatting import plain
    except ImportError:
        logger.warning('`pelican_bibtex` failed to load dependency `pybtex`')
        return

    def year_sort_key(year):
        """Return a key that sorts years in descending order.

        A missing or non-numeric year maps to 0 so it sorts after every
        real year instead of raising during the sort.
        """
        if year is None:
            return 0
        try:
            return -int(str(year).replace("in press", "9999"))
        except ValueError:
            return 0

    refs = generator.settings['PUBLICATIONS']
    generator.context['publications'] = collections.OrderedDict()

    for rid in refs:
        ref = refs[rid]
        bibfile = os.path.join(generator.settings['PATH'], ref['file'])
        try:
            bibdata_all = Parser().parse_file(bibfile)
        except PybtexError as e:
            logger.warning('`pelican_bibtex` failed to parse file %s: %s',
                           bibfile, str(e))
            # A broken file aborts processing of the remaining bibliographies.
            return

        # Per-bibliography options with their defaults.
        title = ref.get('title', rid)
        header = ref.get('header', True)
        split = ref.get('split', True)
        split_link = ref.get('split_link', True)
        bottom_link = ref.get('bottom_link', True)
        all_bibtex = ref.get('all_bibtex', False)
        highlights = ref.get('highlight', [])
        group_type = ref.get('group_type', False)

        publications = []

        # format entries
        plain_style = plain.Style()
        html_backend = html.Backend()
        formatted_entries = plain_style.format_entries(
            bibdata_all.entries.values())

        for formatted_entry in formatted_entries:
            key = formatted_entry.key
            entry = bibdata_all.entries[key]
            year = entry.fields.get('year')
            typee = entry.type

            raw_tags = entry.fields.get('tags')
            if raw_tags:
                tags = [tag.strip() for tag in raw_tags.split(';')]
            else:
                tags = []

            # "doi-open" / "url-open" are kept in `tags` but not displayed.
            display_tags = [
                x for x in tags if x not in ("doi-open", "url-open")
            ]

            # Read the plugin-specific fields before they are stripped below.
            pdf = entry.fields.get('pdf', None)
            slides = entry.fields.get('slides', None)
            poster = entry.fields.get('poster', None)
            doi = entry.fields.get('doi', None)
            url = entry.fields.get('url', None)

            # clean fields from appearing in bibtex and on website
            # (this mutates the parsed entry itself, not a copy)
            for to_del in ['pdf', 'slides', 'poster', 'tags']:
                entry.fields.pop(to_del, None)

            # render the bibtex string for the entry
            bib_buf = StringIO()
            bibdata_this = BibliographyData(entries={key: entry})
            Writer().write_stream(bibdata_this, bib_buf)

            # clean more fields from appearing on website; done after the
            # BibTeX string was written so doi/url stay in the BibTeX output
            for to_del in ['doi', 'url']:
                entry.fields.pop(to_del, None)

            # re-format the entry now that the extra fields are gone
            entry_clean = next(
                plain_style.format_entries(bibdata_this.entries.values()),
                None)

            # apply highlight (strong)
            text = entry_clean.text.render(html_backend)
            for replace in highlights:
                text = text.replace(replace,
                                    '<strong>' + replace + '</strong>')

            publications.append(
                (key, typee, year, text, tags, display_tags,
                 bib_buf.getvalue(), pdf, slides, poster, doi, url))

        if group_type:
            # newest first, then grouped by entry type within a year
            def sort_key(pub):
                return (year_sort_key(pub[2]), pub[1])
        else:
            def sort_key(pub):
                return year_sort_key(pub[2])

        generator.context['publications'][rid] = {
            'title': title,
            'path': os.path.basename(bibfile),
            'header': header,
            'split': split,
            'bottom_link': bottom_link,
            'split_link': split_link,
            'all_bibtex': all_bibtex,
            'data': sorted(publications, key=sort_key),
        }
    def run(self):
        """Render the BibTeX files in ``self.arguments`` as an HTML publication list.

        Options read from ``self.options``:
          * ``bibtex_dir`` (default ``'bibtex'``): sub-directory of
            ``self.output_folder`` receiving one ``<label>.bib`` per entry;
            a false value disables it.
          * ``detail_page_dir`` (default ``'papers'``): sub-directory that
            receives one detail page per entry rendered from
            ``publication.tmpl``; a false value disables it.
          * ``highlight_author``: ``;``-separated author names wrapped in
            ``<strong>`` (first occurrence per entry). Surrounding blanks
            and empty names are ignored.

        Entries are grouped under ``<h3>`` year headings, newest first. When
        a label occurs more than once only the first occurrence is used.

        Returns a one-element list holding a raw HTML node.
        Raises ``KeyError`` if an entry has no ``year`` field.
        """

        bibtex_dir = self.options.get('bibtex_dir', 'bibtex')
        detail_page_dir = self.options.get('detail_page_dir', 'papers')
        highlight_authors = self.options.get('highlight_author', None)
        if highlight_authors:
            highlight_authors = highlight_authors.split(';')
        style = Style(self.site.config['BASE_URL'] + detail_page_dir if detail_page_dir else None)
        # NOTE(review): only the first argument is recorded as a dependency
        # although every argument is parsed below - confirm this is intended.
        self.state.document.settings.record_dependencies.add(self.arguments[0])

        all_entries = []
        labels = set()
        for a in self.arguments:
            parser = Parser()
            for item in parser.parse_file(a).entries.items():
                if item[0] in labels:  # duplicated entries
                    LOGGER.warning(
                        ("publication_list: BibTeX entries with duplicated labels are found. "
                         "Only the first occurrence will be used."))
                    continue
                labels.add(item[0])
                all_entries.append(item)
        # Sort the publication entries by year reversed
        data = sorted(all_entries, key=lambda e: e[1].fields['year'], reverse=True)

        html = '<div class="publication-list">\n'
        cur_year = None

        # Create the output sub-directories for the options that are set.
        for sub_dir in (bibtex_dir, detail_page_dir):
            if sub_dir:
                try:
                    os.makedirs(os.path.sep.join((self.output_folder, sub_dir)))
                except OSError:  # probably because the dir already exists
                    pass

        for label, entry in data:
            # print a year title when year changes
            if entry.fields['year'] != cur_year:
                if cur_year is not None:  # not first year group
                    html += '</ul>'
                cur_year = entry.fields['year']
                html += '<h3>{}</h3>\n<ul>'.format(cur_year)

            entry.label = label  # Pass label to the style.
            pub_html = list(style.format_entries((entry,)))[0].text.render_as('html')
            if highlight_authors:  # highlight one of several authors (usually oneself)
                for highlight_author in highlight_authors:
                    # Strip first so that "A B; C D" style options work: the
                    # number of blanks must be counted on the same string
                    # that is searched for.
                    author_name = highlight_author.strip()
                    if not author_name:  # e.g. trailing ';' in the option
                        continue
                    # We need to replace all occurrence of space except for the last one with
                    # &nbsp;, since pybtex does it for all authors
                    count = author_name.count(' ') - 1
                    pub_html = pub_html.replace(
                        author_name.replace(' ', '&nbsp;', count),
                        '<strong>{}</strong>'.format(author_name), 1)
            html += '<li class="publication" style="padding-bottom: 1em;">' + pub_html

            extra_links = ""

            if 'fulltext' in entry.fields:  # the link to the full text, usually a link to the pdf file
                extra_links += '[<a href="{}">full text</a>] '.format(entry.fields['fulltext'])

            bibtex_fields = dict(entry.fields)
            # Custom links: every field named "customlink<text>" becomes a
            # link labelled <text>. The fields stay in the exported BibTeX.
            for key, value in bibtex_fields.items():
                if key.startswith('customlink'):
                    extra_links += '[<a href="{}">{}</a>] '.format(value, key[len('customlink'):])

            # Remove some fields for the publicly available BibTeX file since they are mostly only
            # used by this plugin.
            for field_to_remove in ('abstract', 'fulltext'):
                bibtex_fields.pop(field_to_remove, None)
            # Prepare for the bib file. Note detail_page_dir may need bib_data later.
            bibtex_entry = Entry(entry.type, bibtex_fields, entry.persons)
            bib_data = BibliographyData({label: bibtex_entry})
            bib_string = bib_data.to_string('bibtex')
            extra_links += '''
            [<a href="javascript:void(0)" onclick="
            (function(target, id) {{
              if ($('#' + id).css('display') == 'block')
              {{
                $('#' + id).hide('fast');
                $(target).text('BibTeX&#x25BC;')
              }}
              else
              {{
                $('#' + id).show('fast');
                $(target).text('BibTeX&#x25B2;')
              }}
            }})(this, '{}');">BibTeX&#x25BC;</a>]
            '''.format('bibtex-' + label)
            if bibtex_dir:  # write bib files to bibtex_dir for downloading
                bib_link = '{}/{}.bib'.format(bibtex_dir, label)
                bib_data.to_file('/'.join([self.output_folder, bib_link]), 'bibtex')

            if extra_links or detail_page_dir or 'abstract' in entry.fields:
                html += '<br>'

            # Add the abstract link.
            if 'abstract' in entry.fields:
                html += '''
                [<a href="javascript:void(0)" onclick="
                (function(target, id) {{
                  if ($('#' + id).css('display') == 'block')
                {{
                  $('#' + id).hide('fast');
                  $(target).text('abstract&#x25BC;')
                }}
                else
                {{
                  $('#' + id).show('fast');
                  $(target).text('abstract&#x25B2;')
                }}
                }})(this, '{}');">abstract&#x25BC;</a>] '''.format('abstract-' + label)

            # Hidden block toggled by the BibTeX link above.
            display_none = '<div id="{}" style="display:none"><pre>{}</pre></div>'
            bibtex_display = display_none.format(
                'bibtex-' + label, bib_string)

            abstract_text = str(
                LaTeXParser(entry.fields['abstract']).parse()) if 'abstract' in entry.fields else ''
            if detail_page_dir:  # render the details page of a paper
                page_url = '/'.join((detail_page_dir, label + '.html'))
                html += '[<a href="{}">details</a>] '.format(
                    self.site.config['BASE_URL'] + page_url)
                context = {
                    'title': str(LaTeXParser(entry.fields['title']).parse()),
                    'abstract': abstract_text,
                    'bibtex': bib_string,
                    # bib_link only exists when bibtex_dir is set
                    'bibtex_link': '/' + bib_link if bibtex_dir else '',
                    'default_lang': self.site.config['DEFAULT_LANG'],
                    'label': label,
                    'lang': self.site.config['DEFAULT_LANG'],
                    'permalink': self.site.config['SITE_URL'] + page_url,
                    'reference': pub_html,
                    'extra_links': extra_links + bibtex_display
                }

                if 'fulltext' in entry.fields:
                    context['pdf'] = entry.fields['fulltext']

                self.site.render_template(
                    'publication.tmpl',
                    os.path.sep.join((self.output_folder, detail_page_dir, label + '.html')),
                    context,
                )

            html += extra_links

            # Add the hidden abstract and bibtex.
            if 'abstract' in entry.fields:
                html += '''
                <div id="{}" class="publication-abstract" style="display:none">
                <blockquote>{}</blockquote></div>
                '''.format('abstract-' + label, abstract_text)
            html += bibtex_display
            html += '</li>'

        if len(data) != 0:  # publication list is nonempty
            html += '</ul>'

        html += '</div>'

        return [nodes.raw('', html, format='html'), ]
Beispiel #32
0
def verifyBib(bibfile, verifiedfile="", unverifiedfile=""):
    """Verify the entries of a bib file against data retrieved through their DOI.

    For every entry the DOI is taken from the entry itself or, failing that,
    searched via ``doi_finder`` (Crossref first, then Google). When a DOI is
    found the entry is downloaded with ``doi2biblatex`` and compared with the
    local one; differences are resolved through ``changeThisforThat`` and, for
    doubtful titles, an interactive ``raw_input`` prompt.

    Args:
        bibfile: path of the bib file to check (read as UTF-8).
        verifiedfile: output path for verified entries; defaults to
            ``bibfile + ".verified.bib"``.
        unverifiedfile: output path for entries that could not be verified;
            defaults to ``bibfile + ".unverified.bib"``.

    Returns:
        The string ``"done"``.

    Note: this function uses Python 2 names (``raw_input``, ``unicode``,
    ``str.decode``).
    """
    if verifiedfile == "":
        verifiedfile = bibfile + ".verified.bib"
    if unverifiedfile == "":
        unverifiedfile = bibfile + ".unverified.bib"

    with open(bibfile, "r") as f:
        bstr = f.read()
    # Assume the input is utf8 encoded
    bstr = bstr.decode('utf8')

    # Parse the bibtex file
    bib_parser = Parser()
    bib_data = bib_parser.parse_stream(StringIO(bstr))

    # first I groom the original bib file:
    bib_data = cleanBibliographyData(bib_data)

    # The parsers are only used as containers for the two output databases.
    newbib_parser = Parser()
    unverif_parser = Parser()
    entrynumber = 0
    check = 0
    for (key, entry) in bib_data.entries.items():
        check = check + 1
        print("-------------------------------------")
        print("Checking entry %d of %d" % (check, len(bib_data.entries)))
        print("Entry: %s..." % key)
        title = entry.fields['title'] if 'title' in entry.fields else ""
        # NOTE(review): presence is tested in entry.persons but the value is
        # read from entry.fields - presumably the installed pybtex exposes
        # persons through the fields mapping; confirm.
        author = entry.fields['author'] if 'author' in entry.persons else ""
        journal = entry.fields[_journal_field] if _journal_field in entry.fields else ""
        volume = entry.fields['volume'] if 'volume' in entry.fields else ""
        pages = entry.fields['pages'] if 'pages' in entry.fields else ""
        print("('%s' by %s)" % (title, author))

        # First look for the doi in the entry, otherwise use doi_finder.
        # If a doi is found, download the entry from the internet, compare
        # it with the local entry and ask for changes.
        doi = None
        if 'doi' in entry.fields:
            doi = entry.fields['doi']
            print("doi found in bib file...")
            verificationmode = "from given doi"
        else:  # if 'doi' is not in entry.fields, we try to find it
            print("Looking for doi in crossref...")
            doi = doi_finder.crossref_auth_title_to_doi(doi_finder.detex(author), doi_finder.detex(title))
            verificationmode = "from searched doi in crossref"

            if doi is not None:
                print("doi found in crossref")
            else:
                print("Looking for doi in google...")
                doi = doi_finder.google_doi(journal, volume, pages, doi_finder.detex(title))
                verificationmode = "from searched doi in google"
                if doi is not None:
                    print("doi found in google scholars")

        internet_entry = None
        if doi is not None:
            print("Retrieving fields from internet...")
            entriesfrominternet = doi2biblatex(doi)

            # The downloaded database is expected to hold a single entry,
            # so only the first one is looked at.
            if entriesfrominternet is not None and len(entriesfrominternet.entries.keys()) > 0:
                internet_entry = entriesfrominternet.entries[list(entriesfrominternet.entries.keys())[0]]

            # check if it's the right entry:
            if internet_entry is not None and "title" in internet_entry.fields:
                titlefrominternet = internet_entry.fields["title"]
                if doi_finder.fuzzy_match(title.lower(), titlefrominternet.lower()) < .9:
                    usedoi = raw_input("maybe wrong entry... Title from internet is %s. Is it right? y/[N]:" % titlefrominternet)
                    if usedoi == "" or usedoi[0].capitalize() == "N":
                        print("Avoiding entry...")
                        entriesfrominternet = None
                    else:
                        print("Using entry from internet...")
            else:
                print("incomplete data from internet, avoiding entry...")
                entriesfrominternet = None
        else:
            print("doi not found, avoiding entry...")
            entriesfrominternet = None

        if entriesfrominternet is not None:
            fieldsfrominternet = internet_entry.fields
            personsfrominternet = internet_entry.persons  # authors, editors, etc
            verifiedfields = ""

            for persons in personsfrominternet:
                verifiedfields = persons + "; " + verifiedfields

                if persons in entry.persons:  # check if there's a person field like author, maybe editor
                    if entry.persons[persons] != personsfrominternet[persons]:  # then compare with the old one and ask
                        bibpersons = entry.persons[persons]
                        inetpersons = personsfrominternet[persons]
                        # Number of positions present in both lists; using it
                        # (instead of the loop index afterwards) stays correct
                        # when one of the lists is empty.
                        common = min(len(bibpersons), len(inetpersons))

                        # First checks for badly written entries:
                        for i in range(common):  # checks the common persons
                            lastbib = bibpersons[i].get_part_as_text("last")
                            lastinet = inetpersons[i].get_part_as_text("last")
                            firstbib = bibpersons[i].get_part_as_text("first")
                            firstinet = inetpersons[i].get_part_as_text("first")

                            # Skip when the stored first name is just the
                            # initial of the online one. `firstinet` may be
                            # empty, so guard before indexing it.
                            if lastbib == lastinet and firstinet and (firstbib == firstinet[0] or firstbib == firstinet[0] + "."):
                                print("incomplete last name in internet for %s - Skipping name %s ..." % (unicode(bibpersons[i]), unicode(inetpersons[i])))
                            else:  # if there's the same amount of info then check the names
                                if unicode(bibpersons[i]).strip() != unicode(inetpersons[i]).strip():
                                    bibpersons[i] = changeThisforThat(bibpersons[i], inetpersons[i], "Change for %s." % persons)

                        # check for missing or extra authors
                        if len(bibpersons) < len(inetpersons):  # missing authors in bib file
                            for candidate in inetpersons[common:]:
                                missing = changeThisforThat(None, candidate, "Missing person for %s." % persons)
                                if missing is not None:
                                    bibpersons.append(missing)
                        elif len(bibpersons) > len(inetpersons):  # extra authors in bib file
                            # Collect the survivors first and replace the tail
                            # in one step; removing while iterating by index
                            # skipped persons and could run past the end.
                            kept = []
                            for candidate in bibpersons[common:]:
                                extra = changeThisforThat(candidate, None, "Extra person for %s." % persons)
                                if extra is not None:
                                    kept.append(candidate)
                            bibpersons[common:] = kept

                else:  # if there's a new field online, the field has to be added
                    # NOTE(review): the new role is stored in entry.fields,
                    # not entry.persons - confirm this is intended.
                    entry.fields[persons] = fieldsfrominternet[persons]

            for fieldname in fieldsfrominternet:
                verifiedfields = fieldname + "; " + verifiedfields
                if fieldname in entry.fields:  # i check if there are new fields first
                    if entry.fields[fieldname] != fieldsfrominternet[fieldname]:  # then compare with the old ones and ask
                        # don't try to change the title if it's in title case:
                        if not (fieldname == "title" and fieldsfrominternet[fieldname] == titlecase.titlecase(fieldsfrominternet[fieldname])):
                            entry.fields[fieldname] = changeThisforThat(entry.fields[fieldname], fieldsfrominternet[fieldname], "Change for %s." % fieldname)
                        else:
                            print("avoiding title '%s' because of titlecase" % fieldsfrominternet[fieldname])
                else:  # if there's a new field online, the field has to be added
                    entry.fields[fieldname] = fieldsfrominternet[fieldname]

            # Record in the entry which fields were checked and how.
            textverif = "%s were verified %s" % (verifiedfields, verificationmode)
            entry.fields["citation-verif"] = textverif
            # add to new bib
            newbib_parser.data.add_entry(key, entry)  # these are the verified entries
            entrynumber = entrynumber + 1
        else:  # we failed to find it in internet
            unverif_parser.data.add_entry(key, entry)

    savebib(unverif_parser.data, unverifiedfile)
    savebib(newbib_parser.data, verifiedfile)
    print("%s/%s entries were checked. %s were verified" % (check, len(bib_data.entries), entrynumber))
    return "done"
Beispiel #33
0
def add_publications(generator):
    """
    Populates context with a list of BibTeX publications.

    Configuration
    -------------
    generator.settings['PUBLICATIONS_SRC']:
        local path to the BibTeX file to read.

    Output
    ------
    generator.context['publications']:
        List of dictionaries, one per entry, with the keys 'bibtex', 'doi',
        'entry', 'key', 'pdf', 'poster', 'slides', 'text', 'url', 'note',
        'year', 'authorizer', 'acceptance' and 'stats'.
        'text' is the HTML rendering in the plain style with BibTeX braces
        removed; 'bibtex' is the entry serialised back to BibTeX.

    Nothing is returned. The context is left untouched when the setting is
    missing, pybtex cannot be imported or the file fails to parse.
    """
    if 'PUBLICATIONS_SRC' not in generator.settings:
        return
    try:
        from StringIO import StringIO
    except ImportError:
        from io import StringIO
    try:
        from pybtex.database.input.bibtex import Parser
        from pybtex.database.output.bibtex import Writer
        from pybtex.database import BibliographyData, PybtexError
        from pybtex.backends import html
        from pybtex.style.formatting import plain
    except ImportError:
        LOGGER.warning('`pelican_bibtex` failed to load dependency `pybtex`')
        return

    try:
        bib_items = Parser().parse_file(generator.settings['PUBLICATIONS_SRC'])
    except PybtexError as err:
        LOGGER.warning('`pelican_bibtex` failed to parse file %s: %s',
                       generator.settings['PUBLICATIONS_SRC'],
                       str(err))
        return

    publications = []

    # One backend serves every entry; no need to rebuild it per iteration.
    html_backend = html.Backend()

    for fmt_entry in plain.Style().format_entries(bib_items.entries.values()):
        key = fmt_entry.key
        entry = bib_items.entries[key]

        # Render the bibtex string for the entry
        buf = StringIO()
        Writer().write_stream(BibliographyData(entries={key: entry}), buf)

        # Prettify BibTeX entries: drop escaped braces first, then bare ones
        text = fmt_entry.text.render(html_backend)
        text = text.replace(r"\{", "").replace(r"\}", "")
        text = text.replace("{", "").replace("}", "")

        publications.append({'bibtex' : buf.getvalue(),
                             'doi'    : get_field(entry, 'doi'),
                             'entry'  : entrytype(entry.type),
                             'key'    : key,
                             'pdf'    : get_field(entry, 'pdf'),
                             'poster' : get_field(entry, 'poster'),
                             'slides' : get_field(entry, 'slides'),
                             'text'   : text,
                             'url'    : get_field(entry, 'url'),
                             'note'    : get_field(entry, 'note'),
                             'year'   : entry.fields.get('year'),
                             'authorizer': get_field(entry, 'authorizer'),
                             'acceptance': get_field(entry, 'acceptance'),
                             'stats': get_field(entry, 'stats')
                             })

    generator.context['publications'] = publications
def add_publications(generator):
    """
    Populates context with a list of BibTeX publications.

    Configuration
    -------------
    generator.settings['PUBLICATIONS_SRC']:
        local path to the BibTeX file to read.

    Output
    ------
    generator.context['publications']:
        List of tuples
        (type, key, year, text, bibtex, pdf, doi, url, arxiv).
        'text' is the HTML rendering produced by ``rahul_style.Style`` with
        braces, ``\\textbf `` and backslashes removed. For entries that have
        a DOI the 'url' field is blanked before formatting.

    Nothing is returned. The context is left untouched when the setting is
    missing, a dependency cannot be imported or the file fails to parse.
    """
    if 'PUBLICATIONS_SRC' not in generator.settings:
        return
    try:
        from StringIO import StringIO
    except ImportError:
        from io import StringIO
    try:
        from pybtex.database.input.bibtex import Parser
        from pybtex.database.output.bibtex import Writer
        from pybtex.database import BibliographyData, PybtexError
        from pybtex.backends import html
        from rahul_style import Style as RahulStyle

    except ImportError:
        logger.warning('`pelican_bibtex` failed to load dependency `pybtex`')
        return

    refs_file = generator.settings['PUBLICATIONS_SRC']
    try:
        bibdata_all = Parser().parse_file(refs_file)
    except PybtexError as e:
        logger.warning('`pelican_bibtex` failed to parse file %s: %s',
                       refs_file, str(e))
        return

    publications = []

    # format entries
    plain_style = RahulStyle()
    html_backend = html.Backend()

    # Emit an explicit <BR/> for the backend's line-break symbol.
    html_backend.symbols['br'] = u'<BR/>'

    all_entries = bibdata_all.entries.values()

    # remove URL field if DOI is present
    for entry in all_entries:
        if "doi" in entry.fields.keys():
            # NOTE(review): writes to the private `_dict` of the fields
            # mapping - presumably to blank the value without going through
            # the public setter; confirm against the installed pybtex.
            entry.fields._dict["url"] = ""

    formatted_entries = plain_style.format_entries(all_entries)
    for formatted_entry in formatted_entries:
        key = formatted_entry.key
        entry = bibdata_all.entries[key]
        pub_type = entry.type
        year = entry.fields.get('year')
        # These stay in the field dict and therefore also appear in the
        # BibTeX string rendered below.
        pdf = entry.fields.get('pdf', None)
        doi = entry.fields.get('doi', None)
        url = entry.fields.get('url', None)
        arxiv = entry.fields.get('arxiv', None)

        # render the bibtex string for the entry
        bib_buf = StringIO()
        bibdata_this = BibliographyData(entries={key: entry})
        Writer().write_stream(bibdata_this, bib_buf)

        text = formatted_entry.text.render(html_backend)

        # prettify entries; the order matters:
        #   1. BibTeX's {} (escaped and bare),
        #   2. "\textbf " used for the cv,
        #   3. any remaining backslash (e.g. the one that comes after Proc.)
        for unwanted in (r"\{", "{", r"\}", "}", "\\textbf ", "\\"):
            text = text.replace(unwanted, "")

        publications.append((pub_type, key, year, text, bib_buf.getvalue(),
                             pdf, doi, url, arxiv))

    generator.context['publications'] = publications
Beispiel #35
0
def _test(bibtex_input, correct_result):
    """Parse *bibtex_input* as UTF-8 BibTeX and assert it equals *correct_result*."""
    bib_parser = Parser(encoding='UTF-8')
    bib_parser.parse_stream(StringIO(bibtex_input))
    # The parser accumulates everything it read in its ``data`` attribute.
    assert bib_parser.data == correct_result
import logging
logger = logging.getLogger(__name__)

import os
import codecs

from pybtex.database.input.bibtex import Parser

PUBLICATION_DIR = 'content/publications'

# Smoke check: every file in PUBLICATION_DIR must be non-empty and must be
# parseable as BibTeX without raising.
for file in os.listdir(PUBLICATION_DIR):
    logger.warning('[BIB] Trying to parse {}...'.format(file))
    # Try parsing it, should not crash
    with codecs.open(os.path.join(PUBLICATION_DIR, file), 'r',
                     encoding="utf8") as stream:
        assert len(stream.read()) > 0
        # read() above consumed the whole stream; rewind so that the parser
        # sees the file content instead of an empty remainder.
        stream.seek(0)
        bibdata = Parser().parse_stream(stream)
Beispiel #37
0
    def run(self):
        """Render the BibTeX file named by the first argument as an HTML list.

        Entries are sorted by their ``year`` field in descending order and
        grouped under one ``<h3>`` heading per year.  The ``year`` field is
        read with item access, so every entry is expected to carry one.

        Directive options read:

        * ``style`` -- name of the pybtex formatting style (default ``unsrt``).
        * ``bibtex_dir`` -- sub-directory of ``self.output_folder`` that
          receives one ``<label>.bib`` file per entry (default ``bibtex``);
          a false value disables the BibTeX download links.
        * ``detail_page_dir`` -- sub-directory that receives one detail page
          per entry rendered from ``publication.tmpl`` (default ``papers``);
          a false value disables the detail pages.
        * ``highlight_author`` -- ``;``-separated names; the first occurrence
          of each name in an entry is wrapped in ``<strong>``.

        Returns:
            A one-element list holding a raw HTML node with the whole list.
        """
        style = find_plugin('pybtex.style.formatting',
                            self.options.get('style', 'unsrt'))()
        bibtex_dir = self.options.get('bibtex_dir', 'bibtex')
        detail_page_dir = self.options.get('detail_page_dir', 'papers')
        # Normalise the names once: surrounding whitespace must not end up
        # inside the <strong> tag, and an empty name (e.g. from a trailing
        # ';') must be dropped because str.replace('', ...) would insert an
        # empty <strong></strong> at the very start of the entry.
        highlight_authors = [
            name.strip()
            for name in (self.options.get('highlight_author') or '').split(';')
            if name.strip()
        ]
        self.state.document.settings.record_dependencies.add(self.arguments[0])

        parser = Parser()

        # Sort the publication entries by year reversed
        data = sorted(parser.parse_file(self.arguments[0]).entries.items(),
                      key=lambda e: e[1].fields['year'],
                      reverse=True)

        html = '<div class="publication-list">\n'
        cur_year = None

        # Create the output directory of every enabled option.
        for sub_dir in (bibtex_dir, detail_page_dir):
            if sub_dir:
                try:
                    os.mkdir(os.path.sep.join((self.output_folder, sub_dir)))
                except OSError:  # probably because the dir already exists
                    pass

        for label, entry in data:
            # print a year title when year changes
            if entry.fields['year'] != cur_year:
                if cur_year is not None:  # not first year group
                    html += '</ul>'
                cur_year = entry.fields['year']
                html += '<h3>{}</h3>\n<ul>'.format(cur_year)

            pub_html = list(style.format_entries(
                (entry, )))[0].text.render_as('html')
            # highlight one of several authors (usually oneself); only the
            # first occurrence of each name is wrapped
            for highlight_author in highlight_authors:
                pub_html = pub_html.replace(
                    highlight_author,
                    '<strong>{}</strong>'.format(highlight_author), 1)
            html += '<li class="publication" style="padding-bottom: 1em;">' + pub_html

            extra_links = ""
            bibtex_fields = dict(entry.fields)
            # Remove some fields for the publicly available BibTeX file since they are mostly only
            # used by this plugin.
            for field_to_remove in ('abstract', 'fulltext'):
                bibtex_fields.pop(field_to_remove, None)
            bibtex_entry = Entry(entry.type, bibtex_fields, entry.persons)
            # detail_page_dir may need bib_data later
            bib_data = BibliographyData({label: bibtex_entry})
            if bibtex_dir:  # write bib files to bibtex_dir for downloading
                bib_link = '{}/{}.bib'.format(bibtex_dir, label)
                bib_data.to_file('/'.join([self.output_folder, bib_link]),
                                 'bibtex')
                extra_links += '[<a href="{}">BibTeX</a>] '.format(
                    self.site.config['BASE_URL'] + bib_link)

            if 'fulltext' in entry.fields:  # the link to the full text, usually a link to the pdf file
                extra_links += '[<a href="{}">full text</a>] '.format(
                    entry.fields['fulltext'])

            if extra_links or detail_page_dir:
                html += '<br>'
            html += extra_links

            if detail_page_dir:  # render the details page of a paper
                page_url = '/'.join((detail_page_dir, label + '.html'))
                html += ' [<a href="{}">abstract and details</a>]'.format(
                    self.site.config['BASE_URL'] + page_url)
                if 'abstract' in entry.fields:
                    abstract = str(LaTeXParser(entry.fields['abstract']).parse())
                else:
                    abstract = ''
                context = {
                    'title': str(LaTeXParser(entry.fields['title']).parse()),
                    'abstract': abstract,
                    'bibtex': bib_data.to_string('bibtex'),
                    # bib_link only exists when bibtex_dir is set
                    'bibtex_link': ('/' + bib_link) if bibtex_dir else '',
                    'default_lang': self.site.config['DEFAULT_LANG'],
                    'label': label,
                    'lang': self.site.config['DEFAULT_LANG'],
                    'permalink': self.site.config['SITE_URL'] + page_url,
                    'reference': pub_html,
                    'extra_links': extra_links,
                }

                if 'fulltext' in entry.fields and entry.fields[
                        'fulltext'].endswith('.pdf'):
                    context['pdf'] = entry.fields['fulltext']

                self.site.render_template(
                    'publication.tmpl',
                    os.path.sep.join((self.output_folder, detail_page_dir,
                                      label + '.html')),
                    context,
                )

            html += '</li>'

        if data:  # publication list is nonempty, close the last year group
            html += '</ul>'

        html += '</div>'

        return [
            nodes.raw('', html, format='html'),
        ]
Beispiel #38
0
    def add_publications(self):
        """Build Publication objects from the BibTeX files in PUBLICATIONS_SRC.

        Every file in the directory ``self.settings['PUBLICATIONS_SRC']`` is
        parsed and its first entry is collected; files without any entry are
        skipped.  For each collected entry a ``Publication`` is created and
        registered in ``self.publications_per_year``,
        ``self.publications_per_type`` and ``self.publications_per_type_rev``,
        and in ``self.publications_per_author`` for every author that is a
        key of ``self.context['MEDIUS_AUTHORS']``.

        Returns:
            The list of created ``Publication`` objects, or ``None`` when the
            setting is missing or one of the files fails to parse (a warning
            is logged in both cases).
        """
        # Check if PUBLICATIONS_SRC is set
        if 'PUBLICATIONS_SRC' not in self.settings:
            logger.warning('PUBLICATIONS_SRC not set')
            return

        # Try to parse the bibtex files
        pub_dir = self.settings['PUBLICATIONS_SRC']
        bibdata_all = BibliographyData()
        try:
            for file in os.listdir(pub_dir):
                with codecs.open(os.path.join(pub_dir, file), 'r',
                                 encoding="utf8") as stream:
                    bibdata = Parser().parse_stream(stream)
                # items() is a non-indexable view on Python 3, and a file
                # without entries has no first item at all.
                first_item = next(iter(bibdata.entries.items()), None)
                if first_item is None:
                    continue
                key, entry = first_item
                bibdata_all.entries[key] = entry
        except PybtexError as e:
            logger.warning('`pelican_bibtex` failed to parse file %s: %s' %
                           (file, str(e)))
            return

        # Create Publication objects and add them to a list
        publications = []

        plain_style = plain.Style()
        latex_to_text = LatexNodes2Text().latex_to_text
        known_authors = self.context['MEDIUS_AUTHORS']

        for entry_key, parsed_entry in bibdata_all.entries.items():
            raw_tex = BibliographyData(entries={
                entry_key: parsed_entry
            }).to_string('bibtex')
            # The entry is formatted only to obtain the key the style reports.
            formatted_entry = list(
                plain_style.format_entries([parsed_entry]))[0]

            key = formatted_entry.key
            entry = bibdata_all.entries[key]

            year = entry.fields.get('year', 2018)

            # Turn "Last, First" into "First Last" and drop BibTeX braces;
            # names without a comma are only converted from LaTeX to text.
            authors = []
            for author in entry.fields.get('author', '').split(' and '):
                if ',' in author:
                    name_parts = author.split(',')
                    reordered = (name_parts[1] + ' ' + name_parts[0]).strip()
                    authors.append(
                        latex_to_text(re.sub(r'[\{\}]', '', reordered)))
                else:
                    authors.append(latex_to_text(author))

            title = latex_to_text(entry.fields.get('title', ''))

            pdf = entry.fields.get('pdf', None)
            slides = entry.fields.get('slides', None)
            poster = entry.fields.get('poster', None)

            # Venue: the book title wins over the journal when both exist.
            where = ''
            if 'booktitle' in entry.fields:
                where = latex_to_text(entry.fields.get('booktitle'))
            elif 'journal' in entry.fields:
                where = latex_to_text(entry.fields.get('journal'))

            abstract = entry.fields.get('abstract', '')

            pub = Publication(key,
                              authors,
                              title,
                              year,
                              where,
                              abstract=abstract,
                              pdf_url=pdf,
                              resource_urls=[('slides', slides),
                                             ('poster', poster)])
            pub.citations['bib'] = raw_tex.rstrip('\r\n')
            publications.append(pub)
            self.publications_per_year[pub.year].append(pub)
            for author in authors:
                if author in known_authors:
                    self.publications_per_author[author].append(pub)
            # NOTE(review): item access, so entry.type presumably has to be
            # a key of BIBTEX_TYPE_TO_TEXT -- confirm against its definition.
            type_text = BIBTEX_TYPE_TO_TEXT[entry.type]
            self.publications_per_type[type_text].append(pub)
            self.publications_per_type_rev[pub] = type_text

        return publications
def add_publications(generator):
    """
    Populates context with a list of BibTeX publications.

    Configuration
    -------------
    generator.settings['PUBLICATIONS_SRC']:
        local path to the BibTeX file to read.

    Output
    ------
    generator.context['publications']:
        List of dicts, one per entry, with the keys 'key', 'year', 'text'
        (the entry rendered as HTML in the pybtex plain style), 'url' (taken
        from the 'XEurl' field), 'XEmember', 'XEcategory' and 'XEProject'.
        Fields missing from an entry are stored as None.

    Nothing is done when the setting is absent or pybtex cannot be imported.
    A file that fails to parse is logged as an error and terminates the
    process with exit status 1.
    """
    if 'PUBLICATIONS_SRC' not in generator.settings:
        return
    try:
        from pybtex.database.input.bibtex import Parser
        from pybtex.database import PybtexError
        from pybtex.backends import html
        from pybtex.style.formatting import plain
    except ImportError:
        logger.warning('`pelican_bibtex` failed to load dependency `pybtex`')
        return

    refs_file = generator.settings['PUBLICATIONS_SRC']
    try:
        bibdata_all = Parser().parse_file(refs_file)
    except PybtexError as e:
        logger.error('`pelican_bibtex` failed to parse file %s: %s' % (
            refs_file,
            str(e)))
        # Same effect as exit(1), without relying on the builtin that the
        # site module injects.
        raise SystemExit(1)

    publications = []

    # format entries
    plain_style = plain.Style()
    html_backend = html.Backend()
    formatted_entries = plain_style.format_entries(bibdata_all.entries.values())

    for formatted_entry in formatted_entries:
        key = formatted_entry.key
        fields = bibdata_all.entries[key].fields
        publications.append({'key': key,
                             'year': fields.get('year'),
                             'text': formatted_entry.text.render(html_backend),
                             'url': fields.get('XEurl'),
                             'XEmember': fields.get('XEmember'),
                             'XEcategory': fields.get('XEcategory'),
                             'XEProject': fields.get('XEProject')})

    generator.context['publications'] = publications
def add_publications(generator):
    """
    Populates context with lists of BibTeX publications.

    Configuration
    -------------
    generator.settings['PUBLICATIONS_SRC']:
        local path to the BibTeX file to read.

    Output
    ------
    generator.context['publications']:
        Entries of type 'article'.
    generator.context['unpublished']:
        Entries of type 'unpublished'.
    generator.context['reports']:
        Entries of any other type.

    Each list holds tuples (key, text, bibtex, sort_key) where text is the
    entry rendered as HTML, bibtex is the entry's BibTeX source and sort_key
    is (year, journal); year is an int, or None when the field is missing or
    not a number.  The lists are sorted by sort_key in descending order, with
    entries lacking a year placed last.

    Nothing is done when the setting is absent; a file that fails to parse is
    logged as a warning and leaves the context untouched.
    """
    if 'PUBLICATIONS_SRC' not in generator.settings:
        return

    refs_file = generator.settings['PUBLICATIONS_SRC']
    try:
        bibdata_all = Parser().parse_file(refs_file)
    except PybtexError as e:
        logger.warning('`pelican_bibtex` failed to parse file %s: %s' %
                       (refs_file, str(e)))
        return

    def _newest_first(entry_res):
        # None cannot be ordered against an int on Python 3, so rank entries
        # that have a year above those that do not and compare None as 0.
        year, journal = entry_res[-1]
        return (year is not None, year if year is not None else 0, journal)

    # format entries
    plain_style = MyStyle()
    html_backend = html.Backend()
    formatted_entries = plain_style.format_entries(
        bibdata_all.entries.values())

    publications = []
    reports = []
    unpublished = []
    for formatted_entry in formatted_entries:
        key = formatted_entry.key
        entry = bibdata_all.entries[key]

        try:
            year = int(entry.fields.get('year', None))
        except (TypeError, ValueError):
            # TypeError: field missing; ValueError: e.g. "in press".
            year = None

        journal = entry.fields.get('journal', "")

        sort_key = (year, journal)

        # Render the bibtex string for the entry.
        bib_buf = StringIO()
        bibdata_this = BibliographyData(entries={key: entry})
        Writer().write_stream(bibdata_this, bib_buf)

        text = formatted_entry.text.render(html_backend)

        entry_res = (key, text, bib_buf.getvalue(), sort_key)

        if entry.type == 'article':
            publications.append(entry_res)
        elif entry.type == 'unpublished':
            unpublished.append(entry_res)
        else:
            reports.append(entry_res)

    generator.context['publications'] = sorted(publications,
                                               key=_newest_first,
                                               reverse=True)
    generator.context['reports'] = sorted(reports,
                                          key=_newest_first,
                                          reverse=True)
    generator.context['unpublished'] = sorted(unpublished,
                                              key=_newest_first,
                                              reverse=True)
Beispiel #41
0
def add_publications_to_context(generator, refs_files, refs_string=None, pybtex_style_args=None):
    """Populates context with a list of BibTeX publications.

    Args:
        generator: object exposing ``settings`` and ``context`` mappings.
        refs_files: iterable of BibTeX file paths to parse.
        refs_string: optional BibTeX source parsed after the files; entries
            with a key already seen replace the earlier ones.
        pybtex_style_args: optional keyword arguments for the style class;
            they take precedence over ``PUBLICATIONS_STYLE_ARGS``.

    Settings read: ``PUBLICATIONS_DECORATE_HTML``,
    ``PUBLICATIONS_PLUGIN_PATH``, ``PUBLICATIONS_STYLE_ARGS``,
    ``PUBLICATIONS_CUSTOM_STYLE``, ``PUBLICATIONS_SPLIT_BY`` and
    ``PUBLICATIONS_UNTAGGED_TITLE``.

    Output:
        generator.context['publications']: list of dicts, one per entry,
            with the keys 'key', 'year', 'text', 'bibtex', 'pdf', 'slides'
            and 'poster', updated with all fields of the entry.
        generator.context['publications_lists']: dict mapping each tag found
            in the ``PUBLICATIONS_SPLIT_BY`` field to the list of entry
            dicts carrying it, plus the untagged entries under
            ``PUBLICATIONS_UNTAGGED_TITLE`` when that title is set.

    Nothing is done (a warning is logged) when pybtex cannot be imported.
    """
    try:
        from StringIO import StringIO
    except ImportError:
        from io import StringIO
    try:
        from pybtex.database.input.bibtex import Parser
        from pybtex.database.output.bibtex import Writer
        from pybtex.database import BibliographyData, PybtexError
        from pybtex.backends import html
        from pybtex.style.formatting import BaseStyle, plain
    except ImportError:
        logger.warning('`pelican_bib` failed to load dependency `pybtex`')
        return

    decorate_html = generator.settings.get('PUBLICATIONS_DECORATE_HTML', False)

    # Make the plugin directory importable, without growing sys.path on
    # every call.
    plugin_path = generator.settings.get('PUBLICATIONS_PLUGIN_PATH', 'plugins')
    import sys
    if plugin_path not in sys.path:
        sys.path.append(plugin_path)

    # Work on a copy: updating the dict stored in the settings would leak
    # this call's style arguments into every later call.
    kwargs = dict(generator.settings.get('PUBLICATIONS_STYLE_ARGS', {}))
    kwargs.update(pybtex_style_args or {})
    style_type = get_style_type(plain.Style, decorate_html)
    style = style_type(**kwargs)

    if generator.settings.get('PUBLICATIONS_CUSTOM_STYLE', False):
        # Any failure below keeps the plain style created above.
        try:
            from pybtex_plugins import PelicanStyle
            if not isinstance(PelicanStyle, type) or not issubclass(PelicanStyle, BaseStyle):
                raise TypeError()
            style_type = get_style_type(PelicanStyle, decorate_html)
            style = style_type(**kwargs)
        except ImportError as e:
            logger.warning(str(e))
            logger.warning('pybtex_plugins.PelicanStyle not found, using Pybtex plain style')
        except TypeError:
            logger.warning('PelicanStyle must be a subclass of pybtex.style.formatting.BaseStyle')

    # collect entries
    bibdata_entries = {}
    for file in refs_files:
        bibdata_entries.update(Parser().parse_file(file).entries)
    if refs_string:
        bibdata_entries.update(Parser().parse_string(refs_string).entries)

    publications = []
    publications_lists = {}
    publications_untagged = []

    split_by = generator.settings.get('PUBLICATIONS_SPLIT_BY', None)
    untagged_title = generator.settings.get('PUBLICATIONS_UNTAGGED_TITLE', None)

    # format entries
    html_backend = html.Backend()
    formatted_entries = style.format_entries(bibdata_entries.values())

    for formatted_entry in formatted_entries:
        key = formatted_entry.key
        entry = bibdata_entries[key]
        year = entry.fields.get('year')
        # This shouldn't really stay in the field dict
        # but new versions of pybtex don't support pop
        pdf = entry.fields.get('pdf', None)
        slides = entry.fields.get('slides', None)
        poster = entry.fields.get('poster', None)

        tags = []
        if split_by:
            raw_tags = entry.fields.get(split_by, '')
            if raw_tags:
                # parse to list, trim each string and drop blank tags so
                # that "a, ,b" does not create a list titled ''
                tags = [tag.strip() for tag in raw_tags.split(',')
                        if tag.strip()]

            # create keys in publications_lists if at least one
            # tag is given
            for tag in tags:
                publications_lists.setdefault(tag, [])

        #render the bibtex string for the entry
        bib_buf = StringIO()
        bibdata_this = BibliographyData(entries={key: entry})
        Writer().write_stream(bibdata_this, bib_buf)

        # convert decorated html tags
        # `<:bib-xyz>abc</:bib-xyz>` => `<span class="bib-xyz">abc</span>`
        text = formatted_entry.text.render(html_backend)
        text = replace(r'<:([^>]*)>', r'<span class="\1">', text)
        text = replace(r'</:([^>]*)>', r'</span>', text)

        entry_record = {'key': key,
                        'year': year,
                        'text': text,
                        'bibtex': bib_buf.getvalue(),
                        'pdf': pdf,
                        'slides': slides,
                        'poster': poster}
        # Raw fields are merged last, so a field named like one of the keys
        # above replaces the computed value.
        entry_record.update(entry.fields)

        publications.append(entry_record)

        for tag in tags:
            publications_lists[tag].append(entry_record)

        if not tags and untagged_title:
            publications_untagged.append(entry_record)

    # append untagged list if title is given
    if untagged_title and publications_untagged:
        publications_lists[untagged_title] = publications_untagged

    # output
    generator.context['publications'] = publications
    generator.context['publications_lists'] = publications_lists