Beispiel #1
0
def format_pandoc(entries, csl_path):
    """
    Render each bibliography entry to citation text with pandoc.

    Args:
        entries (dict): mapping of citation keys to pybtex entries
        csl_path (str): path to the CSL style file used for formatting
    Returns:
        OrderedDict: citation key -> formatted citation text
    """
    version_parts = pypandoc.get_pandoc_version().split(".")
    pandoc_version = tuple(int(part) for part in version_parts)
    # Pandoc 2.11 changed the citation pipeline; pick the converter once.
    if pandoc_version >= (2, 11):
        convert = _convert_pandoc_new
    else:
        convert = _convert_pandoc_legacy

    citations = OrderedDict()
    for key, entry in entries.items():
        single_entry = BibliographyData(entries={entry.key: entry})
        citations[key] = convert(single_entry.to_string("bibtex"), csl_path)

    return citations
Beispiel #2
0
class KeyParsingTest(ParserTest, TestCase):
    """Exercise citation-key parsing with parentheses inside keys."""
    # Keys containing parens only parse cleanly when the entry body is
    # brace-delimited or the key is followed by a comma.
    input_string = u"""
        # will not work as expected
        @article(test(parens1))

        # works fine
        @article(test(parens2),)

        # works fine
        @article{test(braces1)}

        # also works
        @article{test(braces2),}
    """
    # Note the first key keeps a stray trailing ')' — the parser consumed
    # the inner ')' as part of the key.
    correct_result = BibliographyData([
        ('test(parens1))', Entry('article')),
        ('test(parens2)', Entry('article')),
        ('test(braces1)', Entry('article')),
        ('test(braces2)', Entry('article')),
    ])
    # The malformed first entry produces exactly one parse error.
    errors = [
        "syntax error in line 5: ')' expected",
    ]
Beispiel #3
0
class FieldNamesTest(ParserTest, TestCase):
    """Exercise which characters are accepted in BibTeX field names."""
    input_string = u"""
        Check for characters allowed in field names
        Here the cite key is fine, but the field name is not allowed:
        ``You are missing a field name``
        @article{2010, 0author="Me"}

        Underscores allowed (no error)
        @article{2011, _author="Me"}

        Not so for spaces obviously (``expecting an '='``)
        @article{2012, author name = "Myself"}

        Or hashes (``missing a field name``)
        @article{2013, #name = "Myself"}

        But field names can start with +-.
        @article{2014, .name = "Myself"}
        @article{2015, +name = "Myself"}
        @article{2016, -name = "Myself"}
        @article{2017, @name = "Myself"}
    """
    # Entries whose field name is rejected still appear, but without the
    # offending field; accepted leading characters are kept verbatim.
    correct_result = BibliographyData([
        ('2010', Entry('article')),
        ('2011', Entry('article', [('_author', 'Me')])),
        ('2012', Entry('article')),
        ('2013', Entry('article')),
        ('2014', Entry('article', [('.name', 'Myself')])),
        ('2015', Entry('article', [('+name', 'Myself')])),
        ('2016', Entry('article', [('-name', 'Myself')])),
        ('2017', Entry('article', [('@name', 'Myself')])),
    ])
    # One error per rejected field name, with the line it occurred on.
    errors = [
        "syntax error in line 5: '}' expected",
        "syntax error in line 11: '=' expected",
        'syntax error in line 14: \'}\' expected',
    ]
Beispiel #4
0
class KeylessEntriesTest(ParserTest, TestCase):
    """Exercise the ``keyless_entries`` parser option: entries without a
    citation key are accepted and auto-keyed ``unnamed-N`` in order."""
    parser_options = {'keyless_entries': True}
    input_string = u"""
        @BOOK(
            title="I Am Jackie Chan: My Life in Action",
            year=1999
        )
        @BOOK()
        @BOOK{}

        @BOOK{
            title = "Der deutsche Jackie Chan Filmführer",
        }

    """
    correct_result = BibliographyData([
        ('unnamed-1',
         Entry('book', [('title', 'I Am Jackie Chan: My Life in Action'),
                        ('year', '1999')])),
        ('unnamed-2', Entry('book')),
        ('unnamed-3', Entry('book')),
        ('unnamed-4',
         Entry('book', [('title', u'Der deutsche Jackie Chan Filmführer')])),
    ])
class MacrosTest(ParserTest, TestCase):
    """Exercise @String macro definition, concatenation with ``#``, and the
    error raised when an undefined macro is referenced."""
    input_string = """
        @String{and = { and }}
        @String{etal = and # { {et al.}}}
        @Article(
            unknown,
            author = nobody,
        )
        @Article(
            gsl,
            author = "Gough, Brian"#etal,
        )
    """
    # 'unknown' loses its author field (undefined macro); 'gsl' expands
    # etal = " and {et al.}" and the author list is split on " and ".
    correct_result = BibliographyData({
        'unknown':
        Entry('article'),
        'gsl':
        Entry('article',
              persons={'author': [Person('Gough, Brian'),
                                  Person('{et al.}')]}),
    })
    errors = [
        'undefined string in line 6: nobody',
    ]
Beispiel #6
0
def main_cli():
    """Filter a BibTeX file by keywords and write the matches to stdout.

    Entries whose ``keywords`` field contains at least one of the requested
    keywords are kept.  Field values are converted from double-quote to
    brace delimiters before printing.
    """
    import argparse

    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)

    parser.add_argument('bib_path',
                        metavar='BIB_PATH',
                        type=arg_is_file,
                        help=('Path to bibtex-formatted file.'))
    parser.add_argument('-k',
                        '--keywords',
                        nargs='+',
                        type=str,
                        default=["OaksPeerReviewed", "OaksCVPreprint"],
                        help=('Keywords for reference filter.'))

    args = parser.parse_args()

    bib_parser = bibtex.Parser()
    bib_data = bib_parser.parse_file(args.bib_path)

    filtered_bib_data = BibliographyData()
    for key, entry in bib_data.entries.items():
        kwords = [
            x.strip() for x in entry.fields.get('keywords', '').split(',')
        ]
        # Add each matching entry exactly once.  The previous loop called
        # add_entry once per matching keyword, so an entry matching two
        # keywords was added twice — pybtex raises on repeated entries.
        if any(kw in kwords for kw in args.keywords):
            filtered_bib_data.add_entry(entry.key, entry)

    s = filtered_bib_data.to_string("bibtex")
    # pybtex emits double-quoted values; convert them to brace style.
    s = s.replace("= \"", "= {")
    s = s.replace("\",\n", "},\n")
    s = s.replace("\"\n", "}\n")
    sys.stdout.write(s)
Beispiel #7
0
    def populate(self, database_file, pdf_dir):
        """
        Merge every entry from *database_file* into this database (self),
        rekeying each one.  Also sets the ``publipy_biburl``,
        ``publipy_abstracturl`` and ``publipy_pdfurl`` fields where
        applicable, and copies pdf files into ``self.prefix / 'pdf'``.

        :param str database_file: File containing all bibliography data
        :param str pdf_dir: Directory filled with pdfs named by their\
            bibliography keys
        """
        self.publications = BibliographyData()
        source_entries = read_bibfile(database_file).entries

        # Make sure the pdf output directory exists before any copy.
        pdf_out_dir = self.prefix / Path('pdf')
        if pdf_dir and not pdf_out_dir.exists():
            pdf_out_dir.mkdir()

        for old_key, entry in source_entries.items():
            new_key = generate_key_swe(entry)

            if 'publipy_biburl' not in entry.fields:
                bib_url = self.prefix / Path('bib') / Path(new_key + '.bib')
                entry.fields['publipy_biburl'] = str(bib_url)

            if 'abstract' in entry.fields:
                abstract_url = (self.prefix / Path('abstracts') /
                                Path(new_key + '.txt'))
                entry.fields['publipy_abstracturl'] = str(abstract_url)

            if pdf_dir and 'publipy_pdfurl' not in entry.fields:
                source_pdf = Path(pdf_dir) / Path(old_key + '.pdf')
                if source_pdf.exists() and source_pdf.is_file():
                    target_pdf = pdf_out_dir / Path(new_key + '.pdf')
                    shutil.copy(str(source_pdf), str(target_pdf))
                    entry.fields['publipy_pdfurl'] = str(target_pdf)

            self.add_entry(new_key, entry)
Beispiel #8
0
def add_publications(generator):
    """
    Populates context with a list of BibTeX publications.

    Configuration
    -------------
    generator.settings['PUBLICATIONS']:
        Dictionary that contains bibliographies:
          The key denotes the bibliography's name to use in headers
          The values describe the BibTeX files to read
        Mandatory for this plugin.
    generator.settings['PUBLICATIONS_NAVBAR']:
        Bool denoting whether a navigation bar containing links to each bibliography should be produced.
        Defaults to 'True'.
    generator.settings['PUBLICATIONS_HEADER']:
        Bool denoting whether a header (h2) should be produced for each bibliography.
        Defaults to 'True'.
    generator.settings['PUBLICATIONS_SPLIT']:
        Bool denoting whether bibliographies should be split by year (h3).
        Defaults to 'True'.
    generator.settings['PUBLICATIONS_HIGHLIGHTs']:
        String, e.g., a name, that will be wrapped in a <strong> tag to highlight.
        Default: empty

    Output
    ------
    generator.context['publications']:
        Dictionary containing the name of the publication list as a key, bibliography entries as a value.
        A bibliography entry consists of a list of tuples (key, year, text, bibtex, pdf, slides, poster).
        See Readme.md for more details.
    """

    if 'PUBLICATIONS' not in generator.settings:
        return
    if 'PUBLICATIONS_NAVBAR' not in generator.settings:
        generator.context['PUBLICATIONS_NAVBAR'] = True

    try:
        from StringIO import StringIO
    except ImportError:
        from io import StringIO
    try:
        from pybtex.database.input.bibtex import Parser
        from pybtex.database.output.bibtex import Writer
        from pybtex.database import BibliographyData, PybtexError
        from pybtex.backends import html
        from pybtex.style.formatting import plain
    except ImportError:
        # logger.warn is deprecated; warning is the supported spelling.
        logger.warning('`pelican_bibtex` failed to load dependency `pybtex`')
        return

    refs = generator.settings['PUBLICATIONS']
    generator.context['publications'] = collections.OrderedDict()

    for rid in refs:
        ref = refs[rid]
        bibfile = os.path.join(generator.settings['PATH'], ref['file'])
        try:
            bibdata_all = Parser().parse_file(bibfile)
        except PybtexError as e:
            # NOTE(review): this aborts ALL remaining bibliographies on the
            # first bad file — confirm that `continue` isn't intended here.
            logger.warning('`pelican_bibtex` failed to parse file %s: %s' %
                           (bibfile, str(e)))
            return

        # Per-bibliography options, each falling back to a default.
        if 'title' in ref:
            title = ref['title']
        else:
            title = rid

        if 'header' in ref:
            header = ref['header']
        else:
            header = True

        if 'split' in ref:
            split = ref['split']
        else:
            split = True

        if 'split_link' in ref:
            split_link = ref['split_link']
        else:
            split_link = True

        if 'bottom_link' in ref:
            bottom_link = ref['bottom_link']
        else:
            bottom_link = True

        if 'all_bibtex' in ref:
            all_bibtex = ref['all_bibtex']
        else:
            all_bibtex = False

        if 'highlight' in ref:
            highlights = ref['highlight']
        else:
            highlights = []

        if 'group_type' in ref:
            group_type = ref['group_type']
        else:
            group_type = False

        publications = []

        # format entries
        plain_style = plain.Style()
        html_backend = html.Backend()
        formatted_entries = plain_style.format_entries(
            bibdata_all.entries.values())

        for formatted_entry in formatted_entries:
            key = formatted_entry.key
            entry = bibdata_all.entries[key]
            year = entry.fields.get('year')
            typee = entry.type

            if entry.fields.get('tags'):
                tags = [
                    tag.strip() for tag in entry.fields.get('tags').split(';')
                ]
            else:
                tags = []

            display_tags = [
                x for x in tags if x != "doi-open" and x != "url-open"
            ]

            # This shouldn't really stay in the field dict
            # but new versions of pybtex don't support pop
            pdf = entry.fields.get('pdf', None)
            slides = entry.fields.get('slides', None)
            poster = entry.fields.get('poster', None)
            doi = entry.fields.get('doi', None)
            url = entry.fields.get('url', None)

            # clean fields from appearing in bibtex and on website.
            # NOTE(review): entry_tmp aliases entry (no copy), so these pops
            # mutate bibdata_all's entry as well; each entry is processed
            # only once here, so this is currently harmless.
            entry_tmp = entry
            for to_del in ['pdf', 'slides', 'poster', 'tags']:
                entry_tmp.fields.pop(to_del, None)

            # render the bibtex string for the entry
            bib_buf = StringIO()
            bibdata_this = BibliographyData(entries={key: entry_tmp})
            Writer().write_stream(bibdata_this, bib_buf)

            # clean more fields from appearing on website
            for to_del in ['doi', 'url']:
                entry_tmp.fields.pop(to_del, None)

            entry_clean = next(
                plain_style.format_entries(bibdata_this.entries.values()),
                None)

            # apply highlight (strong)
            text = entry_clean.text.render(html_backend)
            for replace in highlights:
                text = text.replace(replace,
                                    '<strong>' + replace + '</strong>')

            publications.append(
                (key, typee, year, text, tags, display_tags,
                 bib_buf.getvalue(), pdf, slides, poster, doi, url))

        generator.context['publications'][rid] = {}
        generator.context['publications'][rid]['title'] = title
        generator.context['publications'][rid]['path'] = os.path.basename(
            bibfile)
        generator.context['publications'][rid]['header'] = header
        generator.context['publications'][rid]['split'] = split
        generator.context['publications'][rid]['bottom_link'] = bottom_link
        generator.context['publications'][rid]['split_link'] = split_link
        generator.context['publications'][rid]['all_bibtex'] = all_bibtex
        generator.context['publications'][rid][
            'data'] = collections.OrderedDict()
        # Sort newest first; "in press" sorts before every numeric year.
        if group_type:
            generator.context['publications'][rid]['data'] = sorted(
                publications,
                key=lambda pub:
                (-int(pub[2].replace("in press", "9999")), pub[1]))
        else:
            generator.context['publications'][rid]['data'] = sorted(
                publications,
                key=lambda pub: -int(pub[2].replace("in press", "9999")))
Beispiel #9
0
    def __init__(self, *args, **kwargs):
        """Initialize the extension with empty bibliography state."""
        command.CommandExtension.__init__(self, *args, **kwargs)

        # Accumulated bibliography entries.
        self.__database = BibliographyData()
        # Citation keys seen so far (presumably to avoid duplicates — TODO
        # confirm against the methods that populate it).
        self.__citations = set()
Beispiel #10
0
        else:
            if cur_year is None:
                html_output += "\t<ul>\n"
                cur_year = True

        pub_html = list(style.format_entries([entry]))[0].text.render_as("html")
        pub_html = pub_html.replace("\n", " ")
        if highlight_author:  # highlight an author (usually oneself)
            pub_html = pub_html.replace(highlight_author, "<strong>{}</strong>".format(highlight_author), 1)
        html_output += f'\t\t<li class="publication" id="{entry.key.replace(":", "_")}">\n\t\t\t' + pub_html

        extra_links = []
        if bibtex_dir:  # write bib files to bibtex_dir for downloading
            sanitised_label = label.replace(":", "_")
            bib_link = f"{bibtex_dir}/{sanitised_label}.bib"
            BibliographyData({label: entry}).to_file(bib_link, "bibtex")
            extra_links.append(f'[<a href="{root_dir}/{bib_link}">bibtex</a>]')

        if "file" in entry.fields:  # the link to the pdf file
            (a, filename, kind) = entry.fields["file"].split(":", 2)

            file_path = os.path.join("papers", filename)
            presentation_path = os.path.join("presentations", filename)

            extra_links.append(f'[<a href="{root_dir}/{file_path}">file</a>]')

            if os.path.exists(presentation_path):
                extra_links.append(f'[<a href="{root_dir}/{presentation_path}">presentation</a>]')

        if "dataset" in entry.fields:
            dataset_path = entry.fields["dataset"]
def write_result(output, filter_data, data):
    """Write the entries of *data* selected by *filter_data* to *output*
    as a BibTeX file (UTF-8)."""
    selected = {key: data[key] for key in filter_data}
    bibliography = BibliographyData(selected)
    with open(output, 'w', encoding='utf-8') as out_file:
        out_file.write(bibliography.to_string('bibtex'))
Beispiel #12
0
def run(folder_path, file_list, file_name_out, exclude_list, log_process):
    """Merge several BibTeX files into one deduplicated output file.

    Entries with no author, year or publication venue are dropped; entries
    matching anything in ``exclude_list`` are skipped; duplicates across
    input files are merged via ``merge_entry``.  When ``log_process`` is
    true, removed and kept entries are logged to CSV files in
    ``folder_path``.

    :param folder_path: directory for the output .bib and the CSV logs
    :param file_list: BibTeX files to merge
    :param file_name_out: name of the merged output .bib file
    :param exclude_list: BibTeX files whose entries must be excluded
    :param log_process: whether to write the CSV audit logs
    """
    global merged_count

    if log_process:
        f_removed = open(
            os.path.join(folder_path, "BibFilesMerge_removed.csv"),
            "w",
            encoding="utf-8",
        )
        csv_removed = csv.writer(
            f_removed, quotechar='"', quoting=csv.QUOTE_ALL)
        csv_removed.writerow(
            ["cause", "source", "key", "doi", "author", "year", "title", "publish"]
        )
        f_final = open(
            os.path.join(folder_path, "BibFilesMerge_final.csv"), "w", encoding="utf-8"
        )
        csv_final = csv.writer(f_final, quotechar='"', quoting=csv.QUOTE_ALL)
        csv_final.writerow(
            ["key", "source", "doi", "author", "year",
                "title", "publish", "abstract"]
        )

    file_name_path_out = os.path.join(folder_path, file_name_out)
    bib_data_out = BibliographyData()

    # Counters for the summary printed at the end.
    total = 0
    merged_count = 0
    without_author = 0
    without_year = 0
    without_jornal = 0
    duplicates = 0
    excluded_from_bib = 0

    # Cache of parsed exclude files: file name -> entries.
    bib_data_to_exclude = {}

    for bib_file_name in file_list:
        bib_data = custom_parse_file(bib_file_name)
        print(
            "-" * 3,
            bib_file_name + ":",
            len(bib_data.entries.values()),
            " " * 30,
        )

        for entry in bib_data.entries.values():
            total += 1

            doi = get_entry_DOI(entry)
            author = get_entry_author(entry)
            year = get_entry_year(entry)
            title = get_entry_title(entry)
            publish = get_entry_publish(entry)

            # Skip the entry if it duplicates anything in the exclude files
            # (parsed lazily and cached on first use).
            found_entry_to_exclude = False
            for bib_file_name_exclude in exclude_list:
                if bib_file_name_exclude not in bib_data_to_exclude:
                    # NOTE(review): rebinding ``bib_data`` shadows the file
                    # being merged; the outer iterator is unaffected, but a
                    # distinct name would be clearer.
                    bib_data = custom_parse_file(bib_file_name_exclude)
                    bib_data_to_exclude[
                        bib_file_name_exclude
                    ] = bib_data.entries.values()

                for entry_exclude in bib_data_to_exclude[bib_file_name_exclude]:
                    if is_duplicated(entry_exclude, entry):
                        excluded_from_bib += 1
                        found_entry_to_exclude = True
                        break

                if found_entry_to_exclude:
                    break

            if found_entry_to_exclude:
                continue

            if not author:
                without_author += 1
                if log_process:
                    # cause;source;key;doi;author;year;title;publish
                    csv_removed.writerow(
                        [
                            "no author",
                            bib_file_name,
                            entry.key,
                            doi,
                            author,
                            year,
                            title,
                            publish,
                        ]
                    )
            elif not year:
                without_year = without_year + 1
                if log_process:
                    # cause;source;key;doi;author;year;title;publish
                    csv_removed.writerow(
                        [
                            "no year",
                            bib_file_name,
                            entry.key,
                            doi,
                            author,
                            year,
                            title,
                            publish,
                        ]
                    )
            elif not publish:
                without_jornal = without_jornal + 1
                if log_process:
                    # cause;source;key;doi;author;year;title;publish
                    csv_removed.writerow(
                        [
                            "no journal",
                            bib_file_name,
                            entry.key,
                            doi,
                            author,
                            year,
                            title,
                            publish,
                        ]
                    )
            else:
                key = entry.key.lower()
                print("Key " + key + " " * 30 + "\r", end="", flush=True)

                entry.fields["source"] = bib_file_name
                old_entry = None

                # Linear scan of already-accepted entries for a duplicate.
                for entry_out in bib_data_out.entries.values():
                    if is_duplicated(entry_out, entry, True):
                        old_entry = entry_out
                        break

                if old_entry != None:
                    duplicates += 1

                    if log_process:
                        # cause;source;key;doi;author;year;title;publish
                        csv_removed.writerow(
                            [
                                "duplicate of next",
                                bib_file_name,
                                entry.key,
                                doi,
                                author,
                                year,
                                title,
                                publish,
                            ]
                        )

                        # Log the previously kept entry as well so both
                        # halves of the duplicate pair appear in the CSV.
                        doi = get_entry_DOI(old_entry)
                        author = get_entry_author(old_entry)
                        year = get_entry_year(old_entry)
                        title = get_entry_title(old_entry)
                        publish = get_entry_publish(old_entry)
                        csv_removed.writerow(
                            [
                                "duplicate of prev",
                                old_entry.fields["source"],
                                old_entry.key,
                                doi,
                                author,
                                year,
                                title,
                                publish,
                            ]
                        )

                    bib_data_out.entries[old_entry.key] = merge_entry(
                        old_entry, entry)
                else:
                    # Disambiguate colliding (case-folded) keys by suffixing.
                    while key in bib_data_out.entries.keys():
                        key = key + "_a"
                    bib_data_out.entries[key] = entry

    print(" " * 50)
    print("Total:\t\t\t", total)

    print("No Author:\t\t", without_author)
    print("No Year:\t\t", without_year)
    print("No Publisher:\t\t", without_jornal)

    print("Duplicates:\t\t", duplicates)
    print("Merged:\t\t\t", merged_count)
    print("Excluded from bib:\t", excluded_from_bib)
    print("Final:\t\t\t", len(bib_data_out.entries))

    # Count entries lacking an abstract, per source file, and log the
    # final set of kept entries.
    without_abstract_list = {i: 0 for i in file_list}
    without_abstract = 0
    for entry in bib_data_out.entries.values():
        if log_process:
            doi = get_entry_DOI(entry)
            author = get_entry_author(entry)
            year = get_entry_year(entry)
            title = get_entry_title(entry)
            publish = get_entry_publish(entry)
            abstract = get_entry_abstract(entry)

            # key;source;doi;author;year;title;publish;abstract
            csv_final.writerow(
                [
                    entry.key,
                    entry.fields["source"],
                    doi,
                    author,
                    year,
                    title,
                    publish,
                    abstract,
                ]
            )

        if not "abstract" in entry.fields:
            without_abstract = without_abstract + 1
            without_abstract_list[entry.fields["source"]] = (
                without_abstract_list[entry.fields["source"]] + 1
            )

    print("Without Abstract:\t", without_abstract, without_abstract_list)
    bib_data_out.to_file(file_name_path_out, bib_format="bibtex")

    if log_process:
        f_removed.close()
        f_final.close()
Beispiel #13
0
def write_result(name, new_data):
    """Write *new_data* to the file *name* as UTF-8 BibTeX.

    :param name: output file path
    :param new_data: dictionary of bibliography entries
    """
    write_bib = BibliographyData(new_data)
    need_modification = write_bib.to_string('bibtex')
    # ``name`` is used directly; the old ``"%s" % name`` was a no-op.
    with open(name, 'w', encoding='utf-8') as file:
        file.write(need_modification)
Beispiel #14
0
class WindowsNewlineTest(ParserTest, TestCase):
    # Check that Windows-style (CR LF) line endings are counted as single
    # newlines: the error must be reported on line 4, not line 7.
    input_strings = [
        u"""'@Article\r\n\r\n\r\n}\r\n'""",
    ]
    # Nothing parses successfully from the malformed input.
    correct_result = BibliographyData()
    errors = ["syntax error in line 4: '(' or '{' expected"]
    parser = bibtex.Parser()
    bibdata = parser.parse_file(sys.argv[1])

    entries = []

    for tag in bibdata.entries.keys():
        d = {}
        entry = bibdata.entries[tag]
        fields = entry.fields

        d["tag"] = tag
        for k in fields.keys():
            d[k.lower()] = fields[k]

        d["bibtex.string"] = BibliographyData({tag: entry}).to_string("bibtex")

        entries.append(d)

    df = pd.DataFrame(entries)
    df = df.set_index("tag")

    fName = sys.argv[1].split(".")[0]
    df.to_csv(fName + ".converted.csv")

elif sys.argv[1].endswith(".csv"):

    outfile = open(sys.argv[1].split(".")[0] + ".converted.bib", "w+")

    df = pd.read_csv(sys.argv[1])
    df = df.set_index("tag")
Beispiel #16
0
# Reference bibliography: four entries of different types (article, booklet,
# inbook, book) plus a preamble, including non-ASCII (Russian) field values.
reference_data = BibliographyData(entries=[
    ('ruckenstein-diffusion',
     Entry(
         'article',
         fields={
             'language': 'english',
             'title':
             'Predicting the Diffusion Coefficient in Supercritical Fluids',
             'journal': 'Ind. Eng. Chem. Res.',
             'volume': '36',
             'year': '1997',
             'pages': '888-895',
         },
         persons={
             'author': [Person('Liu, Hongquin'),
                        Person('Ruckenstein, Eli')]
         },
     )),
    ('test-booklet',
     Entry('booklet',
           fields={
               'language': 'english',
               'title': 'Just a booklet',
               'year': '2006',
               'month': 'January',
               'address': 'Moscow',
               'howpublished': 'Published by Foo',
           },
           # Person with a "Jr." suffix and multiple given names.
           persons={'author': [Person('de Last, Jr., First Middle')]})),
    ('test-inbook',
     Entry('inbook',
           fields={
               'publisher': 'Some Publisher',
               'language': 'english',
               'title': 'Some Title',
               'series': 'Some series',
               'booktitle': 'Some Good Book',
               'number': '3',
               'edition': 'Second',
               'year': '1933',
               'pages': '44--59',
           },
           persons={'author': [Person('Jackson, Peter')]})),
    # Cyrillic entry: exercises non-ASCII titles, names and publisher.
    ('viktorov-metodoj',
     Entry(
         'book',
         fields={
             'publisher':
             'Л.: <<Химия>>',
             'year':
             '1977',
             'language':
             'russian',
             'title':
             'Методы вычисления физико-химических величин и прикладные расчёты',
         },
         persons={'author': [Person('Викторов, Михаил Маркович')]})),
],
                                  preamble=['%%% pybtex example file'])
Beispiel #17
0
    def run(self):
        """Render a publication list (HTML) from the BibTeX file given as
        the directive argument, grouped by year (newest first).  Optionally
        writes a per-entry .bib file into ``bibtex_dir`` and a detail page
        into ``detail_page_dir``.  Returns a single raw-HTML node."""

        style = find_plugin('pybtex.style.formatting',
                            self.options.get('style', 'unsrt'))()
        bibtex_dir = self.options.get('bibtex_dir', 'bibtex')
        detail_page_dir = self.options.get('detail_page_dir', 'papers')
        highlight_authors = self.options.get('highlight_author', None)
        if highlight_authors:
            highlight_authors = highlight_authors.split(';')
        # Rebuild pages when the .bib file changes.
        self.state.document.settings.record_dependencies.add(self.arguments[0])

        parser = Parser()

        # Sort the publication entries by year reversed
        data = sorted(parser.parse_file(self.arguments[0]).entries.items(),
                      key=lambda e: e[1].fields['year'],
                      reverse=True)

        html = '<div class="publication-list">\n'
        cur_year = None

        if bibtex_dir:  # create the bibtex dir if the option is set
            try:
                os.mkdir(os.path.sep.join((self.output_folder, bibtex_dir)))
            except OSError:  # probably because the dir already exists
                pass

        if detail_page_dir:  # create the detail page dir if the option is set
            try:
                os.mkdir(
                    os.path.sep.join((self.output_folder, detail_page_dir)))
            except OSError:  # probably because the dir already exists
                pass

        for label, entry in data:
            # print a year title when year changes
            if entry.fields['year'] != cur_year:
                if cur_year is not None:  # not first year group
                    html += '</ul>'
                cur_year = entry.fields['year']
                html += '<h3>{}</h3>\n<ul>'.format(cur_year)

            pub_html = list(style.format_entries(
                (entry, )))[0].text.render_as('html')
            if highlight_authors:  # highlight one of several authors (usually oneself)
                for highlight_author in highlight_authors:
                    pub_html = pub_html.replace(
                        highlight_author.strip(),
                        '<strong>{}</strong>'.format(highlight_author), 1)
            html += '<li class="publication" style="padding-bottom: 1em;">' + pub_html

            extra_links = ""
            # Copy the fields so the public .bib omits plugin-only fields
            # without mutating the parsed entry.
            bibtex_fields = dict(entry.fields)
            # Remove some fields for the publicly available BibTeX file since they are mostly only
            # used by this plugin.
            for field_to_remove in ('abstract', 'fulltext'):
                if field_to_remove in bibtex_fields:
                    del bibtex_fields[field_to_remove]
            bibtex_entry = Entry(entry.type, bibtex_fields, entry.persons)
            # detail_page_dir may need bib_data later
            bib_data = BibliographyData(dict({label: bibtex_entry}))
            if bibtex_dir:  # write bib files to bibtex_dir for downloading
                bib_link = '{}/{}.bib'.format(bibtex_dir, label)
                bib_data.to_file('/'.join([self.output_folder, bib_link]),
                                 'bibtex')
                extra_links += '[<a href="{}">BibTeX</a>] '.format(
                    self.site.config['BASE_URL'] + bib_link)

            if 'fulltext' in entry.fields:  # the link to the full text, usually a link to the pdf file
                extra_links += '[<a href="{}">full text</a>] '.format(
                    entry.fields['fulltext'])

            if extra_links or detail_page_dir:
                html += '<br>'
            html += extra_links

            if detail_page_dir:  # render the details page of a paper
                page_url = '/'.join((detail_page_dir, label + '.html'))
                html += ' [<a href="{}">abstract and details</a>]'.format(
                    self.site.config['BASE_URL'] + page_url)
                # Template context for the per-paper detail page.
                context = {
                    'title':
                    str(LaTeXParser(entry.fields['title']).parse()),
                    'abstract':
                    str(LaTeXParser(entry.fields['abstract']).parse())
                    if 'abstract' in entry.fields else '',
                    'bibtex':
                    bib_data.to_string('bibtex'),
                    'bibtex_link':
                    '/' + bib_link if bibtex_dir else '',
                    'default_lang':
                    self.site.config['DEFAULT_LANG'],
                    'label':
                    label,
                    'lang':
                    self.site.config['DEFAULT_LANG'],
                    'permalink':
                    self.site.config['SITE_URL'] + page_url,
                    'reference':
                    pub_html,
                    'extra_links':
                    extra_links
                }

                if 'fulltext' in entry.fields and entry.fields[
                        'fulltext'].endswith('.pdf'):
                    context['pdf'] = entry.fields['fulltext']

                self.site.render_template(
                    'publication.tmpl',
                    os.path.sep.join((self.output_folder, detail_page_dir,
                                      label + '.html')),
                    context,
                )

            html += '</li>'

        if len(data) != 0:  # publication list is nonempty
            html += '</ul>'

        html += '</div>'

        return [
            nodes.raw('', html, format='html'),
        ]
Beispiel #18
0
def run(folderPath, fileList, fileNameOut, logProcess):
    """
    Merge several BibTeX files into a single de-duplicated .bib file.

    Entries without an author, year or publication venue are dropped.
    Two entries are considered duplicates when they share a DOI, or when
    their normalized titles match and their years are equal (or differ by
    1-2 years with a matching author surname, to catch preprint vs.
    published versions).

    Args:
        folderPath (str): directory holding the input files and outputs
        fileList (list): names of the .bib files to merge
        fileNameOut (str): file name of the merged .bib output
        logProcess (bool): when True, write CSV logs of removed and final
            entries next to the output file
    """
    global mergedCont  # updated by mergeEntry(); reported in the summary

    if logProcess:
        # CSV log of every removed entry, with the removal cause.
        fRemoved = open(os.path.join(folderPath, 'BibFilesMerge_removed.csv'),
                        'w',
                        encoding='utf-8')
        csvRemoved = csv.writer(fRemoved, delimiter=';', quotechar='"')
        csvRemoved.writerow([
            'cause', 'source', 'key', 'doi', 'author', 'year', 'title',
            'publish'
        ])
        # CSV log of every entry kept in the merged result.
        fFinal = open(os.path.join(folderPath, 'BibFilesMerge_final.csv'),
                      'w',
                      encoding='utf-8')
        csvFinal = csv.writer(fFinal, delimiter=';', quotechar='"')
        csvFinal.writerow([
            'key', 'source', 'doi', 'author', 'year', 'title', 'publish',
            'abstract'
        ])

    fileNamePathOut = os.path.join(folderPath, fileNameOut)

    bibDataOut = BibliographyData()

    total = 0
    mergedCont = 0
    withoutAuthor = 0
    withoutYear = 0
    withoutJornal = 0
    duplicates = 0

    print()
    print()

    for bibFileName in fileList:

        bibData = parse_file(os.path.join(folderPath, bibFileName))

        print(bibFileName + ':', len(bibData.entries.values()),
              "                                             ")

        for entry in bibData.entries.values():
            total = total + 1

            doi = getEntryDOIStr(entry)
            author = getEntryAuthorStr(entry)
            year = getEntryYearStr(entry)
            title = getEntryTitleStr(entry)
            publish = getEntryPublishStr(entry)

            if author == '':
                withoutAuthor = withoutAuthor + 1
                if logProcess:
                    # cause;source;key;doi;author;year;title;publish
                    csvRemoved.writerow([
                        'no author', bibFileName, entry.key, doi, author, year,
                        title, publish
                    ])

            elif year == '':
                withoutYear = withoutYear + 1
                if logProcess:
                    csvRemoved.writerow([
                        'no year', bibFileName, entry.key, doi, author, year,
                        title, publish
                    ])

            elif publish == '':
                withoutJornal = withoutJornal + 1
                if logProcess:
                    csvRemoved.writerow([
                        'no journal', bibFileName, entry.key, doi, author,
                        year, title, publish
                    ])

            else:
                key = entry.key.lower()
                print("Key " + key + "               \r", end="", flush=True)

                # Remember which file the entry came from (used in the logs).
                entry.fields['source'] = bibFileName
                oldEntry = None
                cleanTitle = cleanStringToCompare(title)

                # Linear scan over already accepted entries for a duplicate.
                for entryOut in bibDataOut.entries.values():
                    if doi != '':
                        doiOut = getEntryDOIStr(entryOut)
                        if doiOut != '' and doi == doiOut:
                            oldEntry = entryOut
                            break

                    cleanOutTitle = cleanStringToCompare(
                        entryOut.fields['title'])
                    if cleanTitle == cleanOutTitle:
                        # Use separate int variables so the original `year`
                        # string is not clobbered for the log output below.
                        entryYear = int(str(entry.rich_fields['year']))
                        entryYearOut = int(str(entryOut.rich_fields['year']))
                        diff = abs(entryYear - entryYearOut)
                        if diff == 0:
                            # BUGFIX: break like the other match branches so
                            # the first match wins instead of the last one.
                            oldEntry = entryOut
                            break
                        elif diff == 1 or diff == 2:
                            # Author-name cross-check for near-year matches.
                            try:
                                lastname = unidecode.unidecode(
                                    entry.persons['author']
                                    [0].last_names[0]).lower()
                            except Exception:
                                lastname = ""

                            try:
                                lastNameOut = unidecode.unidecode(
                                    entryOut.persons['author']
                                    [0].last_names[0]).lower()
                            except Exception:
                                lastNameOut = ""

                            # BUGFIX: pybtex Person exposes `first_names`;
                            # the old attribute name `firstNames` always
                            # raised, so these were silently always "".
                            try:
                                firstName = unidecode.unidecode(
                                    entry.persons['author']
                                    [0].first_names[0]).lower()
                            except Exception:
                                firstName = ""

                            try:
                                firstNameOut = unidecode.unidecode(
                                    entryOut.persons['author']
                                    [0].first_names[0]).lower()
                            except Exception:
                                firstNameOut = ""

                            if (lastname == lastNameOut
                                    or lastname == firstNameOut
                                    or lastNameOut == firstName):
                                oldEntry = entryOut
                                break

                if oldEntry is not None:
                    duplicates = duplicates + 1

                    if logProcess:
                        # Log both sides of the duplicate pair.
                        csvRemoved.writerow([
                            'duplicate of next', bibFileName, entry.key, doi,
                            author, year, title, publish
                        ])

                        doi = getEntryDOIStr(oldEntry)
                        author = getEntryAuthorStr(oldEntry)
                        year = getEntryYearStr(oldEntry)
                        title = getEntryTitleStr(oldEntry)
                        publish = getEntryPublishStr(oldEntry)
                        csvRemoved.writerow([
                            'duplicate of prev', oldEntry.fields['source'],
                            oldEntry.key, doi, author, year, title, publish
                        ])

                    # Replace the kept entry with the merge of both versions.
                    bibDataOut.entries[oldEntry.key] = mergeEntry(
                        oldEntry, entry)

                else:
                    # Disambiguate colliding keys before inserting.
                    while key in bibDataOut.entries:
                        key = key + "_a"
                    bibDataOut.entries[key] = entry

    print("                                                     ")
    print("Total:\t\t", total)

    print("No Author:\t", withoutAuthor)
    print("No Year:\t", withoutYear)
    print("No Publisher:\t", withoutJornal)

    print("Duplicates:", duplicates, "| Merged:", mergedCont)
    print("Final:\t\t", len(bibDataOut.entries))

    # Second pass: write the "final" log and count missing abstracts per file.
    withoutAbstractList = {i: 0 for i in fileList}
    withoutAbstract = 0
    for entry in bibDataOut.entries.values():
        if logProcess:
            doi = getEntryDOIStr(entry)
            author = getEntryAuthorStr(entry)
            year = getEntryYearStr(entry)
            title = getEntryTitleStr(entry)
            publish = getEntryPublishStr(entry)
            abstract = getEntryAbstractStr(entry)

            # key;source;doi;author;year;title;publish;abstract
            csvFinal.writerow([
                entry.key, entry.fields['source'], doi, author, year, title,
                publish, abstract
            ])

        if 'abstract' not in entry.fields:
            withoutAbstract = withoutAbstract + 1
            withoutAbstractList[entry.fields['source']] = withoutAbstractList[
                entry.fields['source']] + 1

    print("without Abstract ", withoutAbstract, withoutAbstractList)

    bibDataOut.to_file(fileNamePathOut)

    if logProcess:
        fRemoved.close()
        fFinal.close()
Beispiel #19
0
    # Pre-scan the raw .bib file for duplicate entry headers: pybtex
    # refuses to parse files with repeated keys, so fail fast with a
    # readable message instead.  `args.f` is presumably the bibtex file
    # path from argparse -- TODO confirm against the missing def line.
    entries = set()
    dupentries = False
    with open(args.f, 'r') as bin:
        for l in bin:
            if l.startswith('@'):
                # Strip the entry type so "@misc{key," and "@article{key,"
                # with the same key compare equal.
                l = l.replace('@misc', '')
                l = l.replace('@article', '')
                l = l.replace('@inproceedings', '')
                if l in entries:
                    sys.stderr.write("Duplicate entry " +
                                     l.replace('{', '').replace(',', ''))
                    dupentries = True
                entries.add(l)

    if dupentries:
        sys.stderr.write(
            "FATAL: The bibtex file has duplicate entries in it. Please remove them before trying to continue\n"
        )
        sys.stderr.write(
            "(It is an issue with Google Scholar, but pybtex breaks with duplicate entries. Sorry)\n"
        )
        sys.exit(-1)

    bib = parse_file(args.f, 'bibtex')

    # Print every entry whose year is >= the cutoff (args.y) as its own
    # single-entry BibTeX document on stdout.
    for e in bib.entries:
        if 'year' in bib.entries[e].fields:
            if int(bib.entries[e].fields['year']) >= args.y:
                bib_data = BibliographyData({e: bib.entries[e]})
                print(bib_data.to_string('bibtex'))
Beispiel #20
0
    def run(self, lines):
        """
        Create a bibliography from cite commands.

        Loads the bibtex files referenced by the bibliography command,
        replaces cite commands with author-date anchors, and appends a
        formatted, ordered reference list.

        Args:
            lines (list): the markdown source split into lines
        Returns:
            list: the transformed source split into lines
        """

        # Join the content to enable regex searches throughout entire text
        content = '\n'.join(lines)

        # Build the database of bibtex data
        self._citations = []  # member b/c it is used in substitution function
        self._bibtex = BibliographyData()  # ""
        bibfiles = []
        match = re.search(self.RE_BIBLIOGRAPHY, content)
        if match:
            for bfile in match.group(1).split(','):
                try:
                    filename, _ = self.getFilename(bfile.strip())
                    bibfiles.append(filename)
                    data = self.parseBibtexFile(bibfiles[-1])
                    # BUGFIX: dict.iteritems() is Python-2-only and raises
                    # AttributeError on Python 3; items() works on both.
                    self._bibtex.add_entries(data.entries.items())
                except UndefinedMacro:
                    LOG.error('Undefined macro in bibtex file: %s, specify macro_files arguments ' \
                              'in configuration file (e.g. website.yml)', bfile.strip())
                except TypeError:
                    LOG.error('Unable to locate bibtex file in %s',
                              self.markdown.current.filename)
        else:
            # No bibliography command on this page: nothing to do.
            return lines

        # Determine the style
        match = re.search(self.RE_STYLE, content)
        if match:
            content = content.replace(match.group(0), '')
            try:
                style = find_plugin('pybtex.style.formatting', match.group(1))
            except PluginNotFound:
                LOG.error('Unknown bibliography style "%s"', match.group(1))
                return lines

        else:
            style = find_plugin('pybtex.style.formatting', 'plain')

        # Replace citations with author date, as an anchor
        content = re.sub(self.RE_CITE, self.authors, content)

        # Create html bibliography
        if self._citations:

            # Generate formatted html using pybtex
            formatted_bibliography = style().format_bibliography(
                self._bibtex, self._citations)
            backend = find_plugin('pybtex.backends', 'html')
            stream = io.StringIO()
            backend().write_to_stream(formatted_bibliography, stream)

            # Strip the bib items from the formatted html
            html = re.findall(r'\<dd\>(.*?)\</dd\>',
                              stream.getvalue(),
                              flags=re.MULTILINE | re.DOTALL)

            # Produces an ordered list with anchors to the citations
            output = u'<ol class="moose-bibliography" data-moose-bibfiles="{}">\n'
            output = output.format(str(bibfiles))
            for i, item in enumerate(html):
                output += u'<li name="{}">{}</li>\n'.format(
                    self._citations[i], item)
            output += u'</ol>\n'
            content = re.sub(self.RE_BIBLIOGRAPHY,
                             self.markdown.htmlStash.store(output, safe=True),
                             content)

        return content.split('\n')
Beispiel #21
0
            file = PdfFileReader(in_)
            last_page = start_page + file.getNumPages() - 1
            bib_entry.fields['pages'] = '{}--{}'.format(start_page, last_page)
            start_page = last_page + 1

    # Add the abstract if present
    if submission_id in abstracts:
        bib_entry.fields['abstract'] = abstracts.get(submission_id)

    # Add booktitle for non-proceedings entries
    if bib_type == 'inproceedings':
        bib_entry.fields['booktitle'] = metadata['booktitle']

    try:
        bib_string = BibliographyData({
            anthology_id: bib_entry
        }).to_string('bibtex')
    except TypeError as e:
        print('Fatal: Error in BibTeX-encoding paper',
              submission_id,
              file=sys.stderr)
        sys.exit(1)
    final_bibs.append(bib_string)
    with open(bib_path, 'w') as out_bib:
        print(bib_string, file=out_bib)
        print('CREATED', bib_path)

# Create an index for LaTeX book proceedings
if not os.path.exists('book-proceedings'):
    os.makedirs('book-proceedings')
Beispiel #22
0
    def add_publications(self):
        """
        Parse all BibTeX files in PUBLICATIONS_SRC into Publication
        objects, indexing them per year, author and entry type.

        Returns:
            list: Publication objects, or None if the setting is missing
            or a file fails to parse.
        """
        # Check if PUBLICATIONS_SRC is set
        if 'PUBLICATIONS_SRC' not in self.settings:
            logger.warn('PUBLICATIONS_SRC not set')
            return

        # Try to parse the bibtex files (one entry per file expected)
        pub_dir = self.settings['PUBLICATIONS_SRC']
        try:
            bibdata_all = BibliographyData()
            for file in os.listdir(pub_dir):
                with codecs.open(pub_dir + os.sep + file, 'r',
                                 encoding="utf8") as stream:
                    bibdata = Parser().parse_stream(stream)
                    # BUGFIX: dict views are not indexable on Python 3;
                    # next(iter(...)) fetches the first (and only) entry
                    # on both Python 2 and 3.
                    key, entry = next(iter(bibdata.entries.items()))
                    bibdata_all.entries[key] = entry
        except PybtexError as e:
            logger.warn('`pelican_bibtex` failed to parse file %s: %s' %
                        (file, str(e)))
            return

        # Create Publication objects and add them to a list
        publications = []

        plain_style = plain.Style()
        # NOTE: the former up-front format_entries() pass and the unused
        # LatexIncrementalDecoder were dead code and have been removed;
        # each entry is formatted individually in the loop below.

        for entry in bibdata_all.entries:
            # Raw single-entry BibTeX string for the "bib" citation link.
            raw_tex = BibliographyData(entries={
                entry: bibdata_all.entries[entry]
            }).to_string('bibtex')
            formatted_entry = list(
                plain_style.format_entries([bibdata_all.entries[entry]]))[0]

            key = formatted_entry.key
            entry = bibdata_all.entries[key]

            # Fall back to 2018 when no year field is present.
            year = entry.fields.get('year', 2018)

            authors = entry.fields.get('author', '').split(' and ')
            print(authors)
            parsed_authors = []
            for author in authors:
                if ',' in author:
                    # "Last, First" -> "First Last", stripped of braces.
                    parsed_authors.append(LatexNodes2Text().latex_to_text(
                        re.sub(r'[\{\}]', '', (author.split(',')[1] + ' ' +
                                               author.split(',')[0]).strip())))
                else:
                    parsed_authors.append(
                        LatexNodes2Text().latex_to_text(author))
            authors = parsed_authors

            title = LatexNodes2Text().latex_to_text(
                entry.fields.get('title', ''))

            # Optional resource links attached to the entry.
            pdf = entry.fields.get('pdf', None)
            slides = entry.fields.get('slides', None)
            poster = entry.fields.get('poster', None)

            # Venue: prefer booktitle (proceedings) over journal.
            where = ''
            if 'booktitle' in entry.fields:
                where = LatexNodes2Text().latex_to_text(
                    entry.fields.get('booktitle'))
            elif 'journal' in entry.fields:
                where = LatexNodes2Text().latex_to_text(
                    entry.fields.get('journal'))

            abstract = entry.fields.get('abstract', '')

            pub = Publication(key,
                              authors,
                              title,
                              year,
                              where,
                              abstract=abstract,
                              pdf_url=pdf,
                              resource_urls=[('slides', slides),
                                             ('poster', poster)])
            pub.citations['bib'] = raw_tex.rstrip('\r\n')
            publications.append(pub)
            # Maintain the per-year / per-author / per-type indices.
            self.publications_per_year[pub.year].append(pub)
            for author in authors:
                if author in self.context['MEDIUS_AUTHORS'].keys():
                    self.publications_per_author[author].append(pub)
            self.publications_per_type[BIBTEX_TYPE_TO_TEXT[entry.type]].append(
                pub)
            self.publications_per_type_rev[pub] = BIBTEX_TYPE_TO_TEXT[
                entry.type]

        return publications
Beispiel #23
0
class BracesAndQuotesTest(ParserTest, TestCase):
    """Whitespace inside a quoted title collapses; braced quotes survive."""
    input_string = u'''@ARTICLE{
                test,
                title="Nested braces  and {"quotes"}",
        }'''
    correct_result = BibliographyData([
        (u'test',
         Entry('article', [(u'title', 'Nested braces and {"quotes"}')])),
    ])
def add_publications(generator):
    """
    Populates context with a list of BibTeX publications.

    Configuration
    -------------
    generator.settings['PUBLICATIONS_SRC']:
        local path to the BibTeX file to read.

    Output
    ------
    generator.context['publications']:
        List of tuples (pub_type, key, year, text, bibtex, pdf, doi, url,
        arxiv).  See Readme.md for more details.
    """
    if 'PUBLICATIONS_SRC' not in generator.settings:
        return
    try:
        from StringIO import StringIO
    except ImportError:
        from io import StringIO
    try:
        from pybtex.database.input.bibtex import Parser
        from pybtex.database.output.bibtex import Writer
        from pybtex.database import BibliographyData, PybtexError
        from pybtex.backends import html
        #from pybtex.style.formatting import plain
        from rahul_style import Style as RahulStyle

    except ImportError:
        logger.warn('`pelican_bibtex` failed to load dependency `pybtex`')
        return

    refs_file = generator.settings['PUBLICATIONS_SRC']
    try:
        bibdata_all = Parser().parse_file(refs_file)
    except PybtexError as e:
        logger.warn('`pelican_bibtex` failed to parse file %s: %s' %
                    (refs_file, str(e)))
        return

    publications = []

    # format entries
    plain_style = RahulStyle()
    #plain_style = plain.Style()
    html_backend = html.Backend()

    html_backend.symbols['br'] = u'<BR/>'

    all_entries = bibdata_all.entries.values()

    # remove URL field if DOI is present
    for entry in all_entries:
        if "doi" in entry.fields.keys():
            # NOTE(review): reaches into the private _dict of pybtex's
            # FieldDict; plain item assignment may work on newer pybtex --
            # verify before changing.
            entry.fields._dict["url"] = ""

    formatted_entries = plain_style.format_entries(all_entries)
    for formatted_entry in formatted_entries:
        key = formatted_entry.key
        entry = bibdata_all.entries[key]
        pub_type = entry.type
        year = entry.fields.get('year')
        # This shouldn't really stay in the field dict
        # but new versions of pybtex don't support pop
        pdf = entry.fields.get('pdf', None)
        #slides = entry.fields.get('slides', None)
        #poster = entry.fields.get('poster', None)
        doi = entry.fields.get('doi', None)
        url = entry.fields.get('url', None)
        arxiv = entry.fields.get('arxiv', None)

        #render the bibtex string for the entry
        bib_buf = StringIO()
        bibdata_this = BibliographyData(entries={key: entry})
        Writer().write_stream(bibdata_this, bib_buf)

        text = formatted_entry.text.render(html_backend)

        # prettify entries
        # remove BibTeX's {}
        # BUGFIX: "\{" is an invalid escape sequence (SyntaxWarning on
        # modern Python); "\\{" has the identical runtime value.
        text = text.replace("\\{", "")
        text = text.replace("{", "")
        text = text.replace("\\}", "")
        text = text.replace("}", "")
        # remove textbf used for cv
        text = text.replace("\\textbf ", "")
        # remove \ that comes after Proc.
        text = text.replace("\\", "")

        publications.append((pub_type, key, year, text, bib_buf.getvalue(),
                             pdf, doi, url, arxiv))

    generator.context['publications'] = publications
Beispiel #25
0
class EmptyDataTest(ParserTest, TestCase):
    """Parsing an empty input must produce an empty bibliography."""
    input_string = u''
    correct_result = BibliographyData()
Beispiel #26
0
def import_arxiv(arxiv_id=None,
                 tags=None,
                 path="~/Papers",
                 path_tmp="/tmp/",
                 **kwargs):
    assert arxiv_id is not None
    papers_path = Path(path).expanduser().absolute()

    new_folder = (Path(path_tmp).expanduser().absolute() /
                  f"papers_import_arxiv_{arxiv_id}")
    new_file = new_folder / Path(arxiv_id + ".arxiv")

    if not new_file.exists():
        new_file.parent.mkdir(exist_ok=True)
        new_file.touch()

    if tags != None:
        tags = tags.split(",")
        for tag in tags:
            path = Path(new_folder, "#" + tag.strip())
            if not path.exists():
                path.touch()

    # Get arxiv info
    info, xmldoc = arxiv2dict(arxiv_id)

    # Save xml
    xml = new_file.parent / (arxiv_id + ".arxiv")
    with open(xml, "w+") as f:
        f.write(xmldoc.toxml())

    # Cite key
    citekey = make_citekey(info["first_author_surname"].lower(), info["year"],
                           info["title"])

    # PDF
    pdf_name = citekey + ".pdf"
    files_pdf = list(new_file.parent.glob("*.pdf"))
    if len(files_pdf) == 0:  # get pdf
        response = requests.get(info["url_pdf"])
        with open(new_file.parent / pdf_name, "wb") as f:
            f.write(response.content)
    if len(files_pdf) == 1 and files_pdf[0] != pdf_name:
        files_pdf[0].rename(new_file.parent / pdf_name)

    # Write abstract.txt
    abstract = new_file.parent / "abstract.txt"
    with open(abstract, "w+") as f:
        f.write(info["abstract"].strip())

    # Bib entry
    ref_entry = {
        citekey:
        Entry(
            "article",
            [
                ("author", " and ".join(info["author"])),
                ("title", str(info["title"])),
                ("year", str(info["year"])),
                ("eprint", str(info["id"])),
                ("journal", "arXiv preprint"),
            ],
        ),
    }

    # Write bib file
    ref = new_file.parent / f"{citekey}.bib"
    with open(ref, "w+") as f:
        f.write(BibliographyData(ref_entry).to_string("bibtex"))

    # Rename parent folder according to citekey
    new_folder_renamed = Path(new_file.parent.parent / citekey)
    Path(new_file.parent).rename(new_folder_renamed)

    # Move folder
    shutil.move(str(new_folder_renamed), str(papers_path / citekey))
Beispiel #27
0
from pybtex.database import BibliographyData, Entry

# Build a minimal one-entry bibliography and dump it back as BibTeX text.
minimal_fields = [
    ('author', 'L[eslie] B. Lamport'),
    ('title', 'The Gnats and Gnus Document Preparation System'),
    ('journal', "G-Animal's Journal"),
    ('year', '1986'),
]

bib_data = BibliographyData(
    entries={'article-minimal': Entry('article', minimal_fields)})

print(bib_data.to_string('bibtex'))
def add_publications(generator):
    """
    Populates context with a list of BibTeX publications.

    Configuration
    -------------
    generator.settings['PUBLICATIONS_SRC']:
        local path to the BibTeX file to read.

    Output
    ------
    generator.context['publications']:
        List of tuples (key, year, text, bibtex, pdf, slides, poster).
        See Readme.md for more details.
    """
    if 'PUBLICATIONS_SRC' not in generator.settings:
        return

    # StringIO moved between Python 2 and 3.
    try:
        from StringIO import StringIO
    except ImportError:
        from io import StringIO

    # pybtex is optional; bail out gracefully when it is missing.
    try:
        from pybtex.database.input.bibtex import Parser
        from pybtex.database.output.bibtex import Writer
        from pybtex.database import BibliographyData, PybtexError
        from pybtex.backends import html
        from pybtex.style.formatting import plain
    except ImportError:
        logger.warn('`pelican_bibtex` failed to load dependency `pybtex`')
        return

    refs_file = generator.settings['PUBLICATIONS_SRC']
    try:
        parsed_db = Parser().parse_file(refs_file)
    except PybtexError as e:
        logger.warn('`pelican_bibtex` failed to parse file %s: %s' % (
            refs_file,
            str(e)))
        return

    style = plain.Style()
    backend = html.Backend()
    results = []

    for formatted in style.format_entries(parsed_db.entries.values()):
        key = formatted.key
        entry = parsed_db.entries[key]
        fields = entry.fields

        # Render this single entry back to a BibTeX string.
        # (These optional fields shouldn't really stay in the field dict,
        # but new versions of pybtex don't support pop.)
        bib_buf = StringIO()
        Writer().write_stream(BibliographyData(entries={key: entry}), bib_buf)

        results.append((
            key,
            fields.get('year'),
            formatted.text.render(backend),
            bib_buf.getvalue(),
            fields.get('pdf', None),
            fields.get('slides', None),
            fields.get('poster', None),
        ))

    generator.context['publications'] = results
Beispiel #29
0
    def run(self):
        """
        Render a BibTeX publication list as raw HTML.

        Entries are grouped by year (newest first); each entry gets
        inline show/hide toggles for its BibTeX and abstract, optional
        per-entry .bib downloads (bibtex_dir) and an optional rendered
        detail page (detail_page_dir).

        Returns:
            list: a single docutils raw-HTML node.
        """

        bibtex_dir = self.options.get('bibtex_dir', 'bibtex')
        detail_page_dir = self.options.get('detail_page_dir', 'papers')
        highlight_authors = self.options.get('highlight_author', None)
        if highlight_authors:
            highlight_authors = highlight_authors.split(';')
        # NOTE: the conditional spans the whole sum, i.e.
        # (BASE_URL + detail_page_dir) if detail_page_dir else None.
        style = Style(self.site.config['BASE_URL'] +
                      detail_page_dir if detail_page_dir else None)
        self.state.document.settings.record_dependencies.add(self.arguments[0])

        parser = Parser()

        # Sort the publication entries by year reversed
        data = sorted(parser.parse_file(self.arguments[0]).entries.items(),
                      key=lambda e: e[1].fields['year'],
                      reverse=True)

        html = '<div class="publication-list">\n'
        cur_year = None

        if bibtex_dir:  # create the bibtex dir if the option is set
            try:
                os.makedirs(os.path.sep.join((self.output_folder, bibtex_dir)))
            except OSError:  # probably because the dir already exists
                pass

        if detail_page_dir:  # create the detail page dir if the option is set
            try:
                os.makedirs(
                    os.path.sep.join((self.output_folder, detail_page_dir)))
            except OSError:  # probably because the dir already exists
                pass

        for label, entry in data:
            # print a year title when year changes
            if entry.fields['year'] != cur_year:
                if cur_year is not None:  # not first year group
                    html += '</ul>'
                cur_year = entry.fields['year']
                html += '<h3>{}</h3>\n<ul>'.format(cur_year)

            entry.label = label  # Pass label to the style.
            pub_html = list(style.format_entries(
                (entry, )))[0].text.render_as('html')
            if highlight_authors:  # highlight one of several authors (usually oneself)
                for highlight_author in highlight_authors:
                    pub_html = pub_html.replace(
                        highlight_author.strip(),
                        '<strong>{}</strong>'.format(highlight_author), 1)
            html += '<li class="publication" style="padding-bottom: 1em;">' + pub_html

            extra_links = ""

            if 'fulltext' in entry.fields:  # the link to the full text, usually a link to the pdf file
                extra_links += '[<a href="{}">full text</a>] '.format(
                    entry.fields['fulltext'])

            bibtex_fields = dict(entry.fields)
            # Collect and remove custom links (fields starting with "customlink")
            custom_links = dict()
            for key, value in bibtex_fields.items():
                if key.startswith('customlink'):
                    custom_links[key[len('customlink'):]] = value
            # custom fields (custom links)
            for key, value in custom_links.items():
                extra_links += '[<a href="{}">{}</a>] '.format(value, key)

            # Remove some fields for the publicly available BibTeX file since they are mostly only
            # used by this plugin.
            for field_to_remove in ('abstract', 'fulltext'):
                if field_to_remove in bibtex_fields:
                    del bibtex_fields[field_to_remove]
            # Prepare for the bib file. Note detail_page_dir may need bib_data later.
            bibtex_entry = Entry(entry.type, bibtex_fields, entry.persons)
            bib_data = BibliographyData(dict({label: bibtex_entry}))
            bib_string = bib_data.to_string('bibtex')
            # Inline jQuery toggle for the hidden BibTeX block; {{ }} are
            # literal braces in str.format.
            extra_links += '''
            [<a href="javascript:void(0)" onclick="
            (function(target, id) {{
              if ($('#' + id).css('display') == 'block')
              {{
                $('#' + id).hide('fast');
                $(target).text('BibTeX&#x25BC;')
              }}
              else
              {{
                $('#' + id).show('fast');
                $(target).text('BibTeX&#x25B2;')
              }}
            }})(this, '{}');">BibTeX&#x25BC;</a>]
            '''.format('bibtex-' + label)
            if bibtex_dir:  # write bib files to bibtex_dir for downloading
                bib_link = '{}/{}.bib'.format(bibtex_dir, label)
                bib_data.to_file('/'.join([self.output_folder, bib_link]),
                                 'bibtex')

            if extra_links or detail_page_dir or 'abstract' in entry.fields:
                html += '<br>'

            # Add the abstract link.
            if 'abstract' in entry.fields:
                html += '''
                [<a href="javascript:void(0)" onclick="
                (function(target, id) {{
                  if ($('#' + id).css('display') == 'block')
                {{
                  $('#' + id).hide('fast');
                  $(target).text('abstract&#x25BC;')
                }}
                else
                {{
                  $('#' + id).show('fast');
                  $(target).text('abstract&#x25B2;')
                }}
                }})(this, '{}');">abstract&#x25BC;</a>] '''.format(
                    'abstract-' + label)

            # Hidden <div> holding the raw BibTeX, toggled by the link above.
            display_none = '<div id="{}" style="display:none"><pre>{}</pre></div>'
            bibtex_display = display_none.format('bibtex-' + label, bib_string)

            abstract_text = str(LaTeXParser(entry.fields['abstract']).parse()
                                ) if 'abstract' in entry.fields else ''
            if detail_page_dir:  # render the details page of a paper
                page_url = '/'.join((detail_page_dir, label + '.html'))
                html += '[<a href="{}">details</a>] '.format(
                    self.site.config['BASE_URL'] + page_url)
                # bib_link is only referenced when bibtex_dir is set, so the
                # conditional below never hits an unbound name.
                context = {
                    'title': str(LaTeXParser(entry.fields['title']).parse()),
                    'abstract': abstract_text,
                    'bibtex': bib_data.to_string('bibtex'),
                    'bibtex_link': '/' + bib_link if bibtex_dir else '',
                    'default_lang': self.site.config['DEFAULT_LANG'],
                    'label': label,
                    'lang': self.site.config['DEFAULT_LANG'],
                    'permalink': self.site.config['SITE_URL'] + page_url,
                    'reference': pub_html,
                    'extra_links': extra_links + bibtex_display
                }

                if 'fulltext' in entry.fields and entry.fields[
                        'fulltext'].endswith('.pdf'):
                    context['pdf'] = entry.fields['fulltext']

                self.site.render_template(
                    'publication.tmpl',
                    os.path.sep.join((self.output_folder, detail_page_dir,
                                      label + '.html')),
                    context,
                )

            html += extra_links

            # Add the hidden abstract and bibtex.
            if 'abstract' in entry.fields:
                html += '''
                <div id="{}" class="publication-abstract" style="display:none">
                <blockquote>{}</blockquote></div>
                '''.format('abstract-' + label, abstract_text)
            html += bibtex_display
            html += '</li>'

        if len(data) != 0:  # publication list is nonempty
            html += '</ul>'

        html += '</div>'

        return [
            nodes.raw('', html, format='html'),
        ]
Beispiel #30
0
def add_publications(generator):
    """
    Populates context with a list of BibTeX publications.

    Configuration
    -------------
    generator.settings['PUBLICATIONS_SRC']:
        Local path to the BibTeX file to read.

    generator.settings['PUBLICATIONS_SPLIT_BY']:
        The name of the bibtex field used for splitting the publications.
        No splitting if title is not provided.

    generator.settings['PUBLICATIONS_UNTAGGED_TITLE']:
        The title of the header for all untagged entries.
        No such list if title is not provided.

    Output
    ------
    generator.context['publications_lists']:
        A map with keys retrieved from the field named in PUBLICATIONS_SPLIT_TAG.
        Values are lists of tuples (key, year, text, bibtex, pdf, slides, poster)
        See Readme.md for more details.

    generator.context['publications']:
        Contains all publications as a list of tuples
        (key, year, text, bibtex, pdf, slides, poster).
        See Readme.md for more details.
    """
    # Nothing to do when the plugin is not configured.
    if 'PUBLICATIONS_SRC' not in generator.settings:
        return
    # Python 2 fallback kept for legacy installs; Python 3 uses io.StringIO.
    try:
        from StringIO import StringIO
    except ImportError:
        from io import StringIO
    # pybtex is an optional dependency: degrade gracefully when absent.
    try:
        from pybtex.database.input.bibtex import Parser
        from pybtex.database.output.bibtex import Writer
        from pybtex.database import BibliographyData, PybtexError
        from pybtex.backends import html
        from pybtex.style.formatting import plain
    except ImportError:
        # Logger.warn is a deprecated alias; use warning().
        logger.warning('`pelican_bib` failed to load dependency `pybtex`')
        return

    refs_file = generator.settings['PUBLICATIONS_SRC']
    try:
        bibdata_all = Parser().parse_file(refs_file)
    except PybtexError as e:
        logger.warning('`pelican_bib` failed to parse file %s: %s' %
                       (refs_file, str(e)))
        return

    publications = []
    publications_lists = {}
    publications_untagged = []

    split_by = generator.settings.get('PUBLICATIONS_SPLIT_BY')
    untagged_title = generator.settings.get('PUBLICATIONS_UNTAGGED_TITLE')

    # Format all entries once with the plain style, rendered to HTML.
    plain_style = plain.Style()
    html_backend = html.Backend()
    formatted_entries = plain_style.format_entries(
        bibdata_all.entries.values())

    for formatted_entry in formatted_entries:
        key = formatted_entry.key
        entry = bibdata_all.entries[key]
        year = entry.fields.get('year')
        # These shouldn't really stay in the field dict,
        # but new versions of pybtex don't support pop.
        pdf = entry.fields.get('pdf', None)
        slides = entry.fields.get('slides', None)
        poster = entry.fields.get('poster', None)

        tags = []
        if split_by:
            tags = entry.fields.get(split_by, [])

            # The field value is a comma-separated string; split and trim it.
            if tags:
                tags = [tag.strip() for tag in tags.split(',')]

                # Ensure a bucket exists in publications_lists for every tag.
                for tag in tags:
                    publications_lists.setdefault(tag, [])

        # Render the bibtex string for this single entry.
        bib_buf = StringIO()
        bibdata_this = BibliographyData(entries={key: entry})
        Writer().write_stream(bibdata_this, bib_buf)
        text = formatted_entry.text.render(html_backend)

        entry_tuple = {
            'key': key,
            'year': year,
            'text': text,
            'bibtex': bib_buf.getvalue(),
            'pdf': pdf,
            'slides': slides,
            'poster': poster
        }

        publications.append(entry_tuple)

        for tag in tags:
            publications_lists[tag].append(entry_tuple)

        if not tags and untagged_title:
            publications_untagged.append(entry_tuple)

    # Append the untagged list only if a title was configured for it.
    if untagged_title and publications_untagged:
        publications_lists[untagged_title] = publications_untagged

    # Expose the results to the templates.
    generator.context['publications'] = publications
    generator.context['publications_lists'] = publications_lists