def test_display_order(self):
        with open('bibtexparser/tests/data/multiple_entries_and_comments.bib') as bibtex_file:
            bib_database = bibtexparser.load(bibtex_file)
        writer = BibTexWriter()
        writer.contents = ['entries']
        writer.display_order = ['year', 'publisher', 'title']
        result = bibtexparser.dumps(bib_database, writer)
        expected = \
"""@book{Toto3000,
 title = {A title},
 author = {Toto, A and Titi, B}
}

@article{Wigner1938,
 year = {1938},
 publisher = {The Royal Society of Chemistry},
 title = {The transition state method},
 author = {Wigner, E.},
 doi = {10.1039/TF9383400029},
 issn = {0014-7672},
 journal = {Trans. Faraday Soc.},
 owner = {fr},
 pages = {29--41},
 volume = {34}
}

@book{Yablon2005,
 year = {2005},
 publisher = {Springer},
 title = {Optical fiber fusion slicing},
 author = {Yablon, A.D.}
}

"""
        self.assertEqual(result, expected)
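
For reference, a minimal standalone sketch of what the test above exercises (the input file name is only an assumption): fields listed in display_order are written first, and any remaining fields follow in alphabetical order.

# Hedged sketch, not part of the test above; 'library.bib' is a hypothetical file.
import bibtexparser
from bibtexparser.bwriter import BibTexWriter

with open('library.bib') as bibtex_file:
    bib_database = bibtexparser.load(bibtex_file)

writer = BibTexWriter()
writer.display_order = ['year', 'publisher', 'title']  # these fields come first
# All other fields are appended after these, sorted alphabetically.
print(bibtexparser.dumps(bib_database, writer))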
Example #2
    def export_to_bibtex_one_file(self, path: str = "all.bib"):
        """stores publications in bibtex format in one file

        Parameters
        ----------
        path : optional
            path where the resulting file should be stored, by default "all.bib"

        Raises
        ------
        KeyError
            if the type of publication and the handle are not specified
        """
        self._create_dir(path)
        for pub in self._dep_pubs:
            meta = pub.get_bibtex_representation()
            if not meta:
                print("This pub has no meta")
            else:
                if meta["type"] and meta["handle"]:
                    handle = meta.pop("handle")
                    pub_type = meta.pop("type")
                    db = BibDatabase()
                    db.entries = [meta.copy()]
                    db.entries[0].update({"ID": handle, "ENTRYTYPE": pub_type})
                    writer = BibTexWriter()
                    # Keep the remaining fields in their original order.
                    writer.display_order = list(meta)
                    with open(path, "a") as bibfile:
                        bibfile.write(writer.write(db))
                else:
                    raise KeyError("the type of publication and metdata"
                                   + "are required")
Example #3
def _writer():
    '''
    Return a configured bibtex writer.
    '''
    writer = BibTexWriter()
    writer.indent = '  '
    writer.order_entries_by = ('ID',)
    writer.display_order = ['title', 'author', 'editor']
    return writer
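
A possible usage sketch for the helper above (the input path is an assumption, not from the original):

# Hedged usage sketch: dump a loaded database with the configured writer.
import bibtexparser

with open('refs.bib') as f:            # hypothetical input file
    bib_database = bibtexparser.load(f)

print(bibtexparser.dumps(bib_database, _writer()))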
Example #4
    def export(self, path: str = "./pubs/") -> None:
        """overrides superclass abstract method

        For each publication will be created a new folder with the title
        of that publication as the name of the folder.
        The bibtex file is named "cite.bib"  and written inside the folder previously
        created.

        Parameters
        ----------
        path : optional
            path where files should be saved, by default "./pubs/"

        Raises
        ------
        KeyError
            if the type of publication and the handle are not specified
        """
        self._create_dir(path)
        for pub in self._dep_pubs:
            meta = pub.get_bibtex_representation()
            if not meta:
                print("This pub has no meta")
            else:
                if meta["type"] and meta["handle"]:
                    handle = meta.pop("handle")
                    pub_type = meta.pop("type")
                    db = BibDatabase()
                    db.entries = [meta.copy()]
                    db.entries[0].update({"ID": handle, "ENTRYTYPE": pub_type})
                    writer = BibTexWriter()
                    # Keep the remaining fields in their original order
                    # inside the written BibTeX file.
                    writer.display_order = list(meta)
                    try:
                        # Bibtex representation title has curly braces
                        # meta["title"] = meta["title"][1:len(meta["title"]) - 1]
                        my_dir = meta["title"].replace("/", "_").replace(" ", "-") \
                            .replace("\"", "")
                        full_path = os.path.join(path, my_dir)
                        if not os.path.exists(full_path):
                            os.mkdir(full_path, 0o755)
                        with open(os.path.join(full_path, "cite.bib"), "w") as bibfile:
                            bibfile.write(writer.write(db))
                    except OSError:
                        print("Creation of the directory failed: {}".format(my_dir))
                else:
                    raise KeyError("the type of publication and metdata"
                                   + "are required")
Example #5
def bibfile_latex_to_unicode(bibtex_fname):
    parser = BibTexParser(common_strings=True)
    with open(bibtex_fname) as bibtex_file:
        bibdb = bibtexparser.load(bibtex_file, parser=parser)
    for i, entry in enumerate(bibdb.entries):
        delete_field(bibdb, i, 'file')
        for field in entry.keys():
            bibdb.entries[i][field] = latex_to_unicode(entry[field])
    bibdb.comments = []
    writer = BibTexWriter()
    writer.display_order = ['title', 'year', 'author', 'journal', 'booktitle']
    clean_file = writer.write(bibdb)
    # Use for debug purposes:
    # with open('tmp.bib','w') as f:
    #     f.write(clean_file)
    return clean_file
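
One way the helper above might be used (the file names are assumptions):

# Hedged usage sketch: convert LaTeX escapes to Unicode and save the result.
clean_bib = bibfile_latex_to_unicode('refs.bib')    # hypothetical input file
with open('refs_unicode.bib', 'w') as f:            # hypothetical output file
    f.write(clean_bib)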
Example #6
def proc_bib(input_io: TextIOWrapper,
             output_io: TextIOWrapper,
             jdb: JournalDB,
             silent: bool = False,
             output_format: str = "bib",
             abbrev_type="iso4"):
    if not hasattr(Journal, abbrev_type):
        raise ValueError(f"Invalid abbreviation type `{abbrev_type}`")

    bib_db = bibtexparser.load(input_io)

    for entry in bib_db.entries:
        journaltitle = entry.get("journaltitle")
        if journaltitle is None:
            continue
        journaltitle = braces_regex.sub("", journaltitle)

        name_pattern = re.compile(fr"^{re.escape(journaltitle)}(:?.*)$",
                                  RegexFlag.IGNORECASE)
        # TODO: query using lambdas?
        # TODO: normalize names (just in index?).
        res = jdb.journals.query_one(Journal.names_key, name_pattern)
        if res:
            _, journal = res
            abbrev = getattr(journal, abbrev_type)

            if output_format == "bib":
                entry["journaltitle"] = f"{{{abbrev or journaltitle}}}"
            elif output_format == "sourcemap":
                gen_sourcemap_map(journal, journaltitle, abbrev, output_io)

        abbrev_msg = f"abbreviating to '{abbrev}'" if res else f"no abbreviation found"
        if not silent:
            info(f"found journal name '{journaltitle}'; {abbrev_msg}.")

    if output_format == "bib":
        bib_writer = BibTexWriter()
        bib_writer.add_trailing_comma = True
        bib_writer.display_order = None
        bib_writer.indent = "\t"
        bib_writer.order_entries_by = None
        bibtex_code = bib_writer.write(bib_db)
        output_io.write(bibtex_code)
    elif output_format == "sourcemap":
        pass
Example #7
def main():
    """Main function of the script.

    Loads the bib file, runs the checks on it, and prints out the
    sorted and formatted database.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--input",
                        type=argparse.FileType('r'),
                        default=sys.stdin,
                        help="Input file, default is stdin.")
    parser.add_argument("--output",
                        type=argparse.FileType('w'),
                        default=sys.stdout,
                        help="Optional output file.")
    parser.add_argument("--try-fix",
                        default=False,
                        action="store_true",
                        help="Flag to search information to fix the dtabase.")
    parser.add_argument("--anthologies",
                        type=str,
                        nargs='+',
                        help="List of BibTeX files with know papers.")
    args = parser.parse_args()

    if args.anthologies is not None:
        load_anthologies(args.anthologies)
    bib_database = bibtexparser.load(args.input, get_bibparser())
    cache_journal_issn(bib_database)
    authors, journals, booktitles = check_database(bib_database, args.try_fix)

    look_for_misspellings(authors, 'Authors')
    look_for_misspellings(journals, 'Journals')
    look_for_misspellings(booktitles,
                          'Booktitles (proceedings)',
                          threshold=0.9)

    writer = BibTexWriter()
    writer.indent = '    '
    writer.order_entries_by = ('author', 'year', 'title')
    writer.display_order = ['author', 'title', 'booktitle', 'journal']
    writer.align_values = True
    args.output.write(writer.write(bib_database))
Example #8
def main():
    print("Reading from stdin ...", end="", file=sys.stderr)
    input_records = sys.stdin.read().split("\n\n")
    print("done.", file=sys.stderr)

    bib_parser = BibTexParser(ignore_nonstandard_types=True,
                              homogenize_fields=True,
                              common_strings=True)

    writer = BibTexWriter()
    writer.indent = '    '
    writer.order_entries_by = ('author', 'year', 'title')
    writer.display_order = ['author', 'title', 'booktitle', 'journal']
    writer.align_values = True

    records = 0
    skipped = 0
    for record in input_records:
        if not record:
            continue
        try:
            # Reusing the same parser object accumulates entries across calls,
            # so `parsed` ends up holding every successfully parsed record.
            parsed = bibtexparser.loads(record, bib_parser)
            records += 1
            if records % 1000 == 0:
                print("Processed {} records.".format(records), file=sys.stderr)
        except (pyparsing.ParseException,
                bibtexparser.bibdatabase.UndefinedString):
            skipped += 1

    for item in parsed.get_entry_list():
        if "abstract" in item:
            del item["abstract"]

    parsed.comments = []
    parsed.entries = [e for e in parsed.entries if e["ENTRYTYPE"] != "book"]
    parsed.entries = list(parsed.get_entry_dict().values())

    print(writer.write(parsed))
    print("Finished. {} records kept, {} skipped.".format(records, skipped),
          file=sys.stderr)
Example #9
def generate_bib_from_arxiv(arxiv_item, value, field="id"):
    if field == "ti":
        article_id = arxiv_item["id"].split("http://arxiv.org/abs/")[1]
    else:
        article_id = value

    key = "arxiv:" + article_id
    title = arxiv_item.title
    authors = arxiv_item.authors
    # Join author names; fall back to an empty string so the writer
    # always receives a string value.
    authors = " and ".join(author["name"] for author in authors) if authors else ""

    published = arxiv_item.published.split("-")
    year = ''
    if len(published) > 1:
        year = published[0]
    bib = BibDatabase()
    bib.entries = [{
        "title": title,
        "author": authors,
        "year": year,
        "eprinttype": "arxiv",
        "eprint": article_id,
        "keywords": "",
        "abstract": arxiv_item.summary,
        "ID": key,
        "ENTRYTYPE": "article"
    }]
    writer = BibTexWriter()
    writer.add_trailing_comma = True
    writer.display_order = [
        'title', 'author', 'year', 'eprinttype', 'eprint', 'keywords',
        'abstract'
    ]
    writer.indent = "  "
    bib = writer.write(bib)
    return bib
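
A hedged sketch of how this function might be fed, assuming arxiv_item behaves like a feedparser entry from the arXiv export API (supporting both attribute and key access); none of this appears in the original source:

# Hypothetical call: fetch one arXiv record and render it as BibTeX.
import feedparser

feed = feedparser.parse("http://export.arxiv.org/api/query?id_list=1706.03762")
if feed.entries:
    arxiv_item = feed.entries[0]
    print(generate_bib_from_arxiv(arxiv_item, "1706.03762", field="id"))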
Example #10
#############################################
# First we do Publications stuff

import bibtexparser
from bibtexparser.bwriter import BibTexWriter
from bibtexparser.bibdatabase import BibDatabase

import rfeed

writer = BibTexWriter()
writer.indent = '    '
writer.display_order = ('ENTRYTYPE', 'author', 'title', 'year', 'journal',
                        'booktitle', 'school', 'howpublished', 'editor',
                        'series', 'volume', 'issue', 'number', 'month',
                        'pages', 'numpages', 'publisher', 'organization',
                        'acmid', 'address', 'isbn', 'issn', 'location',
                        'language', 'doi', 'urldate', 'link', 'url', 'keyword',
                        'keywords', 'abstract')


def entry_sort_key(entry):
    if 'urldate' in entry:
        return entry['urldate']
    if 'link' not in entry:
        raise Exception("{} does not have attribute 'link'".format(
            entry['ID']))
    return entry['year'] + "-01-01"
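

A small sketch of how entry_sort_key might be combined with the writer configured above (the input file is an assumption):

# Hedged usage sketch: sort entries newest-first with entry_sort_key, then write.
with open('site.bib') as f:                    # hypothetical input file
    db = bibtexparser.load(f)

db.entries = sorted(db.entries, key=entry_sort_key, reverse=True)
writer.order_entries_by = None                 # keep the manual ordering
print(writer.write(db))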


def normalise_name(n):
Example #11
def parse_bibtex_entry(entry,
                       pub_dir="publication",
                       featured=False,
                       overwrite=False,
                       normalize=False,
                       dry_run=False):
    """Parse a bibtex entry and generate corresponding publication bundle."""
    from academic.cli import log, LINKS_HEADER, ANTHOLOGY_LINK, ARXIV_LINK
    log.info(f"Parsing entry {entry['ID']}")

    bundle_path = f"content/{pub_dir}/{slugify(entry['ID'])}"
    markdown_path = os.path.join(bundle_path, "index.md")
    cite_path = os.path.join(bundle_path, "cite.bib")
    date = datetime.utcnow()
    timestamp = date.isoformat("T") + "Z"  # RFC 3339 timestamp.

    # Do not overwrite publication bundle if it already exists.
    if not overwrite and os.path.isdir(bundle_path):
        log.warning(
            f"Skipping creation of {bundle_path} as it already exists. "
            f"To overwrite, add the `--overwrite` argument.")
        return

    # Create bundle dir.
    log.info(f"Creating folder {bundle_path}")
    if not dry_run:
        Path(bundle_path).mkdir(parents=True, exist_ok=True)

    # Prepare YAML front matter for Markdown file.
    frontmatter = ["---"]
    frontmatter.append(f'title: "{clean_bibtex_str(entry["title"])}"')
    year = ""
    month = "01"
    day = "01"
    if "date" in entry:
        dateparts = entry["date"].split("-")
        if len(dateparts) == 3:
            year, month, day = dateparts[0], dateparts[1], dateparts[2]
        elif len(dateparts) == 2:
            year, month = dateparts[0], dateparts[1]
        elif len(dateparts) == 1:
            year = dateparts[0]
    if "month" in entry and month == "01":
        month = month2number(entry["month"])
    if "year" in entry and year == "":
        year = entry["year"]
    if len(year) == 0:
        log.error(f'Invalid date for entry `{entry["ID"]}`.')
    frontmatter.append(f"date: {year}-{month}-{day}")

    frontmatter.append(f"publishDate: {timestamp}")

    authors = None
    if "author" in entry:
        authors = entry["author"]
    elif "editor" in entry:
        authors = entry["editor"]
    if authors:
        authors = clean_bibtex_authors(
            [i.strip() for i in authors.replace("\n", " ").split(" and ")])
        frontmatter.append(f"authors: [{', '.join(authors)}]")

    frontmatter.append(
        f'publication_types: ["{PUB_TYPES.get(entry["ENTRYTYPE"], 0)}"]')

    if "abstract" in entry:
        frontmatter.append(
            f'abstract: "{clean_bibtex_str(entry["abstract"])}"')
    else:
        frontmatter.append('abstract: ""')

    frontmatter.append(f"featured: {str(featured).lower()}")

    # Publication name.
    if "booktitle" in entry:
        frontmatter.append(
            f'publication: "*{clean_bibtex_str(entry["booktitle"])}*"')
    elif "journal" in entry:
        frontmatter.append(
            f'publication: "*{clean_bibtex_str(entry["journal"])}*"')
    elif "publisher" in entry:
        frontmatter.append(
            f'publication: "*{clean_bibtex_str(entry["publisher"])}*"')
    else:
        frontmatter.append('publication: ""')
    if "venue" in entry:
        frontmatter.append(
            f'publication_short: "{clean_bibtex_str(entry["venue"])}"')
        del entry["venue"]

    if "keywords" in entry:
        frontmatter.append(
            f'tags: [{clean_bibtex_tags(entry["keywords"], normalize)}]')
    if "arxiv" or "anthology" in entry:
        frontmatter.append(LINKS_HEADER)
    if "anthology" in entry:
        frontmatter.append(ANTHOLOGY_LINK +
                           clean_bibtex_str(entry["anthology"]))
        del entry["anthology"]
    if "arxiv" in entry:
        frontmatter.append(ARXIV_LINK + clean_bibtex_str(entry["arxiv"]))
    if "slides" in entry:
        frontmatter.append(f'url_slides: ' + entry['slides'])
    if "video" in entry:
        frontmatter.append(f'url_video: ' + entry['video'])
    if "doi" in entry:
        frontmatter.append(f'doi: "{entry["doi"]}"')

    if "recent" in entry:
        frontmatter.append(f'recent: {entry["recent"]}')
        del entry['recent']

    frontmatter.append(f'url_pdf: papers/' + entry['ID'] + '.pdf')
    if 'code' in entry:
        frontmatter.append(f'url_code: ' + entry['code'])
        del entry['code']

    frontmatter.append("---\n\n")

    # Save citation file.
    log.info(f"Saving citation to {cite_path}")
    db = BibDatabase()
    db.entries = [entry]
    writer = BibTexWriter()
    writer.display_order = ["title", "author", "booktitle", "month", "year", "address", "publisher",\
                         "pages","volume", "url", "arxiv", "abstract"]
    if not dry_run:
        with open(cite_path, "w", encoding="utf-8") as f:
            f.write(writer.write(db))

    # Save Markdown file.
    try:
        log.info(f"Saving Markdown to '{markdown_path}'")
        if not dry_run:
            with open(markdown_path, "w", encoding="utf-8") as f:
                f.write("\n".join(frontmatter))
    except IOError:
        log.error("Could not save file.")
               "volume",
               "series",
               "editor",
               "year",
               "month",
               "date",
               "publisher",
               "address",
               "isbn",
               "issn",
               "articleno",
               "track",
               "doi",
               "url",
               "urlsuppl1",
               "urlsuppl2",
               "urlsuppl3",
               "presentation-video", 
               "keywords",
               "abstract")

# bibtex entries indented by a single space
FIELD_INDENT = "  "

# Writer object to use for writing back nime proceedings in the correct format.
writer = BibTexWriter()
writer.indent = FIELD_INDENT
writer.display_order = FIELD_ORDER
writer.common_strings = False  # would prefer 3-letter month codes, but enabling this also writes the month @string definitions at the top of each file
writer.order_entries_by = ("articleno", "url", "ID")
Example #13
    print("Output file exists")
    sys.exit(1)

# read input
with open(sys.argv[1], "r") as f:
    a = bibtexparser.load(f, BibTexParser(common_strings=True))

with open(sys.argv[2], "r") as f:
    b = bibtexparser.load(f, BibTexParser(common_strings=True))

# merge databases (entries from the first file win on duplicate keys)
merged = a
merged.comments.extend(b.comments)
merged.preambles.extend(b.preambles)
merged.strings.update(b.strings)

known = set(merged.entries_dict.keys())
for key, entry in b.entries_dict.items():
    if key not in known:
        known.add(key)
        merged.entries.append(entry)

# write to file
writer = BibTexWriter()
writer.indent = "  "
writer.add_trailing_comma = True
writer.display_order = ["author", "title"]

with open(sys.argv[3], "w") as f:
    f.write(writer.write(merged))