def main():
    """Entry point: load a BibTeX file, check it and write it back sorted.

    Command-line options:
        --input        file to read, standard input when omitted
        --output       file to write, standard output when omitted
        --try-fix      boolean flag passed on to ``check_database``
        --anthologies  one or more BibTeX files handed to ``load_anthologies``

    The author, journal and booktitle collections returned by
    ``check_database`` are each passed to ``look_for_misspellings``; the
    database is then serialized with ``BibTexWriter`` and written to the
    selected output.
    """
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument(
        "--input",
        type=argparse.FileType('r'),
        default=sys.stdin,
        help="Input file, default is stdin.",
    )
    arg_parser.add_argument(
        "--output",
        type=argparse.FileType('w'),
        default=sys.stdout,
        help="Optional output file.",
    )
    arg_parser.add_argument(
        "--try-fix",
        default=False,
        action="store_true",
        help="Flag to search information to fix the dtabase.",
    )
    arg_parser.add_argument(
        "--anthologies",
        type=str,
        nargs='+',
        help="List of BibTeX files with know papers.",
    )
    options = arg_parser.parse_args()

    # Anthologies are optional; load them before the database is parsed.
    anthology_files = options.anthologies
    if anthology_files is not None:
        load_anthologies(anthology_files)

    database = bibtexparser.load(options.input, get_bibparser())
    cache_journal_issn(database)
    author_names, journal_names, booktitle_names = check_database(
        database, options.try_fix)

    # (collected names, report label, extra keyword arguments)
    misspelling_checks = (
        (author_names, 'Authors', {}),
        (journal_names, 'Journals', {}),
        (booktitle_names, 'Booktitles (proceedings)', {'threshold': 0.9}),
    )
    for names, label, extra in misspelling_checks:
        look_for_misspellings(names, label, **extra)

    bib_writer = BibTexWriter()
    bib_writer.indent = ' '
    bib_writer.order_by = ['author', 'year', 'title']
    bib_writer.display_order = ['author', 'title', 'booktitle', 'journal']
    bib_writer.align_values = True

    serialized = bib_writer.write(database)
    options.output.write(serialized)
def main():
    """Clean BibTeX records read from stdin and print the result to stdout.

    Standard input is split on blank lines and every non-empty chunk is
    parsed on its own, so a chunk that fails to parse is counted as skipped
    instead of aborting the whole run. After all chunks have been read, the
    ``abstract`` field is removed from every entry, comments are dropped,
    entries of type ``book`` are filtered out, and the entry list is rebuilt
    from ``get_entry_dict()`` before being written to stdout.

    Progress and the final summary go to stderr. When no chunk could be
    parsed (empty input, or every chunk was rejected) nothing is written to
    stdout and only the summary is printed.
    """
    print("Reading from stdin ...", end="", file=sys.stderr)
    input_records = sys.stdin.read().split("\n\n")
    print("done.", file=sys.stderr)

    bib_parser = BibTexParser(
        ignore_nonstandard_types=True,
        homogenize_fields=True,
        common_strings=True)

    writer = BibTexWriter()
    writer.indent = ' '
    writer.order_by = ['author', 'year', 'title']
    writer.display_order = ['author', 'title', 'booktitle', 'journal']
    writer.align_values = True

    records = 0
    skipped = 0
    # Stays None until the first chunk parses successfully, so the
    # post-processing below never touches an unbound name.
    parsed = None
    for record in input_records:
        if not record:
            continue

        # Keep the try body to the single call that can raise.
        try:
            parsed = bibtexparser.loads(record, bib_parser)
        except (pyparsing.ParseException,
                bibtexparser.bibdatabase.UndefinedString):
            skipped += 1
            continue

        records += 1
        if records % 1000 == 0:
            print("Processed {} records.".format(records), file=sys.stderr)

    # NOTE(review): the same parser object is passed to every loads() call;
    # presumably it accumulates entries into one shared database, which is
    # why the clean-up runs once after the loop -- confirm against the
    # bibtexparser version in use.
    if parsed is not None:
        for item in parsed.get_entry_list():
            item.pop("abstract", None)

        parsed.comments = []
        parsed.entries = [
            e for e in parsed.entries if e["ENTRYTYPE"] != "book"]
        # Rebuild the list from the entry dictionary; presumably this keeps
        # one entry per key -- verify the key used by get_entry_dict().
        parsed.entries = list(parsed.get_entry_dict().values())

        print(writer.write(parsed))

    print("Finished. {} records kept, {} skipped.".format(records, skipped),
          file=sys.stderr)