def fetch_entry(e):
    """Look up a bibtex entry matching ``e`` and return it parsed.

    When ``e`` has a ``'doi'`` field accepted by ``isvaliddoi``, the bibtex
    is obtained with ``fetch_bibtex_by_doi``. Otherwise a query is built
    from the non-empty ``'author'`` and/or ``'title'`` fields and passed to
    ``fetch_bibtex_by_fulltext_crossref``.

    Args:
        e: Mapping of bibtex field names to values; must support ``in``,
            item access and ``.get``.

    Returns:
        The first entry of the parsed bibtex database (``db.entries[0]``).

    Raises:
        ValueError: If ``e`` has no valid DOI and neither a non-empty
            ``'author'`` nor a non-empty ``'title'`` to search with.
    """
    if 'doi' in e and isvaliddoi(e['doi']):
        bibtex = fetch_bibtex_by_doi(e['doi'])
    else:
        kw = {}
        if e.get('author', ''):
            kw['author'] = latex_to_unicode(family_names(e['author']))
        if e.get('title', ''):
            # NOTE(review): family_names() is applied to the title as well;
            # this looks like a copy/paste of the author line - confirm
            # whether the raw title should be used instead.
            kw['title'] = latex_to_unicode(family_names(e['title']))
        if not kw:
            # Nothing to search with. Raise here rather than falling through
            # to the parser with ``bibtex`` unbound.
            raise ValueError('no author nor title field')
        bibtex = fetch_bibtex_by_fulltext_crossref('', **kw)

    db = bibtexparser.loads(bibtex)
    return db.entries[0]
def _collect_pdf_paths(path, recursive):
    """Return the PDF paths designated by ``path``.

    A ``path`` ending in ".pdf" is returned as a single-element list.
    Otherwise ``path`` is treated as a directory: only its direct "*.pdf"
    children are listed unless ``recursive`` is true, in which case the
    whole tree below it is walked.
    """
    if path.endswith(".pdf"):
        return [path]
    if not recursive:
        return glob.glob(os.path.join(path, "*.pdf"))

    found = []
    for dirpath, _dirnames, filenames in os.walk(path):
        for name in filenames:
            if name.endswith(".pdf"):
                found.append(os.path.join(dirpath, name))
    return found


def _sanitize_filename(name):
    """Replace path separators in ``name`` so it stays one path component."""
    for sep in (os.sep, os.altsep):
        if sep:  # os.altsep is None on platforms with a single separator
            name = name.replace(sep, "-")
    return name


def main():
    """Rename PDFs to "<year> - <F. Family> - <title>.pdf" from their metadata.

    Reads the module-level ``args`` (``input``, ``recursive``,
    ``output_dir``, ``failed_dir``) and ``logger``. For every PDF found, the
    bibtex returned by ``extract_pdf_metadata`` is parsed and the first
    entry's year, first author and title are used to build the new file
    name; the file is then moved into ``args.output_dir``.

    A PDF whose first author cannot be determined is skipped with a warning
    and left in place. Any other failure is logged with its traceback and,
    when ``args.failed_dir`` is set, the PDF is moved there under its
    original base name. Failures never abort the remaining files.
    """
    rule = "# ======================================================================"

    for cur_pdf in _collect_pdf_paths(args.input, args.recursive):
        print("")
        print(rule)
        print("# Renaming %s..." % cur_pdf)
        print(rule)

        try:
            bibtex = extract_pdf_metadata(cur_pdf,
                                          search_doi=False,
                                          search_fulltext=True,
                                          scholar=False,
                                          minwords=200,
                                          max_query_words=200)
            entry = bibtexparser.loads(bibtex).entries[0]

            # Generate accurate format for author
            author = entry.get('author', 'unknown')
            fam_names = family_names(author.lower())
            try:
                fir_names = first_names(author.lower())
            except Exception as err:
                # Chain the cause so the traceback keeps the parser error.
                raise Exception(
                    "The following author entry doesn't contain proper author names: \"%s\""
                    % author) from err

            if (not fam_names) or (fam_names[0] == "unknown"):
                logger.warning("%s doesn't have proper author: \"%s\"" %
                               (cur_pdf, str(fam_names)))
                continue

            formatted_name = "%s. %s" % (fir_names[0][0].capitalize(),
                                         fam_names[0].capitalize())
            # Titles may contain "/" (e.g. "TCP/IP"), which would otherwise
            # be read as a directory boundary and make the move fail.
            final_name = _sanitize_filename(
                "%s - %s - %s.pdf" %
                (entry["year"], formatted_name, entry["title"]))

            os.makedirs(args.output_dir, exist_ok=True)
            shutil.move(cur_pdf, os.path.join(args.output_dir, final_name))
        except Exception as ex:
            if args.failed_dir is not None:
                try:
                    os.makedirs(args.failed_dir, exist_ok=True)
                    shutil.move(
                        cur_pdf,
                        os.path.join(args.failed_dir,
                                     os.path.basename(cur_pdf)))
                except OSError as move_err:
                    # Failing to quarantine one file must not stop the batch.
                    logger.error("Cannot move \"%s\" to \"%s\": %s" %
                                 (cur_pdf, args.failed_dir, move_err))
            logger.error("Ignored as cannot rename \"%s\": %s" % (cur_pdf, ex))
            logger.error(str(ex))
            traceback.print_exc(file=sys.stderr)