Esempio n. 1
0
def fetch_entry(e):
    """Fetch a bibtex record matching entry ``e`` and return it parsed.

    When ``e`` carries a ``'doi'`` that ``isvaliddoi`` accepts, the record is
    fetched with ``fetch_bibtex_by_doi``. Otherwise a query is built from
    whichever of the ``'author'`` and ``'title'`` fields are non-empty and
    sent through ``fetch_bibtex_by_fulltext_crossref``.

    Args:
        e: Mapping of bibtex field names to their values.

    Returns:
        The first entry of the parsed bibtex response.

    Raises:
        ValueError: If ``e`` has no valid DOI and neither a non-empty
            author nor a non-empty title to search with.
    """
    if 'doi' in e and isvaliddoi(e['doi']):
        bibtex = fetch_bibtex_by_doi(e['doi'])
    else:
        kw = {}
        if e.get('author', ''):
            kw['author'] = latex_to_unicode(family_names(e['author']))
        if e.get('title', ''):
            # NOTE(review): the title goes through family_names() exactly like
            # the author field does; kept as-is, but confirm this is intended.
            kw['title'] = latex_to_unicode(family_names(e['title']))
        if not kw:
            # Nothing to search with: fail here with a clear error instead of
            # falling through to the parser with ``bibtex`` never assigned.
            raise ValueError('no author nor title field')
        bibtex = fetch_bibtex_by_fulltext_crossref('', **kw)
    db = bibtexparser.loads(bibtex)
    return db.entries[0]
Esempio n. 2
0
def main():
    """Rename PDF files after the bibliographic metadata extracted from them.

    Reads the module-level ``args`` and ``logger``. ``args.input`` is either a
    single ``.pdf`` path or a directory; a directory is scanned for ``*.pdf``
    files, descending into sub-directories when ``args.recursive`` is set.

    Each PDF is moved into ``args.output_dir`` under the name
    ``"<year> - <F>. <Family> - <title>.pdf"``, built from the first bibtex
    entry returned by ``extract_pdf_metadata``. A PDF without a usable author
    is left in place with a warning. Any other failure is logged and, when
    ``args.failed_dir`` is not ``None``, the PDF is moved there under its
    original base name.
    """
    global args, logger

    if args.input.endswith(".pdf"):
        pdfs = [args.input]
    elif not args.recursive:
        pdfs = glob.glob("%s/*.pdf" % args.input)
    else:
        pdfs = []
        for (dirpath, dirnames, filenames) in os.walk(args.input):
            for name in filenames:
                if name.endswith(".pdf"):
                    pdfs.append("%s/%s" % (dirpath, name))

    for cur_pdf in pdfs:
        print("")
        print(
            "# ======================================================================"
        )
        print("# Renaming %s..." % cur_pdf)
        print(
            "# ======================================================================"
        )
        try:
            bibtex = extract_pdf_metadata(cur_pdf,
                                          search_doi=False,
                                          search_fulltext=True,
                                          scholar=False,
                                          minwords=200,
                                          max_query_words=200)

            bib = bibtexparser.loads(bibtex)
            entry = bib.entries[0]

            # Generate accurate format for author
            author = entry.get('author', 'unknown')
            fam_names = family_names(author.lower())
            try:
                fir_names = first_names(author.lower())
            except Exception:
                raise Exception(
                    "The following author entry doesn't contain proper author names: \"%s\""
                    % (author))

            if (not fam_names) or (fam_names[0] == "unknown"):
                logger.warning("%s doesn't have proper author: \"%s\"" %
                               (cur_pdf, str(fam_names)))
                continue

            formatted_name = "%s. %s" % (fir_names[0][0].capitalize(),
                                         fam_names[0].capitalize())
            final_name = "%s - %s - %s.pdf" % (entry["year"], formatted_name,
                                               entry["title"])
            # Metadata is free text: a path separator in it (e.g. a title such
            # as "Input/output ...") would be read as a sub-directory of the
            # output directory and break or misplace the move.
            for sep in ("/", os.sep, os.altsep):
                if sep:
                    final_name = final_name.replace(sep, "-")

            os.makedirs(args.output_dir, exist_ok=True)
            shutil.move(cur_pdf, "%s/%s" % (args.output_dir, final_name))
        except Exception as ex:
            if args.failed_dir is not None:
                # Setting the file aside is best-effort; a failure here must
                # not escape the handler and abort the remaining PDFs.
                try:
                    os.makedirs(args.failed_dir, exist_ok=True)
                    shutil.move(
                        cur_pdf,
                        "%s/%s" % (args.failed_dir, os.path.basename(cur_pdf)))
                except OSError as move_err:
                    logger.error("Cannot move \"%s\" to \"%s\": %s" %
                                 (cur_pdf, args.failed_dir, move_err))
            logger.error("Ignored as cannot rename \"%s\": %s" % (cur_pdf, ex))
            logger.error(str(ex))
            traceback.print_exc(file=sys.stderr)