Beispiel #1
0
def format_folder(folder, use_backup, context=None):
    """
    Regenerate the pretty-printed 'biblio.bib' file of a folder.

    Entries are loaded from the '.queried.bib' file found in the folder,
    each entry receives a newly generated bibtex key, and the resulting
    database is written to 'biblio.bib' in that same folder.

    Args:
        folder (str): absolute or relative path to the folder to process.
        use_backup (bool): whether to backup previous files before writing.
        context (set): set of existing bibtex keys in the current context.
            A new empty set is used when this is None.

    Returns:
        Nothing, but writes the results in a file called 'biblio.bib'.
    """

    queried_path = os.path.join(folder, '.queried.bib')
    target_path = os.path.join(folder, 'biblio.bib')
    known_keys = set() if context is None else context

    # Load the queried entries and let utils account for manual files
    database = utils.read_bib_file(queried_path, homogenize=True)
    utils.guess_manual_files(folder, database, update_queried_db=True)

    # Assign a bibkey to every entry
    for record in database.entries:
        record['ID'] = nomenclature.gen_bibkey(record, known_keys)

    # Serialize the ordered database and write it out
    rendered = utils.write_bib(database, order=True)
    utils.write_with_backup(target_path, rendered, use_backup)
Beispiel #2
0
def format_folder(folder, use_backup, context=None):
    """
    Pretty print the bibtex database of the given folder.

    Reads the '.queried.bib' file located in the folder, regenerates the
    'ID' field of each of its entries, and writes the ordered output to a
    file called 'biblio.bib' in the same folder.

    Args:
        folder (str): absolute or relative path to the folder to process.
        use_backup (bool): whether to backup previous files before writing.
        context (set): set of existing bibtex keys in the current context
            (defaults to an empty set when not provided).

    Returns:
        Nothing, but writes the results in a file called 'biblio.bib'.
    """

    # Read the input database
    source_path = os.path.join(folder, '.queried.bib')
    bib_db = utils.read_bib_file(source_path, homogenize=True)
    utils.guess_manual_files(folder, bib_db, update_queried_db=True)

    # Fall back to a fresh key set when the caller supplied none
    used_keys = context if context is not None else set()

    # Regenerate the key of each entry
    for item in bib_db.entries:
        new_key = nomenclature.gen_bibkey(item, used_keys)
        item['ID'] = new_key

    # Emit the formatted bibtex file
    destination = os.path.join(folder, 'biblio.bib')
    utils.write_with_backup(destination,
                            utils.write_bib(bib_db, order=True),
                            use_backup)
Beispiel #3
0
def query_crossref_folder(folder, use_backup):
    """
    Query metadata information for unmatched pdf files in the given folder.
    This function only queries Crossref.

    A pdf is skipped when its filename cannot be parsed by
    nomenclature.parse_filename, or when it is already among the files
    returned by utils.guess_manual_files. Query results whose score reaches
    config.crossref_accept_threshold are added to the database; the names of
    the other files are collected as rejected.

    Args:
        folder (str): absolute or relative path to the folder to process.
        use_backup (bool): whether to backup previous files before writing.

    Returns:
        Nothing, but writes the following files in the given folder:
        '.queried.bib' (the updated database), '.queried.json' (the raw json
        entries accepted during this run) and, only when at least one file
        was rejected, '.rejected.txt' (one rejected filename per line).
    """

    # Create database
    db = utils.read_bib_file(os.path.join(folder, '.queried.bib'))
    files = utils.guess_manual_files(folder, db, update_queried_db=False)
    utils.add_skip_files(folder, files)
    json_entries = []
    rejected = []

    # For each pdf in the folder
    for path in utils.get_pdf_list(folder):
        file = os.path.basename(path)
        parsed = nomenclature.parse_filename(file)
        if parsed is None or file in files:
            continue
        print('Q: ' + os.path.basename(file))
        authors, title = parsed

        # Crossref
        rbib, rjson, score = providers.crossref_query(authors, title)
        if score >= config.crossref_accept_threshold:
            # Append filename and store entry
            rbib['file'] = utils.encode_filename_field(file)
            json_entries.append(rjson)
            db.entries.append(rbib)
        else:
            rejected.append(os.path.basename(file))

    # Store results
    bib_path = os.path.join(folder, '.queried.bib')
    utils.write_with_backup(bib_path, utils.write_bib(db, order=False),
                            use_backup)
    json_path = os.path.join(folder, '.queried.json')
    json_str = json.dumps(json_entries,
                          sort_keys=True,
                          indent=4,
                          separators=(',', ': '))
    utils.write_with_backup(json_path, json_str, use_backup)
    rejected_path = os.path.join(folder, '.rejected.txt')
    # The rejected list is only written when there is something to report
    if rejected:
        utils.write_with_backup(rejected_path, '\n'.join(rejected), use_backup)
Beispiel #4
0
def query_crossref_folder(folder, use_backup):
    """
    Look up Crossref metadata for the unmatched pdf files of a folder.
    Only the Crossref provider is queried by this function.

    Files whose name cannot be parsed, or that are already listed by
    utils.guess_manual_files, are ignored. A result is kept when its score is
    at least config.crossref_accept_threshold; otherwise the filename is
    recorded as rejected.

    Args:
        folder (str): absolute or relative path to the folder to process.
        use_backup (bool): whether to backup previous files before writing.

    Returns:
        Nothing, but writes '.queried.bib' and '.queried.json' in the given
        folder, plus '.rejected.txt' when at least one file was rejected.
    """

    queried_bib = os.path.join(folder, '.queried.bib')

    # Load the current database and the files that must not be queried
    database = utils.read_bib_file(queried_bib)
    known_files = utils.guess_manual_files(folder, database, update_queried_db=False)
    utils.add_skip_files(folder, known_files)

    accepted_json = []
    rejected_names = []

    # Walk over every pdf of the folder
    for pdf_path in utils.get_pdf_list(folder):
        pdf_name = os.path.basename(pdf_path)
        name_parts = nomenclature.parse_filename(pdf_name)
        if name_parts is None:
            continue
        if pdf_name in known_files:
            continue

        short_name = os.path.basename(pdf_name)
        print('Q: ' + short_name)
        authors, title = name_parts

        # Ask Crossref and reject results scoring below the threshold
        bib_entry, json_entry, score = providers.crossref_query(authors, title)
        if score < config.crossref_accept_threshold:
            rejected_names.append(os.path.basename(pdf_name))
            continue

        # Attach the filename to the entry and keep it
        bib_entry['file'] = utils.encode_filename_field(pdf_name)
        accepted_json.append(json_entry)
        database.entries.append(bib_entry)

    # Persist the bibtex database
    bib_text = utils.write_bib(database, order=False)
    utils.write_with_backup(os.path.join(folder, '.queried.bib'), bib_text, use_backup)

    # Persist the raw json entries
    json_text = json.dumps(
        accepted_json,
        sort_keys=True,
        indent=4,
        separators=(',', ': '),
    )
    utils.write_with_backup(os.path.join(folder, '.queried.json'), json_text, use_backup)

    # Persist the rejected filenames, if any
    if len(rejected_names) > 0:
        utils.write_with_backup(os.path.join(folder, '.rejected.txt'),
                                '\n'.join(rejected_names),
                                use_backup)
Beispiel #5
0
def query_google_folder(folder, use_backup):
    """
    Look up Google Scholar metadata for the unmatched pdf files of a folder.
    Only the Google Scholar provider is queried by this function.

    Files whose name cannot be parsed, or that are already listed by
    utils.guess_manual_files, are ignored, as are files for which the query
    returns None.

    Args:
        folder (str): absolute or relative path to the folder to process.
        use_backup (bool): whether to backup previous files before writing.

    Returns:
        Nothing, but writes the updated database to '.queried.bib' in the
        given folder.
    """

    queried_bib = os.path.join(folder, '.queried.bib')

    # Load the current database and the files that must not be queried
    database = utils.read_bib_file(queried_bib)
    known_files = utils.guess_manual_files(folder, database, update_queried_db=False)
    utils.add_skip_files(folder, known_files)

    for pdf_path in utils.get_pdf_list(folder):
        pdf_name = os.path.basename(pdf_path)
        name_parts = nomenclature.parse_filename(pdf_name)
        if name_parts is None:
            continue
        if pdf_name in known_files:
            continue

        short_name = os.path.basename(pdf_name)
        print('Q: ' + short_name)
        authors, title = name_parts

        # Ask Google Scholar; only a non-None answer is stored
        bib_entry = providers.scholarly_query(authors, title)
        if bib_entry is not None:
            bib_entry['file'] = utils.encode_filename_field(pdf_name)
            database.entries.append(bib_entry)

    # Persist the bibtex database
    bib_text = utils.write_bib(database, order=False)
    utils.write_with_backup(os.path.join(folder, '.queried.bib'), bib_text, use_backup)
Beispiel #6
0
def query_google_folder(folder, use_backup):
    """
    Query Google Scholar for the pdf files of a folder that have no entry yet.
    No other provider is used by this function.

    A pdf is queried only when its filename can be parsed and it is not one
    of the files reported by utils.guess_manual_files. Each non-None result
    is tagged with the encoded filename and appended to the database.

    Args:
        folder (str): absolute or relative path to the folder to process.
        use_backup (bool): whether to backup previous files before writing.

    Returns:
        Nothing, but writes the resulting database in bibtex format to
        '.queried.bib' in the given folder.
    """

    # Read the existing database and register the files to skip
    bib_db = utils.read_bib_file(os.path.join(folder, '.queried.bib'))
    skipped = utils.guess_manual_files(folder, bib_db, update_queried_db=False)
    utils.add_skip_files(folder, skipped)

    for full_path in utils.get_pdf_list(folder):
        filename = os.path.basename(full_path)
        parts = nomenclature.parse_filename(filename)
        if parts is not None and filename not in skipped:
            print('Q: ' + os.path.basename(filename))
            authors, title = parts

            # Google Scholar
            result = providers.scholarly_query(authors, title)
            if result is None:
                continue

            # Record which file the entry belongs to, then store it
            result['file'] = utils.encode_filename_field(filename)
            bib_db.entries.append(result)

    # Write the database back
    output_path = os.path.join(folder, '.queried.bib')
    output_text = utils.write_bib(bib_db, order=False)
    utils.write_with_backup(output_path, output_text, use_backup)