def main(argv=None):
    """Command-line entry point: load article metadata and export it as CSV.

    Parses the command line, derives the output filename template, loads the
    metadata with ``importxl`` and hands it to ``exportcsvnew``.

    Args:
        argv: Optional list of argument strings to parse instead of the
            process command line. ``None`` (the default) makes argparse read
            ``sys.argv[1:]``, so existing ``main()`` callers are unaffected.

    Raises:
        SystemExit: Raised by argparse for ``-h`` or invalid arguments.
    """
    parser = argparse.ArgumentParser()
    # NOTE(review): the help text calls this a PDF file, but the value is
    # passed to importxl() below, which the original comment describes as an
    # Excel import -- confirm which is intended.
    parser.add_argument(
        'filename',
        help='PDF file to process.',
        type=str,
    )
    parser.add_argument(
        '-v', '--verbose',
        action='store_true',
        dest='verbose',
        help='Print status messages.',
    )
    parser.add_argument(
        '-t', '--test',
        action='store_true',
        help="Test only. Don't output any files. Use with debug options to see test output.",
    )
    # default=0 is kept on purpose so the unset value handed downstream stays
    # exactly what it was (0 rather than False).
    parser.add_argument(
        '-d', '--debug',
        action='store_true',
        dest='debug',
        help="Show debug output.",
        default=0,
    )
    parser.add_argument(
        '-o', '--output-file',
        dest='destination',
        type=str,
        help='Use supplied filename as filename template for output files.',
    )
    args = parser.parse_args(argv)

    # Strip the extension to get the output filename template. Fall back to
    # the input filename when no (or an empty) output filename was supplied.
    output_file = os.path.splitext(args.destination or args.filename)[0]

    # Fetch metadata from import Excel file
    title, start_page, start_pdf_page, end_pdf_page, author, section = importxl(
        args.filename)

    # Export CSV file, or show what output would be if test flag is set
    exportcsvnew(
        output_file,
        args.verbose,
        args.debug,
        args.test,
        title,
        start_page,
        start_pdf_page,
        end_pdf_page,
        author,
        section=section,
    )
def main(argv=None):
    """Command-line entry point: analyze a multi-article PDF and split it.

    Parses the command line and derives the output filename template. Page
    ranges then come from one of two places: an import CSV (``-i``) read with
    ``journaltools.importcsv``, or the PDF itself, in which case the page text
    is fetched, processed with ``processpdfnew`` and exported through
    ``journaltools.exportcsvnew``. Finally the PDF is split with
    ``journaltools.splitpdf`` unless the test or CSV-only flag is set.

    Args:
        argv: Optional list of argument strings to parse instead of the
            process command line. ``None`` (the default) makes argparse read
            ``sys.argv[1:]``, so existing ``main()`` callers are unaffected.

    Raises:
        SystemExit: Raised by argparse for ``-h`` or invalid arguments.
    """
    parser = argparse.ArgumentParser(
        description='Function to extract metadata and split a PDF containing multiple journal articles. Set up for '
                    'Buffalo Law Review book reviews.'
    )
    parser.add_argument(
        'filename',
        help='PDF file to analyze and split.',
        type=str,
    )
    parser.add_argument(
        '-v', '--verbose',
        action='store_true',
        dest='verbose',
        help='Print status messages.',
    )
    parser.add_argument(
        '-t', '--test',
        action='store_true',
        help="Test only. Don't output any files. Use with debug options to see test output.",
    )
    parser.add_argument(
        '--write-csv-only',
        action="store_true",
        dest="csvOnly",
        help="Write CSV file, but don't split PDFs. Test flag takes precedence.",
    )
    parser.add_argument(
        '-d', '--debug',
        dest='debug',
        type=int,
        help="Set debug level (1-6). Levels 1-4 offer increasing levels of output. Level 5 displays "
             "the PDF text. Level 6 prints all of the records.",
        default=0,
    )
    parser.add_argument(
        '-o', '--output-file',
        dest='destination',
        type=str,
        help='Use supplied filename as filename template for output files.',
    )
    parser.add_argument(
        '-i', '--input-file',
        dest='input_file',
        type=str,
        help="Import CSV file to be used for PDF splitting. Must be in same format as export.",
    )
    args = parser.parse_args(argv)

    # Strip the extension to get the output filename template. Fall back to
    # the input filename when no (or an empty) output filename was supplied.
    output_file = os.path.splitext(args.destination or args.filename)[0]

    if args.input_file:
        # An import CSV was supplied: take the page ranges for splitting from it.
        start_pdf_page, end_pdf_page = journaltools.importcsv(
            args.input_file, args.debug)
    else:
        # Fetch OCR page text from PDF file
        # NOTE(review): the meaning of the literal 0 argument is not visible
        # here -- confirm against journaltools.getpdf.
        page_text = journaltools.getpdf(
            args.filename, 0, args.verbose, args.debug)

        # Process pages
        title, start_page, start_pdf_page, end_pdf_page, author = processpdfnew(
            args.verbose, args.debug, page_text)

        # Export CSV file, or show what output would be if test flag is set.
        # This belongs to this branch only: title, start_page and author are
        # never bound when the page ranges come from an import CSV.
        journaltools.exportcsvnew(
            output_file,
            args.verbose,
            args.debug,
            args.test,
            title,
            start_page,
            start_pdf_page,
            end_pdf_page,
            author,
        )

    # Split Original PDF into separate documents for each piece, unless test
    # or csvOnly flags are set
    if not args.test and not args.csvOnly:
        journaltools.splitpdf(
            args.filename,
            args.verbose,
            args.debug,
            start_pdf_page,
            end_pdf_page,
            output_file,
        )