def pdf2array(pdffile, res=300, first_page=1, last_page=5):
    """Rasterize a PDF with Ghostscript and load the pages as arrays.

    Ghostscript (``gs``) renders the requested page range into a temporary
    multi-page TIFF (``tiff24nc`` device), which is then read back with
    ``TiffFile``. The temporary file is always removed before returning,
    including when reading it fails.

    Args:
        pdffile: Path of the PDF file handed to Ghostscript.
        res: Rendering resolution passed to Ghostscript's ``-r`` option.
        first_page: First page to render (``-dFirstPage``). Defaults to 1.
        last_page: Last page to render (``-dLastPage``). Defaults to 5, so
            by default only the first five pages are converted.

    Returns:
        A tuple ``(pages, shapes)`` where ``pages`` is a list with one array
        per rendered page and ``shapes`` is the list of the corresponding
        page shapes. Returns ``(None, None)`` when Ghostscript produced no
        output file.

    Note:
        The previous docstring stated that the color order is BGR. This
        function performs no channel reordering, and ``tiff24nc`` is
        documented by Ghostscript as 24-bit RGB.
        NOTE(review): confirm which channel order callers actually expect.
    """
    tname = tmpname() + '.tif'

    gs_proc = Popen(
        [
            "gs",
            "-dQUIET",
            "-dNOPAUSE",
            "-dFirstPage=" + str(first_page),
            "-dLastPage=" + str(last_page),
            "-sDEVICE=tiff24nc",
            "-dBATCH",
            "-r" + str(res),
            "-sOutputFile=" + tname,
            pdffile,
        ],
        stdout=DEVNULL,
        stderr=DEVNULL,
    )
    gs_proc.communicate()

    # Ghostscript's exit status is not inspected; a missing output file is
    # the signal that the conversion failed.
    if not os.path.exists(tname):
        return None, None

    try:
        # Close the TIFF handle before deleting the file: an open handle
        # leaks a descriptor and blocks removal on Windows.
        with TiffFile(tname) as imgfile:
            pages = [page.asarray() for page in imgfile.pages]
            shapes = [page.shape for page in imgfile.pages]
    finally:
        # Never leave the temporary TIFF behind, even if reading it raised.
        os.remove(tname)

    return pages, shapes