def test_image_support(self):
    """Extract a PDF with ``images=True`` and validate the result layout.

    The result must expose the 'text', 'metadata' and 'images' keys, and
    'images' must be a list whose first entry is a PIL JPEG image file.
    """
    extracted = pdf_extraction(PDF, images=True)

    present_keys = extracted.keys()
    for expected_key in ('text', 'metadata', 'images'):
        assert expected_key in present_keys

    images = extracted.get('images')
    assert isinstance(images, list)
    assert isinstance(images[0], PIL.JpegImagePlugin.JpegImageFile)
def test_url_support(self):
    """Run the extraction on a PDF referenced by URL.

    The result must contain 'text' (a string) and 'metadata' (a dict).
    """
    pdf_url = "https://papers.nips.cc/paper/4824-imagenet-classification-with-deep-convolutional-neural-networks.pdf"
    extracted = pdf_extraction(pdf_url)

    # Key presence is checked before the value types, as two separate phases.
    present_keys = extracted.keys()
    assert 'text' in present_keys
    assert 'metadata' in present_keys

    text = extracted.get('text')
    metadata = extracted.get('metadata')
    assert isinstance(text, six.string_types)
    assert isinstance(metadata, dict)
def test_image_support(self):
    """Check the result of an extraction requested with ``images=True``.

    Expects 'text', 'metadata' and 'images' keys; 'images' must be a list
    and its first element an instance of ``PpmImageFile``.
    """
    outcome = pdf_extraction(PDF, images=True)

    available = outcome.keys()
    assert 'text' in available
    assert 'metadata' in available
    assert 'images' in available

    extracted_images = outcome.get('images')
    assert isinstance(extracted_images, list)

    first_image = extracted_images[0]
    assert isinstance(first_image, PpmImageFile)
def test_image_support(self):
    """Extract with ``raw_text=True`` and ``images=True`` and check the output.

    The result must expose "raw_text", "metadata" and "images"; "images"
    must be a list whose first entry is a PIL JPEG image file.
    """
    extracted = pdf_extraction(PDF, raw_text=True, images=True)

    present_keys = extracted.keys()
    for expected_key in ("raw_text", "metadata", "images"):
        assert expected_key in present_keys

    images = extracted.get("images")
    assert isinstance(images, list)
    assert isinstance(images[0], PIL.JpegImagePlugin.JpegImageFile)
def test_table_support(self):
    """Extract a PDF with ``tables=True`` and validate the result layout.

    The result must expose 'text', 'metadata' and 'tables', and 'tables'
    must be a list.
    """
    extracted = pdf_extraction(PDF, tables=True)

    available = extracted.keys()
    assert 'text' in available
    assert 'metadata' in available
    assert 'tables' in available

    tables = extracted.get('tables')
    assert isinstance(tables, list)
def test_pdf_extraction(self):
    """Run a default extraction and check the keys and value types.

    'text' must be present and be a string; 'metadata' must be present and
    be a dict.
    """
    extracted = pdf_extraction(PDF)

    # Presence of both keys is asserted first, then the value types.
    present_keys = extracted.keys()
    for expected_key in ('text', 'metadata'):
        assert expected_key in present_keys

    text = extracted.get('text')
    metadata = extracted.get('metadata')
    assert isinstance(text, six.string_types)
    assert isinstance(metadata, dict)
import json
import os

import indicoio

# SECURITY: an API key is committed in this file. Prefer supplying it through
# the INDICO_API_KEY environment variable; the literal below is kept only as a
# backward-compatible fallback and should be rotated and removed.
indicoio.config.api_key = os.environ.get(
    'INDICO_API_KEY', '27df1eee04c5b65fb3113e9458d1d701'
)

# NOTE(review): '__file__' is a quoted string here, not the module's __file__
# attribute, so the path is resolved against the current working directory.
# Left unchanged on purpose; confirm the intent before altering it.
fileDir = os.path.dirname(os.path.realpath('__file__'))
resume = "data/resumePDF.pdf"

# Opening in 'w' mode creates/truncates data/resume.txt. Nothing in this
# script writes to it; the context manager only guarantees that the handle is
# closed, even if the extraction call raises.
with open(os.path.join(fileDir, "data/resume.txt"), 'w') as fileResumeTxt:
    # Print the extraction result for the resume PDF as a JSON string.
    print(json.dumps(indicoio.pdf_extraction(resume)))
def test_table_support(self):
    """Extract with ``raw_text=True`` and ``tables=True`` and check the output.

    The result must expose "raw_text", "metadata" and "tables", and
    "tables" must be a list.
    """
    outcome = pdf_extraction(PDF, raw_text=True, tables=True)

    present_keys = outcome.keys()
    for expected_key in ("raw_text", "metadata", "tables"):
        assert expected_key in present_keys

    tables = outcome.get("tables")
    assert isinstance(tables, list)
def test_pdf_extraction_batch(self):
    """Passing a list of PDFs to the extraction must produce a list."""
    batch_input = [PDF]
    batch_output = pdf_extraction(batch_input)
    assert isinstance(batch_output, list)
def test_pdf_extraction(self):
    """Run an extraction with ``raw_text=True`` and check keys and types.

    "raw_text" must be present and be a string; "metadata" must be present
    and be a dict.
    """
    extracted = pdf_extraction(PDF, raw_text=True)

    # Presence of both keys is asserted first, then the value types.
    available = extracted.keys()
    assert "raw_text" in available
    assert "metadata" in available

    raw_text = extracted.get("raw_text")
    metadata = extracted.get("metadata")
    assert isinstance(raw_text, six.string_types)
    assert isinstance(metadata, dict)