def ingest_from_directory(
        inbox, docs,
        thumbnail_width_pixels=InselectDocument.THUMBNAIL_DEFAULT_WIDTH,
        cookie_cutter=None):
    """Ingest every image in the directory `inbox` into the directory `docs`.

    inbox -- directory to read from; InselectError is raised if it is not
        an existing directory.
    docs -- directory handed to ingest_image; created (with parents) if it
        is not already a directory.
    thumbnail_width_pixels -- forwarded unchanged to ingest_image.
    cookie_cutter -- optional path; when truthy it is loaded with
        CookieCutter.load and the loaded object is forwarded to
        ingest_image, otherwise None is forwarded.

    A failure on one image is reported with its traceback and does not stop
    the remaining images from being processed.
    """
    inbox = Path(inbox)
    docs = Path(docs)
    if cookie_cutter:
        cookie_cutter = Path(cookie_cutter)
    else:
        cookie_cutter = None

    if not inbox.is_dir():
        message = 'Inbox directory [{0}] does not exist'.format(inbox)
        raise InselectError(message)

    if not docs.is_dir():
        print('Create document directory [{0}]'.format(docs))
        docs.mkdir(parents=True)

    # Load the cookie cutter once, up front, and reuse it for every image
    if cookie_cutter:
        cookie_cutter = CookieCutter.load(cookie_cutter)

    for candidate in inbox.iterdir():
        # Only entries whose names match the image-suffix pattern are ingested
        if not IMAGE_SUFFIXES_RE.match(candidate.name):
            continue

        print('Ingesting [{0}]'.format(candidate))
        try:
            ingest_image(
                candidate,
                docs,
                thumbnail_width_pixels=thumbnail_width_pixels,
                cookie_cutter=cookie_cutter,
            )
        except KeyboardInterrupt:
            # Let the user abort the whole run
            raise
        except Exception:
            print('Error ingesting [{0}]'.format(candidate))
            traceback.print_exc()
        else:
            print('Ingested [{0}]'.format(candidate))
def test_segment(self):
    """Ingest test_segment.png, run main on its directory and check that
    the resulting document contains five items.
    """
    image_name = 'test_segment.png'
    with temp_directory_with_files(TESTDATA / image_name) as tempdir:
        # Ingesting the image creates the new document that main() works on
        ingest_image(tempdir / image_name, tempdir)

        main([unicode(tempdir)])

        document_file = tempdir / 'test_segment.inselect'
        self.assertTrue(document_file.is_file())

        loaded = InselectDocument.load(document_file)
        self.assertEqual(5, len(loaded.items))
def test_shapes(self):
    """Ingest shapes.png, run main on its directory and check that the
    resulting document contains five items.
    """
    image_name = 'shapes.png'
    with temp_directory_with_files(TESTDATA / image_name) as tempdir:
        # Ingesting the image creates the new document that main() works on
        ingest_image(tempdir / image_name, tempdir)

        main([str(tempdir)])

        document_file = tempdir / 'shapes.inselect'
        self.assertTrue(document_file.is_file())

        loaded = InselectDocument.load(document_file)
        self.assertEqual(5, len(loaded.items))
def ingest_from_directory(inbox, docs):
    """Ingest images from the directory given by inbox to the directory given
    by docs.

    inbox -- directory to read from; InselectError is raised if it is not
        an existing directory.
    docs -- directory handed to ingest_image; created (with parents) if it
        is not already a directory.

    Every path in inbox matching one of IMAGE_PATTERNS is passed to
    ingest_image. A failure on one image is reported with its traceback and
    does not stop the remaining images from being processed.
    """
    inbox, docs = Path(inbox), Path(docs)
    if not inbox.is_dir():
        raise InselectError(
            'Inbox directory [{0}] does not exist'.format(inbox))

    if not docs.is_dir():
        print('Create document directory [{0}]'.format(docs))
        docs.mkdir(parents=True)

    # TODO LH Case insensitive matching
    # chain.from_iterable replaces the apply() builtin, which was deprecated
    # in Python 2.3 and no longer exists in Python 3; the paths produced and
    # their order are the same.
    matches = chain.from_iterable(
        inbox.glob(pattern) for pattern in IMAGE_PATTERNS)
    for source in matches:
        try:
            ingest_image(source, docs)
        except Exception:
            print('Error ingesting [{0}]'.format(source))
            traceback.print_exc()
def ingest_from_directory(
        inbox, docs,
        thumbnail_width_pixels=InselectDocument.THUMBNAIL_DEFAULT_WIDTH,
        cookie_cutter=None):
    """Run ingest_image on each image found directly inside `inbox`, with
    `docs` as the destination directory.

    Raises InselectError when `inbox` is not an existing directory. `docs`
    is created, including missing parents, when it is not already a
    directory. `thumbnail_width_pixels` is forwarded to ingest_image as is.
    A truthy `cookie_cutter` is treated as a path and loaded through
    CookieCutter.load before being forwarded; otherwise None is forwarded.

    Exceptions raised while ingesting a single image are printed together
    with their traceback and processing moves on to the next image;
    KeyboardInterrupt is re-raised.
    """
    inbox = Path(inbox)
    docs = Path(docs)
    cookie_cutter = Path(cookie_cutter) if cookie_cutter else None

    if not inbox.is_dir():
        raise InselectError(
            'Inbox directory [{0}] does not exist'.format(inbox)
        )

    if not docs.is_dir():
        print('Create document directory [{0}]'.format(docs))
        docs.mkdir(parents=True)

    if cookie_cutter:
        # Swap the path for the loaded object, shared by all images below
        cookie_cutter = CookieCutter.load(cookie_cutter)

    # Options that are identical for every ingest_image call
    options = {
        'thumbnail_width_pixels': thumbnail_width_pixels,
        'cookie_cutter': cookie_cutter,
    }

    for entry in inbox.iterdir():
        if IMAGE_SUFFIXES_RE.match(entry.name):
            print('Ingesting [{0}]'.format(entry))
            try:
                ingest_image(entry, docs, **options)
            except KeyboardInterrupt:
                raise
            except Exception:
                print('Error ingesting [{0}]'.format(entry))
                traceback.print_exc()
            else:
                print('Ingested [{0}]'.format(entry))
def __call__(self, progress):
    """Ingest self.image into its own parent directory and store the
    resulting document's path on self.document_path.

    progress -- callable invoked once with a status message before the
        image is ingested.
    """
    progress('Creating thumbnail of scanned image')
    # The document is created alongside the image, in the image's directory
    self.document_path = ingest_image(
        self.image, self.image.parent
    ).document_path