コード例 #1
0
ファイル: save_crops.py プロジェクト: edwbaker/inselect
def save_crops(dir, overwrite_existing, template):
    """Save crops for every Inselect document found directly in `dir`.

    A document is skipped (with a printed explanation) when it has validation
    problems, or when its crops directory already exists and
    `overwrite_existing` is falsy. An exception raised while handling one
    document is reported together with its traceback, after which processing
    carries on with the next document.

    Args:
        dir: directory searched for files ending in
            `InselectDocument.EXTENSION`.
        overwrite_existing: when falsy, documents whose crops directory
            already exists are left alone.
        template: handed to `UserTemplate.load` when truthy; otherwise `DWC`
            is used.
    """
    source_dir = Path(dir)
    export = DocumentExport(UserTemplate.load(template) if template else DWC)
    pattern = '*' + InselectDocument.EXTENSION
    for p in source_dir.glob(pattern):
        try:
            debug_print('Loading [{0}]'.format(p))
            doc = InselectDocument.load(p)
            validation = export.validation_problems(doc)

            if validation.any_problems:
                print(
                    u'Not saving crops for [{0}] because there are validation '
                    u'problems'.format(p)
                )
                for msg in format_validation_problems(validation):
                    print(msg)
                continue

            if not overwrite_existing and doc.crops_dir.is_dir():
                print(u'Crops dir [{0}] exists - skipping'.format(doc.crops_dir))
                continue

            print(u'Will save crops for [{0}] to [{1}]'.format(p, doc.crops_dir))

            debug_print(u'Loading full-resolution scanned image')
            # Attribute access whose value is discarded; presumably it forces
            # the image to load up front, as the debug message suggests.
            doc.scanned.array

            debug_print(u'Saving crops')
            export.save_crops(doc)
        except Exception:
            # Best-effort batch run: report the failure and keep going.
            print(u'Error saving crops from [{0}]'.format(p))
            traceback.print_exc()
コード例 #2
0
def save_crops(dir, overwrite_existing, template):
    """Write crops for each Inselect document located directly in `dir`.

    Each file matching `'*' + InselectDocument.EXTENSION` is loaded and
    validated against the export template. Documents with validation problems
    are listed and skipped; documents that already have a crops directory are
    skipped unless `overwrite_existing` is truthy. `KeyboardInterrupt` is
    re-raised; any other `Exception` is printed with its traceback and the
    next document is processed.

    Args:
        dir: directory to search for documents.
        overwrite_existing: when falsy, an existing crops directory causes
            the document to be skipped.
        template: passed to `UserTemplate.load` when truthy; otherwise `DWC`
            is used.
    """
    def _process(path):
        # Handle a single document; exceptions propagate to the caller's
        # handlers below.
        debug_print('Loading [{0}]'.format(path))
        doc = InselectDocument.load(path)
        validation = export.validation_problems(doc)

        if validation.any_problems:
            print(
                'Not saving crops for [{0}] because there are validation '
                'problems'.format(path)
            )
            for msg in format_validation_problems(validation):
                print(msg)
            return

        if not overwrite_existing and doc.crops_dir.is_dir():
            print('Crops dir [{0}] exists - skipping'.format(doc.crops_dir))
            return

        print('Will save crops for [{0}] to [{1}]'.format(path, doc.crops_dir))

        debug_print('Loading full-resolution scanned image')
        # Value is discarded; presumably touching the attribute loads the
        # image before the crops are written.
        doc.scanned.array

        debug_print('Saving crops')
        export.save_crops(doc)

    folder = Path(dir)
    export = DocumentExport(UserTemplate.load(template) if template else DWC)
    for p in folder.glob('*' + InselectDocument.EXTENSION):
        try:
            _process(p)
        except KeyboardInterrupt:
            raise
        except Exception:
            print('Error saving crops from [{0}]'.format(p))
            traceback.print_exc()
コード例 #3
0
    def test_csv_export(self):
        """CSV data are exported as expected

        Exports `shapes.inselect` to CSV and checks the header row, the
        metadata and 'original' coordinate columns of every data row, and
        that no further rows follow.
        """
        expected_headers = [
            'Cropped_image_name', 'ItemNumber',
            'NormalisedLeft', 'NormalisedTop', 'NormalisedRight',
            'NormalisedBottom', 'ThumbnailLeft', 'ThumbnailTop',
            'ThumbnailRight', 'ThumbnailBottom', 'OriginalLeft',
            'OriginalTop', 'OriginalRight', 'OriginalBottom',
            'catalogNumber', 'Department', 'scientificName',
            'scientificName-value'
        ]

        # One tuple per expected data row, in file order
        expected_rows = [
            ('01_1.png', '1',
             '0', '0', '189', '189',
             '1', 'Entomology', 'A', '1'),
            ('02_2.png', '2',
             '271', '0', '459', '189',
             '2', 'Entomology', 'B', '2'),
            ('03_10.png', '3',
             '194', '196', '257', '232',
             '3', 'Entomology', 'インセクト', '10'),
            ('04_3.png', '4',
             '0', '248', '189', '437',
             '4', 'Entomology', 'Elsinoë', '3'),
            ('05_4.png', '5',
             '271', '248', '459', '437',
             '5', 'Entomology', 'D', '4'),
        ]

        # Check only the metadata columns and 'original' coordinates
        # columns, ignoring thumbnail (which doesn't exist)
        # and normalised (which are floating point) coordinates
        metadata_cols = itemgetter(0, 1, 10, 11, 12, 13, 14, 15, 16, 17)

        with temp_directory_with_files(TESTDATA / 'shapes.inselect',
                                       TESTDATA / 'shapes.png') as tempdir:
            doc = InselectDocument.load(tempdir / 'shapes.inselect')

            csv_path = DocumentExport(self.TEMPLATE).export_csv(doc)
            self.assertEqual(csv_path, tempdir / 'shapes.csv')

            # Check CSV contents
            with csv_path.open('rb') as f:
                reader = unicodecsv.reader(f, encoding='utf8')
                self.assertEqual(expected_headers, next(reader))

                for expected in expected_rows:
                    self.assertEqual(expected, metadata_cols(next(reader)))

                # The reader must be exhausted after the last expected row
                self.assertIsNone(next(reader, None))
コード例 #4
0
    def test_cancel_save_crops(self):
        """User cancels save crops

        A progress callback that raises is used to abort `save_crops`; the
        test then checks that a pre-existing crops directory and its contents
        are left as they were.
        """
        class CancelExport(Exception):
            pass

        def progress(msg):
            "A progress function that cancels the export"
            raise CancelExport()

        with temp_directory_with_files(TESTDATA / 'shapes.inselect',
                                       TESTDATA / 'shapes.png') as tempdir:
            doc = InselectDocument.load(tempdir / 'shapes.inselect')

            # Create crops dir with some data
            crops_dir = doc.crops_dir
            crops_dir.mkdir()
            with crops_dir.joinpath('a_file').open('w') as outfile:
                outfile.write('Some data\n')

            # Build the exporter outside the guarded block so that only the
            # save_crops call itself is expected to raise
            exporter = DocumentExport(self.TEMPLATE)
            with self.assertRaises(CancelExport):
                exporter.save_crops(doc, progress=progress)

            # Nothing should have changed within tempdir
            names_in_tempdir = sorted(p.name for p in tempdir.iterdir())
            self.assertEqual(
                ['shapes.inselect', 'shapes.png', doc.crops_dir.name],
                names_in_tempdir)

            names_in_crops_dir = [p.name for p in doc.crops_dir.iterdir()]
            self.assertEqual(['a_file'], names_in_crops_dir)
コード例 #5
0
    def test_csv_export(self):
        """CSV data are exported as expected

        Verifies the path returned by `export_csv`, the header row, and a
        selection of columns from each data row of the written file.
        """
        with temp_directory_with_files(TESTDATA / 'shapes.inselect',
                                       TESTDATA / 'shapes.png') as tempdir:
            doc = InselectDocument.load(tempdir / 'shapes.inselect')

            csv_path = DocumentExport(self.TEMPLATE).export_csv(doc)
            self.assertEqual(csv_path, tempdir / 'shapes.csv')

            headers = [
                'Cropped_image_name', 'ItemNumber', 'NormalisedLeft',
                'NormalisedTop', 'NormalisedRight', 'NormalisedBottom',
                'ThumbnailLeft', 'ThumbnailTop', 'ThumbnailRight',
                'ThumbnailBottom', 'OriginalLeft', 'OriginalTop',
                'OriginalRight', 'OriginalBottom', 'catalogNumber',
                'Department', 'scientificName', 'scientificName-value'
            ]

            # Check only the metadata columns and 'original' coordinates
            # columns, ignoring thumbnail (which doesn't exist)
            # and normalised (which are floating point) coordinates
            wanted_indexes = (0, 1, 10, 11, 12, 13, 14, 15, 16, 17)
            metadata_cols = itemgetter(*wanted_indexes)

            # Expected values of the selected columns, row by row
            rows = (
                ('01_1.png', '1', '0', '0', '189', '189', '1',
                 'Entomology', 'A', '1'),
                ('02_2.png', '2', '271', '0', '459', '189',
                 '2', 'Entomology', 'B', '2'),
                ('03_10.png', '3', '194', '196', '257', '232',
                 '3', 'Entomology', 'インセクト', '10'),
                ('04_3.png', '4', '0', '248', '189', '437',
                 '4', 'Entomology', 'Elsinoë', '3'),
                ('05_4.png', '5', '271', '248', '459', '437',
                 '5', 'Entomology', 'D', '4'),
            )

            # Check CSV contents
            with csv_path.open('rb') as f:
                reader = unicodecsv.reader(f, encoding='utf8')
                self.assertEqual(headers, next(reader))
                for row in rows:
                    self.assertEqual(row, metadata_cols(next(reader)))
                # No rows should remain
                self.assertIsNone(next(reader, None))
コード例 #6
0
def export_csv(dir, overwrite_existing, template):
    """Export a CSV file for every Inselect document found directly in `dir`.

    Documents with validation problems are listed and skipped. A document
    whose CSV file already exists is skipped unless `overwrite_existing` is
    truthy. `KeyboardInterrupt` is re-raised; any other `Exception` is
    printed with its traceback before moving on to the next document.

    Args:
        dir: directory searched for files ending in
            `InselectDocument.EXTENSION`.
        overwrite_existing: when falsy, an existing CSV file causes the
            document to be skipped.
        template: passed to `UserTemplate.load` when truthy; otherwise `DWC`
            is used.
    """
    folder = Path(dir)
    export = DocumentExport(UserTemplate.load(template) if template else DWC)
    pattern = '*' + InselectDocument.EXTENSION
    for p in folder.glob(pattern):
        try:
            debug_print('Loading [{0}]'.format(p))
            doc = InselectDocument.load(p)
            validation = export.validation_problems(doc)
            csv_path = export.csv_path(doc)

            if validation.any_problems:
                print(
                    'Not exporting metadata for [{0}] because there are '
                    'validation problems'.format(p)
                )
                for msg in format_validation_problems(validation):
                    print(msg)
                continue

            if not overwrite_existing and csv_path.is_file():
                print('CSV file [{0}] exists - skipping'.format(csv_path))
                continue

            print('Writing CSV for [{0}]'.format(p))
            export.export_csv(doc)
        except KeyboardInterrupt:
            raise
        except Exception:
            # Report the failure for this document and continue with the rest
            print('Error saving CSV from [{0}]'.format(p))
            traceback.print_exc()
コード例 #7
0
    def test_save_crops(self):
        """Cropped object images are written correctly

        Saves crops for `shapes.inselect`, checks the names of the files
        written, and compares each file's pixels with the matching region of
        the scanned image.
        """
        with temp_directory_with_files(TESTDATA / 'shapes.inselect',
                                       TESTDATA / 'shapes.png') as tempdir:
            doc = InselectDocument.load(tempdir / 'shapes.inselect')

            crops_dir = DocumentExport(self.TEMPLATE).save_crops(doc)

            self.assertTrue(crops_dir.is_dir())
            self.assertEqual(crops_dir, doc.crops_dir)

            cropped_fnames = sorted(crops_dir.glob('*.png'))
            self.assertEqual(
                ['01_1.png', '02_2.png', '03_10.png', '04_3.png', '05_4.png'],
                [f.name for f in cropped_fnames])

            # Check the contents of each file
            boxes = list(
                doc.scanned.from_normalised(i['rect'] for i in doc.items))

            # zip() stops at the shorter input, so make sure that every crop
            # file is paired with a box and vice versa
            self.assertEqual(len(cropped_fnames), len(boxes))

            for box, path in zip(boxes, cropped_fnames):
                x0, y0, x1, y1 = box.coordinates
                expected = doc.scanned.array[y0:y1, x0:x1]
                # array_equal compares shape as well as values, so a crop of
                # the wrong size (or an unreadable file, for which imread
                # returns None) fails the assertion instead of broadcasting
                # or raising
                self.assertTrue(
                    np.array_equal(expected, cv2.imread(str(path))),
                    'Unexpected image contents in [{0}]'.format(path))
コード例 #8
0
    def test_save_crops(self):
        """Cropped object images are written correctly

        After `save_crops` has run, the crops directory must hold the
        expected PNG files and each file must contain exactly the pixels of
        the corresponding box in the scanned image.
        """
        with temp_directory_with_files(TESTDATA / 'shapes.inselect',
                                       TESTDATA / 'shapes.png') as tempdir:
            doc = InselectDocument.load(tempdir / 'shapes.inselect')

            crops_dir = DocumentExport(self.TEMPLATE).save_crops(doc)

            self.assertTrue(crops_dir.is_dir())
            self.assertEqual(crops_dir, doc.crops_dir)

            # Sorted once and reused for both the name and content checks
            crop_paths = sorted(crops_dir.glob('*.png'))
            self.assertEqual(
                ['01_1.png', '02_2.png', '03_10.png', '04_3.png', '05_4.png'],
                [f.name for f in crop_paths]
            )

            # Check the contents of each file
            rects = (item['rect'] for item in doc.items)
            boxes = list(doc.scanned.from_normalised(rects))

            # Guard against zip() silently dropping unmatched boxes or files
            self.assertEqual(len(boxes), len(crop_paths))

            for box, path in zip(boxes, crop_paths):
                x0, y0, x1, y1 = box.coordinates
                written = cv2.imread(str(path))
                # Shape-aware comparison: differing shapes, or a None result
                # from imread, yield False rather than a broadcast comparison
                # or an error
                self.assertTrue(
                    np.array_equal(doc.scanned.array[y0:y1, x0:x1], written),
                    'Crop [{0}] does not match the scanned image'.format(path)
                )
コード例 #9
0
    def test_fname_collison(self):
        """Duplicated crop fnames have numerical suffixes to avoid collisions

        Uses a stand-in document whose items carry repeated `scientificName`
        values and checks the filenames produced by `crop_fnames`.
        """

        class FakeDocument(object):
            pass

        # scientificName of each item, in document order
        names = ['A', 'A', 'A', 'D', 'B', 'D', 'A']

        document = FakeDocument()
        document.items = [
            {"fields": {"scientificName": name}} for name in names
        ]

        expected = [
            'A.png', 'A-1.png', 'A-2.png', 'D.png', 'B.png', 'D-1.png',
            'A-3.png'
        ]
        fnames = list(DocumentExport(self.TEMPLATE).crop_fnames(document))
        self.assertEqual(expected, fnames)