Beispiel #1
0
    def test_NewFromDirectory( self ):
        """FeatureSpace.NewFromDirectory on a flat directory and on class subdirectories.

        Part 1: a directory holding a single image must yield a feature space
        whose first row matches the features in the reference .sig file.

        Part 2: a directory with one subdirectory per class (created in the
        order C, B, A) must report class_names as ['A', 'B', 'C'], and every
        row must match the same reference features, since each subdirectory
        holds a copy of the same image.
        """
        from os import mkdir
        from shutil import copy

        ref_sig_path = 'lymphoma_eosin_channel_MCL_test_img_sj-05-3362-R2_001_E-t6x5_5_4-l.sig'
        ref_fv = FeatureVector.NewFromSigFile( pychrm_test_dir + sep + ref_sig_path )
        img_filename = "lymphoma_eosin_channel_MCL_test_img_sj-05-3362-R2_001_E-t6x5_5_4-l.tiff"
        orig_img_filepath = pychrm_test_dir + sep + img_filename

        # Part 1: flat directory containing one image.
        tempdir = mkdtemp()
        try:
            # Copy inside the try so the temp dir is removed even if the copy fails.
            copy( orig_img_filepath, tempdir )
            fs = FeatureSpace.NewFromDirectory( tempdir, quiet=False )
            self.assertTrue( compare( fs.data_matrix[0], ref_fv.values ) )
        finally:
            rmtree( tempdir )

        # Part 2: one subdirectory per class, deliberately created in reverse
        # alphabetical order so the class_names assertion checks the ordering.
        toptempdir = mkdtemp()
        try:
            for letter in 'CBA':
                dirname = toptempdir + sep + letter
                mkdir( dirname )
                copy( orig_img_filepath, dirname )

            fs = FeatureSpace.NewFromDirectory( toptempdir, quiet=False )
            self.assertEqual( fs.class_names, ['A', 'B', 'C' ] )
            for row_of_features in fs.data_matrix:
                self.assertTrue( compare( row_of_features, ref_fv.values ) )
        finally:
            rmtree( toptempdir )
    def test_ParallelTiling( self ):
        """Features loaded with n_jobs set must match the reference 6x5 tile features.

        Two file-of-files (FOF) are written, each listing one copy of the same
        test image. The copy in the reference directory sits next to the .sig
        files extracted from the reference zip; the copy in the target
        directory has no .sig files beside it. Both FOFs are loaded with the
        same sampling options (long feature set, 6 tile columns x 5 tile rows)
        and the two data matrices are compared row by row.
        """

        import zipfile
        from shutil import copy
        from tempfile import NamedTemporaryFile
        from wndcharm.utils import compare

        refdir = mkdtemp(prefix='ref')
        targetdir = mkdtemp(prefix='target')

        try:
            reference_feats = pychrm_test_dir + sep + 'lymphoma_eosin_channel_MCL_test_img_sj-05-3362-R2_001_E_t6x5_REFERENCE_SIGFILES.zip'
            # Close the archive as soon as its contents are extracted.
            with zipfile.ZipFile( reference_feats, mode='r' ) as zf:
                zf.extractall( refdir )

            img_filename = "lymphoma_eosin_channel_MCL_test_img_sj-05-3362-R2_001_E.tif"
            orig_img_filepath = pychrm_test_dir + sep + img_filename

            # copy the tiff to the tempdir so the .sig files end up there too
            copy( orig_img_filepath, targetdir )
            copy( orig_img_filepath, refdir )

            # delete=False: the FOFs must outlive the with-blocks; they are
            # removed together with their directories in the finally clause.
            with NamedTemporaryFile( mode='w', dir=refdir, prefix='ref', delete=False ) as temp:
                ref_fof = temp.name
                temp.write( 'reference_samp\ttest_class\t{}\t{{}}\n'.format( refdir + sep + img_filename ) )
            with NamedTemporaryFile( mode='w', dir=targetdir, prefix='target', delete=False ) as temp:
                target_fof = temp.name
                temp.write( 'test_samp\ttest_class\t{}\t{{}}\n'.format( targetdir + sep + img_filename ) )

            global_sampling_options = \
                FeatureVector( long=True, tile_num_cols=6, tile_num_rows=5 )

            # Should just load reference sigs
            ref_fs = FeatureSpace.NewFromFileOfFiles( ref_fof, quiet=False,
                 global_sampling_options=global_sampling_options )
            # NOTE(review): n_jobs=True presumably selects the parallel code
            # path -- confirm against NewFromFileOfFiles.
            target_fs = FeatureSpace.NewFromFileOfFiles( target_fof, n_jobs=True,
                 quiet=False, global_sampling_options=global_sampling_options )

            # zip() stops at the shorter input, so check the row counts first;
            # otherwise missing rows would go unnoticed.
            self.assertEqual( len( ref_fs.data_matrix ), len( target_fs.data_matrix ) )

            for row_num, (ref_row, test_row) in enumerate( zip( ref_fs.data_matrix, target_fs.data_matrix ) ):
                if not compare( ref_row, test_row ):
                    # Sample names are only looked up when a row actually differs.
                    self.fail( "error in sample row {}; REF: {} TARGET: {}".format( row_num,
                        ref_fs._contiguous_sample_names[row_num],
                        target_fs._contiguous_sample_names[row_num] ) )
        finally:
            # Remove the target directory even if removing the reference one fails.
            try:
                rmtree( refdir )
            finally:
                rmtree( targetdir )
    def test_LargeFeatureSetGrayscale( self ):
        """Large feature set, grayscale image: computed features must match the .sig file."""
        # NOTE(review): another method with this exact name follows later in
        # this file; within one class the later definition replaces this one.

        # Expected values: features previously written to the signature file.
        expected = FeatureVector.NewFromSigFile( self.sig_file_path,
            image_path=self.test_tif_path )

        # Actual values: the long feature set computed from the image itself,
        # kept in memory only.
        calculator = FeatureVector( source_filepath=self.test_tif_path, long=True )
        computed = calculator.GenerateFeatures( write_to_disk=False )

        # numpy.testing.assert_allclose( ..., rtol=1e-3 ) was tried here and
        # rejected: the ranges of the features are too wide for a single
        # relative tolerance to work.
        #
        # The reference values are read in from strings, which leaves only
        # about 6 significant figures, so the check is delegated to compare()
        # (described by the original author as closer to comparing strings).
        features_match = compare( computed.values, expected.values )
        self.assertTrue( features_match )
    def test_LargeFeatureSetGrayscale( self ):
        """Large feature set, grayscale image: freshly computed values agree with the stored ones."""
        # NOTE(review): a method with this exact name is also defined earlier
        # in this file; within one class this later definition wins.

        sig_features = FeatureVector.NewFromSigFile(
            self.sig_file_path,
            image_path=self.test_tif_path )

        image_features = FeatureVector(
            source_filepath=self.test_tif_path,
            long=True ).GenerateFeatures( write_to_disk=False )

        # Why not numpy.testing.assert_allclose? It was tried with rtol=1e-3
        # and did not work, because the features span such wide ranges.
        #
        # The stored values come from text and carry only about 6 significant
        # figures; compare() is used as the more apples-to-apples check.
        outcome = compare( image_features.values, sig_features.values )
        self.assertTrue( outcome )
    def test_HeatMap_w_FeatureComputationPlan( self ):
        """Features of the first SampleImageTiles tile must match the reference .sig file.

        The test image is tiled with SampleImageTiles using a 231 x 208 px
        scan window, the long feature set is computed for the first sample the
        iterator hands back, and the values are compared with the pre-computed
        top-left tile signature ('..._E-t5x6_0_0-l.sig') from the reference zip.
        """

        # chris@NIA-LG-01778617 ~/src/wnd-charm/tests/pywndcharm_tests
        # $ tiffinfo lymphoma_eosin_channel_MCL_test_img_sj-05-3362-R2_001_E.tif
        # TIFF Directory at offset 0x18ea9c (1632924)
        #   Image Width: 1388 Image Length: 1040
        #   Bits/Sample: 8
        #   Compression Scheme: LZW
        #   Photometric Interpretation: min-is-black
        #   Samples/Pixel: 1
        #   Rows/Strip: 5
        #   Planar Configuration: single image plane

        import zipfile
        from shutil import copy

        # 5x6 tiling scheme => tile dims 208 x 231.33 each
        scan_x = 231
        scan_y = 208

        # Inflate the zipped reference signatures into a temp dir
        tempdir = mkdtemp()

        try:
            reference_sigs = pychrm_test_dir + sep + 'lymphoma_eosin_channel_MCL_test_img_sj-05-3362-R2_001_E_REFERENCE_SIGFILES.zip'
            # Close the archive as soon as its contents are extracted.
            with zipfile.ZipFile( reference_sigs, mode='r' ) as zf:
                zf.extractall( tempdir )

            img_filename = "lymphoma_eosin_channel_MCL_test_img_sj-05-3362-R2_001_E.tif"
            orig_img_filepath = pychrm_test_dir + sep + img_filename

            # copy the tiff to the tempdir so the .sig files end up there too
            copy( orig_img_filepath, tempdir )
            input_image_path = tempdir + sep + img_filename

            # create the tile image iterator
            image_iter = SampleImageTiles( input_image_path, scan_x, scan_y, True)
            print( "Number of samples = " + str( image_iter.samples ) )

            # Just grab the first tile.
            # (A leftover pdb.set_trace() used to sit here; it blocked every
            # unattended test run waiting for debugger input.)
            # NOTE(review): if sample() is a generator function, this is a
            # generator object rather than a pixel plane -- confirm.
            tile_cropped_px_plane = image_iter.sample()

            # Do not set 'x', 'y', 'w', 'h' here: the sample is already
            # cropped, and the code would try to take a ROI of a ROI.
            kwargs = {
                'name': input_image_path,
                'source_filepath': tile_cropped_px_plane,
                'long': True,
                'tile_num_cols': image_iter.tiles_x,
                'tile_num_rows': image_iter.tiles_y,
                'tiling_scheme': '{0}x{1}'.format( image_iter.tiles_x, image_iter.tiles_y ),
                'tile_col_index': image_iter.current_col,
                'tile_row_index': image_iter.current_row,
                'sample_group_id': 0,
            }

            top_left_tile_feats = FeatureVector( **kwargs ).GenerateFeatures( quiet=False, write_to_disk=False )

            top_left_tile_reference_feats = FeatureVector.NewFromSigFile( tempdir + sep + 'sj-05-3362-R2_001_E-t5x6_0_0-l.sig' )

            # Remember we're reading these values in from strings, and the
            # ranges are so wide you only have 6 sig figs, so the check is
            # delegated to compare() rather than a numeric tolerance.
            self.assertTrue( compare( top_left_tile_feats.values, top_left_tile_reference_feats.values ) )

        finally:
            rmtree( tempdir )
    def test_FeatureComputationFromROI( self ):
        """Specify bounding box to FeatureVector, calc features, then compare
        with C++ implementation-calculated feats.

        Two regions of the test image are checked: the top-left and the
        bottom-right tile of a 6-column x 5-row grid. For each, the feature
        names and the feature values must match the corresponding reference
        .sig file from the zip archive.
        """

        import zipfile
        from shutil import copy

        # orig image lymphoma_eosin_channel_MCL_test_img_sj-05-3362-R2_001_E.tif
        # has size=1388x1040
        # WND-CHARM command line specifies via -tCxR param
        # where C is columns and R is rows, ergo 5 rows, 6 cols = -t6x5
        # tile dims => w=1388/6 cols = 231.33px wide, h=1040/5 rows = 208 px tall
        ROI_width = 231
        ROI_height = 208

        # Inflate the zipped reference signatures into a temp dir
        tempdir = mkdtemp()

        try:
            reference_sigs = pychrm_test_dir + sep + 'lymphoma_eosin_channel_MCL_test_img_sj-05-3362-R2_001_E_t6x5_REFERENCE_SIGFILES.zip'
            # Close the archive as soon as its contents are extracted.
            with zipfile.ZipFile( reference_sigs, mode='r' ) as zf:
                zf.extractall( tempdir )

            img_filename = "lymphoma_eosin_channel_MCL_test_img_sj-05-3362-R2_001_E.tif"
            orig_img_filepath = pychrm_test_dir + sep + img_filename

            # copy the tiff to the tempdir so the .sig files end up there too
            copy( orig_img_filepath, tempdir )
            input_image_path = tempdir + sep + img_filename

            kwargs = {
                'name': img_filename,
                'source_filepath': input_image_path,
                'long': True,
                'w': ROI_width,
                'h': ROI_height,
                'sample_group_id': 0,
            }

            sig_path_template = tempdir + sep + \
                'lymphoma_eosin_channel_MCL_test_img_sj-05-3362-R2_001_E-t6x5_{0}_{1}-l.sig'

            # (tile column, tile row): top-left tile, then bottom-right tile.
            for tile_col, tile_row in ( (0, 0), (5, 4) ):
                # The ROI origin is the tile index times the tile size:
                # (0, 0) for the first tile, (5*231, 4*208) = (1155, 832)
                # for the second.
                kwargs[ 'x' ] = tile_col * ROI_width
                kwargs[ 'y' ] = tile_row * ROI_height

                tile_feats = FeatureVector( **kwargs ).GenerateFeatures( quiet=False, write_to_disk=False )
                tile_reference_feats = FeatureVector.NewFromSigFile(
                    sig_path_template.format( tile_col, tile_row ) )

                # Remember we're reading these values in from strings, and the
                # ranges are so wide you only have 6 sig figs, so the values
                # are checked with compare() rather than a numeric tolerance.
                self.assertEqual( tile_feats.feature_names, tile_reference_feats.feature_names )
                self.assertTrue( compare( tile_feats.values, tile_reference_feats.values ) )

        finally:
            rmtree( tempdir )
Beispiel #7
0
    def test_NewFromFileOfFiles( self ):
        """Pulls in the lymphoma eosin histology 5x6 tiled featureset via .sig files."""

        # Types of files containing features:
        # FIT: contains an entire FeatureSpace definition including features.
        # FOF: "File Of Files" containing a FeatureSpace structure definition only,
        #      listing paths to files of pre-calculated features (.sig) or the
        #      tiff images themselves so features can be calculated
        # SIG: A text file containing pre-calculated features for a single sample.

        # Test dataset: subset of the IICBU2008 lymphoma dataset. 2 channels (H+E),
        #    3 classes ('CLL', 'FL', 'MCL'), 10 images per class per channel,
        #    5x6 tiling grid = 30 samples per image resulting in 
        #    2 x 3 x 10 X 30 = 1800 total samples available

        # Files containing features included in this test suite:
        # 1. lymphoma_iicbu2008_subset_EOSIN_ONLY_t5x6_v3.2features.fit.zip:
        #    A zip archive containing a single FIT file with features pre-calculated.
        # 2. lymphoma_iicbu2008_subset_HE_t5x6_v3.2features_SIGFILES.zip:
        #    Contains 1800 SIG files, plus 4 FOF files (items 2-5 below):
        #       "lymphoma_iicbu2008_subset_EOSIN_ONLY_images.fof.tsv"
        #       "lymphoma_iicbu2008_subset_EOSIN_ONLY_sigfiles_t5x6-l.fof.tsv"
        #       "lymphoma_iicbu2008_subset_2CHAN_HE_images.fof.tsv"
        #       "lymphoma_iicbu2008_subset_2CHAN_HE_sigfiles_t5x6-l.fof.tsv"

        # List of possible feature sources:
        #    1. Single channel FIT (Eosin only)
        #    2. Single channel FOF (Eosin only) referencing to 30 tiffs (requires global sampling options -t5x6 -l to grab sigs)
        #    3. Single channel FOF (Eosin only) referencing 900 sig files
        #    4. Double channel FOF (Eosin+Haemotoxylin) referencing 60 tiffs (requires global sampling options -t5x6 -l to grab sigs)
        #    5. Double channel FOF (Eosin+Haemotoxylin) referencing 1800 sig files.

        #=============================================
        # BEGIN CODE TO CREATE TESTDATA ZIP PACKAGE

        #import zipfile
        #import zlib
        #path = '/Users/chris/src/wnd-charm/tests/pywndcharm_tests/TESTDATA_lymphoma_iicbu2008_subset_HE_t5x6_v3.2features_SIGFILES.zip'
        #zf = zipfile.ZipFile( path, mode='w' )
        #import os
        #classes = 'CLL', 'FL', 'MCL',
        #channels = 'haemotoxylin', 'eosin'
        #from collections import defaultdict
        #sig_tracker = defaultdict(int)
        #samplegroupid_tracker = {}
        #samplegroup_counter = 0
        #
        #eosin_tif_fof = [] # 30 lines
        #eosin_sig_fof = [] # 900 lines
        #double_tif_fof = [] # 30 lines, 2 feature set columns
        #double_sig_fof = [] # 900 lines, 2 feature set columns
        #
        #for _channel in channels:
        #    zf.write( './' + _channel, compress_type=zipfile.ZIP_DEFLATED )
        #    for _class in classes:
        #        zf.write( './' + _channel + '/' + _class, compress_type=zipfile.ZIP_DEFLATED )
        #        for root, dirs, files in os.walk( _channel + '/' + _class ):
        #            for _file in files:
        #                if _file.endswith( '.tif' ):
        #                    # Strip off the _H.tif or _E.tif
        #                    samplename = _file[:-6]
        #                    eosinpath = './eosin/' + _class + '/' + samplename + '_E.tif'
        #                    haemopath = './haemotoxylin/' + _class + '/' + samplename + '_H.tif'
        #                    if _channel == 'eosin':
        #                        eosin_tif_fof.append( eosinpath + '\t' + _class )
        #                        double_tif_fof.append( samplename + '\t' + _class + '\t' + eosinpath + '\t{\tchannel\t=\teosin\t}\t' + haemopath + '\t{\tchannel\t=\thaemotoxylin\t}')
        #                elif _file.endswith( '.sig' ):
        #                    zf.write( './' + _channel + '/' + _class + '/' + _file, compress_type=zipfile.ZIP_DEFLATED )
        #                    if _channel == 'eosin':
        #                        # Strip off the _H-t5x6_0_0-l.sig
        #                        samplename = _file[:-17] + '.tif'
        #                        eosinpath = './eosin/' + _class + '/' + _file
        #                        haemopath = './haemotoxylin/' + _class + '/' + _file.replace( '_E-t5x6_', '_H-t5x6_' )
        #                        # count samples from 0:
        #                        samplesequenceid = str( sig_tracker[ samplename ] )
        #                        sig_tracker[ samplename ] += 1
        #                        if samplename not in samplegroupid_tracker:
        #                            samplegroupid_tracker[ samplename ] = samplegroup_counter
        #                            samplegroup_counter += 1
        #                        samplegroupid = str( samplegroupid_tracker[ samplename ] )
        #                        eosin_sig_fof.append( eosinpath + '\t' + _class )
        #                        double_sig_fof.append( samplename + '\t' + _class + '\t' + eosinpath + '\t{\tchannel\t=\teosin\t;\tsamplegroupid\t=\t' + samplegroupid + '\t;\tsamplesequenceid\t=\t' + samplesequenceid + '\t}\t' + haemopath + '\t{\tchannel\t=\thaemotoxylin\t;\tsamplegroupid\t=\t' + samplegroupid + '\t;\tsamplesequenceid\t=\t' + samplesequenceid + '\t}\t')
        #
        #fof_dir = '/Users/chris/src/wnd-charm/tests/pywndcharm_tests/'
        #with open( 'lymphoma_iicbu2008_subset_EOSIN_ONLY_images.fof.tsv', 'w') as out:
        #    for _ in eosin_tif_fof:
        #        out.write( _ + '\n')
        #with open( 'lymphoma_iicbu2008_subset_EOSIN_ONLY_sigfiles_t5x6-l.fof.tsv', 'w') as out:
        #    for _ in eosin_sig_fof:
        #        out.write( _ + '\n')
        #with open( 'lymphoma_iicbu2008_subset_2CHAN_HE_images.fof.tsv', 'w') as out:
        #    for _ in double_tif_fof:
        #        out.write( _ + '\n')
        #with open( 'lymphoma_iicbu2008_subset_2CHAN_HE_sigfiles_t5x6-l.fof.tsv', 'w') as out:
        #    for _ in double_sig_fof:
        #        out.write( _ + '\n')
        #zf.write( './' + 'lymphoma_iicbu2008_subset_EOSIN_ONLY_images.fof.tsv', compress_type=zipfile.ZIP_DEFLATED )
        #zf.write( './' + 'lymphoma_iicbu2008_subset_EOSIN_ONLY_sigfiles_t5x6-l.fof.tsv', compress_type=zipfile.ZIP_DEFLATED )
        #zf.write( './' + 'lymphoma_iicbu2008_subset_2CHAN_HE_images.fof.tsv', compress_type=zipfile.ZIP_DEFLATED )
        #zf.write( './' + 'lymphoma_iicbu2008_subset_2CHAN_HE_sigfiles_t5x6-l.fof.tsv', compress_type=zipfile.ZIP_DEFLATED )
        #zf.printdir()
        #zf.close()

        # END CODE TO CREATE TESTDATA ZIP PACKAGE
        #=============================================

        # Inflate the zipped test fit into a temp file
        import zipfile
        
        zipped_file_path = pychrm_test_dir + sep + 'lymphoma_iicbu2008_subset_HE_t5x6_v3.2features_SIGFILES.zip'
        zf1 = zipfile.ZipFile( zipped_file_path, mode='r' )
        tempdir = mkdtemp()
        zf1.extractall( tempdir )

        # for comparison:
        zf2 = zipfile.ZipFile( pychrm_test_dir + sep + 'lymphoma_iicbu2008_subset_EOSIN_ONLY_t5x6_v3.2features.fit.zip', mode='r')
        zf2.extractall( tempdir )

        try:
            kwargs = {}
            kwargs['pathname'] = tempdir + sep + 'lymphoma_iicbu2008_subset_EOSIN_ONLY_sigfiles_t5x6-l.fof.tsv'
            kwargs['quiet'] = True
            # sampling opts: -l -t5x6 implies 5 columns and 6 rows ... I know it's weird.
            kwargs['long'] = True
            kwargs['tile_num_rows'] = 6
            kwargs['tile_num_cols'] = 5
            fs_fof = FeatureSpace.NewFromFileOfFiles( **kwargs )

            kwargs['pathname'] = tempdir + sep + 'lymphoma_iicbu2008_subset_eosin_t5x6_v3.2features.fit'
            fs_fit = FeatureSpace.NewFromFitFile( **kwargs )

            # Fit file has less significant figures than Signature files, and it's not
            # consistent how many there are. Seems like fit file just lops off numbers
            # at the end. Example: (signatures on top, fit on bottom)
            #
            # Example:
            # -  17.232246,  # sig
            # ?         --
            #
            # +  17.2322,    # fit
            # -  -63.549056, # sig
            # ?         ^^^
            #
            # +  -63.5491,   # fit
            # ?         ^
            #
            # -  223.786977, # sig
            # ?        ---
            #
            # +  223.787,    # fit

            # More of the same:
            #(Pdb) fs_fof.data_matrix[0,-5:]
            #array([   0.935442,   14.005003,  -43.562076,  127.394914,    0.628772])
            #(Pdb) fs_fit.data_matrix[0,-5:]
            #array([   0.935442,   14.005   ,  -43.5621  ,  127.395   ,    0.628772])

            # default is rtol=1e-07, atol=0
            #np.testing.assert_allclose( actual=fs_fit.data_matrix, desired=fs_fof.data_matrix,
            #        rtol=1e-03, atol=0 )
            #np.testing.assert_array_almost_equal_nulp( fs_fit.data_matrix, fs_fof.data_matrix )
            for row_num, (fit_row, fof_row) in enumerate( zip( fs_fit.data_matrix, fs_fof.data_matrix )):
                retval = compare( fit_row, fof_row )
                if retval == False:
                    print "error in sample row", row_num
                    print "FIT: ", fs_fit._contiguous_sample_names[row_num], "FOF", fs_fof._contiguous_sample_names[row_num]
                self.assertTrue( retval )


            # Test sorting; scramble the FOF then load and check:

            sorted_fof = tempdir + sep + \
                    'lymphoma_iicbu2008_subset_EOSIN_ONLY_sigfiles_t5x6-l.fof.tsv'

            with open( sorted_fof) as fof:
                lines = fof.readlines()

            from random import shuffle
            shuffle(lines)

            unsorted_fof = tempdir + sep + \
                    'lymphoma_iicbu2008_subset_EOSIN_ONLY_sigfiles_t5x6-l_UNSORTED.fof.tsv'

            with open( unsorted_fof, 'w' ) as fof:
                for line in lines:
                    fof.write( line )

            kwargs = {}
            kwargs['pathname'] = unsorted_fof
            kwargs['quiet'] = True
            # sampling opts: -l -t5x6 implies 5 columns and 6 rows ... I know it's weird.
            kwargs['long'] = True
            kwargs['tile_num_rows'] = 6
            kwargs['tile_num_cols'] = 5
            fs_fof = FeatureSpace.NewFromFileOfFiles( **kwargs )
            # Check again
            for row_num, (fit_row, fof_row) in enumerate( zip( fs_fit.data_matrix, fs_fof.data_matrix )):
                retval = compare( fit_row, fof_row )
                if retval == False:
                    print "error in sample row", row_num
                    print "FIT: ", fs_fit._contiguous_sample_names[row_num], "FOF", fs_fof._contiguous_sample_names[row_num]
                self.assertTrue( retval )

            # TESTING TAKE TILES:
            self.assertRaises( ValueError, fs_fof.TakeTiles, tuple() )
            self.assertRaises( ValueError, fs_fof.TakeTiles, (45, 46, 47,) )
            self.assertRaises( TypeError, fs_fof.TakeTiles, 'crap' )

            # take middle 4
            wanted_tiles = ( 14, 15, 20, 21 )

            took = fs_fof.TakeTiles( wanted_tiles, inplace=False )
            num_sample_groups = len( set( fs_fof._contiguous_sample_group_ids ) )
            self.assertEqual( took.num_samples_per_group, len( wanted_tiles ) )
            self.assertEqual( took.num_samples, len( wanted_tiles ) * num_sample_groups )

#            mid4 = 'lymphoma_iicbu2008_subset_EOSIN_ONLY_sigfiles_MIDDLE_4_TILES_t5x6-l.fof.tsv'
#            # fake out wndcharm by putting empty tiffs in the temp dir
#            # we don't need them, the sigs are in there already.
#            with open( mid4) as fof:
#                lines = fof.readlines()
#                names, classes, paths, opts = zip( *[ _.split('\t') for _ in lines ] )
#                for _path in paths:
#                    with open( tempdir + sep + _path, 'w' ):
#                        pass
#            took_via_fof = FeatureSpace.NewFromFileOfFiles( mid4, num_samples_per_group=4 )
#
#            for row_num, (fit_row, fof_row) in enumerate( zip( took.data_matrix, took_via_fof.data_matrix )):
#                retval = compare( fit_row, fof_row )
#                if retval == False:
#                    print "error in sample row", row_num
#                    print "FIT: ", took._contiguous_sample_names[row_num], "FOF", took_via_fof._contiguous_sample_names[row_num]
#                self.assertTrue( retval )


        finally:
            rmtree( tempdir )
Beispiel #8
0
    def test_ParallelTiling(self):
        """Features loaded with n_jobs set must match the reference 6x5 tile features.

        Two file-of-files (FOF) are written, each listing one copy of the same
        test image. The copy in the reference directory sits next to the .sig
        files extracted from the reference zip; the copy in the target
        directory has no .sig files beside it. Both FOFs are loaded with the
        same sampling options (long feature set, 6 tile columns x 5 tile rows)
        and the two data matrices are compared row by row.
        """

        import zipfile
        from shutil import copy
        from tempfile import NamedTemporaryFile
        from wndcharm.utils import compare

        refdir = mkdtemp(prefix='ref')
        targetdir = mkdtemp(prefix='target')

        try:
            reference_feats = pychrm_test_dir + sep + 'lymphoma_eosin_channel_MCL_test_img_sj-05-3362-R2_001_E_t6x5_REFERENCE_SIGFILES.zip'
            # Close the archive as soon as its contents are extracted.
            with zipfile.ZipFile(reference_feats, mode='r') as zf:
                zf.extractall(refdir)

            img_filename = "lymphoma_eosin_channel_MCL_test_img_sj-05-3362-R2_001_E.tif"
            orig_img_filepath = pychrm_test_dir + sep + img_filename

            # copy the tiff to the tempdir so the .sig files end up there too
            copy(orig_img_filepath, targetdir)
            copy(orig_img_filepath, refdir)

            # delete=False: the FOFs must outlive the with-blocks; they are
            # removed together with their directories in the finally clause.
            with NamedTemporaryFile(mode='w',
                                    dir=refdir,
                                    prefix='ref',
                                    delete=False) as temp:
                ref_fof = temp.name
                temp.write('reference_samp\ttest_class\t{}\t{{}}\n'.format(
                    refdir + sep + img_filename))
            with NamedTemporaryFile(mode='w',
                                    dir=targetdir,
                                    prefix='target',
                                    delete=False) as temp:
                target_fof = temp.name
                temp.write('test_samp\ttest_class\t{}\t{{}}\n'.format(
                    targetdir + sep + img_filename))

            global_sampling_options = FeatureVector(long=True,
                                                    tile_num_cols=6,
                                                    tile_num_rows=5)

            # Should just load reference sigs
            ref_fs = FeatureSpace.NewFromFileOfFiles(
                ref_fof,
                quiet=False,
                global_sampling_options=global_sampling_options)
            # NOTE(review): n_jobs=True presumably selects the parallel code
            # path -- confirm against NewFromFileOfFiles.
            target_fs = FeatureSpace.NewFromFileOfFiles(
                target_fof,
                n_jobs=True,
                quiet=False,
                global_sampling_options=global_sampling_options)

            # zip() stops at the shorter input, so check the row counts first;
            # otherwise missing rows would go unnoticed.
            self.assertEqual(len(ref_fs.data_matrix),
                             len(target_fs.data_matrix))

            for row_num, (ref_row, test_row) in enumerate(
                    zip(ref_fs.data_matrix, target_fs.data_matrix)):
                if not compare(ref_row, test_row):
                    # Sample names are only looked up when a row actually differs.
                    self.fail(
                        "error in sample row {}; REF: {} TARGET: {}".format(
                            row_num,
                            ref_fs._contiguous_sample_names[row_num],
                            target_fs._contiguous_sample_names[row_num]))
        finally:
            # Remove the target directory even if removing the reference one fails.
            try:
                rmtree(refdir)
            finally:
                rmtree(targetdir)
Beispiel #9
0
    def test_HeatMap_w_FeatureComputationPlan(self):
        """Features of the first SampleImageTiles tile must match the reference .sig file.

        The test image is tiled with SampleImageTiles using a 231 x 208 px
        scan window, the long feature set is computed for the first sample the
        iterator hands back, and the values are compared with the pre-computed
        top-left tile signature ('..._E-t5x6_0_0-l.sig') from the reference zip.
        """

        # chris@NIA-LG-01778617 ~/src/wnd-charm/tests/pywndcharm_tests
        # $ tiffinfo lymphoma_eosin_channel_MCL_test_img_sj-05-3362-R2_001_E.tif
        # TIFF Directory at offset 0x18ea9c (1632924)
        #   Image Width: 1388 Image Length: 1040
        #   Bits/Sample: 8
        #   Compression Scheme: LZW
        #   Photometric Interpretation: min-is-black
        #   Samples/Pixel: 1
        #   Rows/Strip: 5
        #   Planar Configuration: single image plane

        import zipfile
        from shutil import copy

        # 5x6 tiling scheme => tile dims 208 x 231.33 each
        scan_x = 231
        scan_y = 208

        # Inflate the zipped reference signatures into a temp dir
        tempdir = mkdtemp()

        try:
            reference_sigs = pychrm_test_dir + sep + 'lymphoma_eosin_channel_MCL_test_img_sj-05-3362-R2_001_E_REFERENCE_SIGFILES.zip'
            # Close the archive as soon as its contents are extracted.
            with zipfile.ZipFile(reference_sigs, mode='r') as zf:
                zf.extractall(tempdir)

            img_filename = "lymphoma_eosin_channel_MCL_test_img_sj-05-3362-R2_001_E.tif"
            orig_img_filepath = pychrm_test_dir + sep + img_filename

            # copy the tiff to the tempdir so the .sig files end up there too
            copy(orig_img_filepath, tempdir)
            input_image_path = tempdir + sep + img_filename

            # create the tile image iterator
            image_iter = SampleImageTiles(input_image_path, scan_x, scan_y,
                                          True)
            print("Number of samples = " + str(image_iter.samples))

            # Just grab the first tile.
            # (A leftover pdb.set_trace() used to sit here; it blocked every
            # unattended test run waiting for debugger input.)
            # NOTE(review): if sample() is a generator function, this is a
            # generator object rather than a pixel plane -- confirm.
            tile_cropped_px_plane = image_iter.sample()

            # Do not set 'x', 'y', 'w', 'h' here: the sample is already
            # cropped, and the code would try to take a ROI of a ROI.
            kwargs = {
                'name': input_image_path,
                'source_filepath': tile_cropped_px_plane,
                'long': True,
                'tile_num_cols': image_iter.tiles_x,
                'tile_num_rows': image_iter.tiles_y,
                'tiling_scheme': '{0}x{1}'.format(image_iter.tiles_x,
                                                  image_iter.tiles_y),
                'tile_col_index': image_iter.current_col,
                'tile_row_index': image_iter.current_row,
                'sample_group_id': 0,
            }

            top_left_tile_feats = FeatureVector(**kwargs).GenerateFeatures(
                quiet=False, write_to_disk=False)

            top_left_tile_reference_feats = FeatureVector.NewFromSigFile(
                tempdir + sep + 'sj-05-3362-R2_001_E-t5x6_0_0-l.sig')

            # Remember we're reading these values in from strings, and the
            # ranges are so wide you only have 6 sig figs, so the check is
            # delegated to compare() rather than a numeric tolerance.
            self.assertTrue(
                compare(top_left_tile_feats.values,
                        top_left_tile_reference_feats.values))

        finally:
            rmtree(tempdir)
Beispiel #10
0
    def test_FeatureComputationFromROI(self):
        """Compute features on a bounding-box ROI and compare with references.

        A FeatureVector is given an explicit x/y/w/h bounding box inside the
        full test image, features are calculated for that region only, and the
        result is compared with the feature names and values read from the
        reference .sig files (calculated by the C++ implementation).

        Two tiles of the 6x5 tiling are checked: the top-left tile
        (col 0, row 0) and the bottom-right tile (col 5, row 4).
        """
        import zipfile
        from shutil import copy

        # orig image lymphoma_eosin_channel_MCL_test_img_sj-05-3362-R2_001_E.tif
        # has size=1388x1040
        # WND-CHARM command line specifies via -tCxR param
        # where C is columns and R is rows, ergo 5 rows, 6 cols = -t6x5
        # tile dims => w=1388/6 cols = 231.33px wide, h=1040/5 rows = 208 px tall
        ROI_width = 231
        ROI_height = 208

        img_filename = "lymphoma_eosin_channel_MCL_test_img_sj-05-3362-R2_001_E.tif"
        orig_img_filepath = pychrm_test_dir + sep + img_filename
        reference_sigs = pychrm_test_dir + sep + 'lymphoma_eosin_channel_MCL_test_img_sj-05-3362-R2_001_E_t6x5_REFERENCE_SIGFILES.zip'

        # Reference .sig file names carry the tile's column and row index.
        ref_sig_template = 'lymphoma_eosin_channel_MCL_test_img_sj-05-3362-R2_001_E-t6x5_{0}_{1}-l.sig'

        # (column, row) of the tiles to verify: top-left, then bottom-right.
        tiles_to_check = [(0, 0), (5, 4)]

        tempdir = mkdtemp()
        try:
            # Extract the zipped reference .sig files into the temp dir; the
            # context manager guarantees the archive handle is closed.
            with zipfile.ZipFile(reference_sigs, mode='r') as zf:
                zf.extractall(tempdir)

            # Work on a copy of the tiff that lives next to the reference sigs.
            copy(orig_img_filepath, tempdir)
            input_image_path = tempdir + sep + img_filename

            for col, row in tiles_to_check:
                kwargs = {
                    'name': img_filename,
                    'source_filepath': input_image_path,
                    'long': True,
                    # ROI origin is the tile index times the tile size,
                    # i.e. (0, 0) for the top-left and (1155, 832) for the
                    # bottom-right tile.
                    'x': col * ROI_width,
                    'y': row * ROI_height,
                    'w': ROI_width,
                    'h': ROI_height,
                    'sample_group_id': 0,
                }

                tile_feats = FeatureVector(**kwargs).GenerateFeatures(
                    quiet=False, write_to_disk=False)
                reference_feats = FeatureVector.NewFromSigFile(
                    tempdir + sep + ref_sig_template.format(col, row))

                self.assertEqual(
                    tile_feats.feature_names,
                    reference_feats.feature_names,
                    "feature names differ from reference for tile "
                    "col={0} row={1}".format(col, row))

                # The reference values are parsed from text in the .sig file,
                # so only about 6 significant figures are available; the
                # comparison is delegated to the suite's compare() helper
                # rather than using exact float equality.
                self.assertTrue(
                    compare(tile_feats.values, reference_feats.values),
                    "feature values differ from reference for tile "
                    "col={0} row={1}".format(col, row))

        finally:
            rmtree(tempdir)
    def test_HeatMap_w_FeatureComputationPlan(self):
        """Check the first SlidingWindow position against the reference tile.

        A SlidingWindow configured as a 6-column by 5-row tiling is created
        over the test image.  Features are generated for the first sample the
        window yields and compared with the values read from the reference
        .sig file for tile (0, 0).
        """
        import zipfile
        from shutil import copy

        # chris@NIA-LG-01778617 ~/src/wnd-charm/tests/pywndcharm_tests
        # $ tiffinfo lymphoma_eosin_channel_MCL_test_img_sj-05-3362-R2_001_E.tif
        # TIFF Directory at offset 0x18ea9c (1632924)
        #   Image Width: 1388 Image Length: 1040
        #   Bits/Sample: 8
        #   Compression Scheme: LZW
        #   Photometric Interpretation: min-is-black
        #   Samples/Pixel: 1
        #   Rows/Strip: 5
        #   Planar Configuration: single image plane

        # WND-CHARM command line specifies via -tCxR param
        # where C is columns and R is rows, ergo 5 rows, 6 cols = -t6x5
        # tile dims => w=1388/6 cols = 231.33px wide, h=1040/5 rows = 208 px tall

        img_filename = "lymphoma_eosin_channel_MCL_test_img_sj-05-3362-R2_001_E.tif"
        orig_img_filepath = pychrm_test_dir + sep + img_filename
        reference_sigs = pychrm_test_dir + sep + 'lymphoma_eosin_channel_MCL_test_img_sj-05-3362-R2_001_E_t6x5_REFERENCE_SIGFILES.zip'
        ref_file = 'lymphoma_eosin_channel_MCL_test_img_sj-05-3362-R2_001_E-t6x5_{}_{}-l.sig'

        # sourcedir holds a copy of the image, targetdir the reference sigs.
        sourcedir = mkdtemp()
        targetdir = mkdtemp()

        try:
            # Extract the zipped reference .sig files; the context manager
            # guarantees the archive handle is closed.
            with zipfile.ZipFile(reference_sigs, mode='r') as zf:
                zf.extractall(targetdir)

            # Work on a copy of the tiff inside its own temp dir.
            copy(orig_img_filepath, sourcedir)
            input_image_path = sourcedir + sep + img_filename

            # Create sliding window that emulates 6x5 tiling:
            window = SlidingWindow(
                source_filepath=input_image_path,
                tile_num_cols=6,
                tile_num_rows=5,
                long=True)
            print("Number of samples = " + str(window.num_positions))

            # top left: only the first sample yielded by the window is checked.
            # Fail loudly if the window yields nothing, instead of passing
            # without having asserted anything.
            test_feats = next(iter(window.sample()), None)
            self.assertIsNotNone(
                test_feats, "SlidingWindow.sample() yielded no samples")

            test_feats.GenerateFeatures(
                quiet=False, write_to_disk=False, cache=True)
            reference_feats = FeatureVector.NewFromSigFile(
                targetdir + sep + ref_file.format(0, 0))
            self.assertTrue(
                compare(test_feats.values, reference_feats.values),
                "feature values differ from reference for tile (0, 0)")

            # TODO: also verify the position below top left against
            # ref_file.format(0, 1).

        finally:
            # Attempt both removals even if the first one raises.
            try:
                rmtree(sourcedir)
            finally:
                rmtree(targetdir)