def testUnderscoreColumnFormat(self):
    """Check that with camelCase disabled, per-band columns are named
    with an underscore between band and column (e.g. ``g_Fwhm``).
    """
    config = TransformObjectCatalogConfig()
    config.outputBands = ["g", "r", "i"]
    config.camelCase = False
    task = TransformObjectCatalogTask(config=config)
    funcs = {'Fwhm': HsmFwhm(dataset='meas')}
    df = task.run(self.parq, funcs=funcs, dataId=self.dataId)
    self.assertIsInstance(df, pd.DataFrame)
    for band in config.outputBands:
        self.assertIn('{}_Fwhm'.format(band), df.columns)
 def testUnderscoreColumnFormat(self):
     """Check underscore-joined per-band column names when an explicit
     filterMap translates full filter names to short band names.
     """
     config = TransformObjectCatalogConfig()
     config.filterMap = {"HSC-G": "g", "HSC-R": "r", "HSC-I": "i"}
     config.camelCase = False
     task = TransformObjectCatalogTask(config=config)
     funcs = {'Fwhm': HsmFwhm(dataset='meas')}
     df = task.run(self.parq, funcs=funcs, dataId=self.dataId)
     self.assertIsInstance(df, pd.DataFrame)
     # Iterate the mapped short band names ("g", "r", "i").
     for band in config.filterMap.values():
         self.assertIn('{}_Fwhm'.format(band), df.columns)
 def testNoOutputBands(self):
     """With no outputBands configured, every input band should appear in
     the multilevel output and the raw filter names should not.
     """
     config = TransformObjectCatalogConfig()
     config.multilevelOutput = True
     task = TransformObjectCatalogTask(config=config)
     funcs = {'Fwhm': HsmFwhm(dataset='meas')}
     df = task.run(self.parq, funcs=funcs, dataId=self.dataId)
     self.assertIsInstance(df, pd.DataFrame)
     # The full filter name must not leak through as a column level.
     self.assertNotIn('HSC-G', df)
     for band in ('g', 'r', 'i'):
         sub = df[band]
         self.assertIsInstance(sub, pd.DataFrame)
         self.assertIn('Fwhm', sub.columns)
 def testMultilevelOutput(self):
     """Check the un-flattened result, which keeps a two-level
     (band, column) index, and that unrequested bands are absent.
     """
     config = TransformObjectCatalogConfig()
     config.outputBands = ["r", "i"]
     config.multilevelOutput = True
     task = TransformObjectCatalogTask(config=config)
     funcs = {'Fwhm': HsmFwhm(dataset='meas')}
     df = task.run(self.parq, funcs=funcs, dataId=self.dataId)
     self.assertIsInstance(df, pd.DataFrame)
     # g band was not requested, so it must not appear at the top level.
     self.assertNotIn('g', df)
     for band in config.outputBands:
         sub = df[band]
         self.assertIsInstance(sub, pd.DataFrame)
         self.assertIn('Fwhm', sub.columns)
# --- Example #5 (score: 0) ---
 def setUp(self):
     """Load the multilevel test catalogue and configure a default task."""
     path = os.path.join(ROOT, self.catFilename)
     self.parq = MultilevelParquetTable(path)
     self.filters = self.parq.columnLevelNames['filter']
     self.task = TransformObjectCatalogTask()
     # Short band names for only those filters present in the test data.
     self.shortFilters = [short
                          for full, short in self.task.config.filterMap.items()
                          if full in self.filters]
     self.task.config.functorFile = self.yamlFile
     self.funcs = self.task.getFunctors()
     # Expected output columns: one per functor plus the default flags.
     self.columnNames = list(self.funcs.funcDict.keys())
     self.columnNames.extend(PostprocessAnalysis._defaultFlags)
     # Columns emitted once overall rather than once per band.
     self.noDupCols = [name for name, func in self.funcs.funcDict.items()
                       if func.noDup]
    def testNullFilter(self):
        """Columns for every requested band must exist in the output even
        when a band is absent from the input, excluded bands must not leak
        through, and the fill values/dtypes for the missing band must match
        the column type (NaN for floats, negative for ints, True/False for
        bad/good flags).
        """
        config = TransformObjectCatalogConfig()
        config.camelCase = True
        # y band is requested although the input lacks it; g band exists
        # in the input but is deliberately excluded from the output.
        config.outputBands = ["r", "i", "y"]
        # Arbitrarily choose a boolean flag column to be "good"
        config.goodFlags = ['GoodFlagColumn']
        task = TransformObjectCatalogTask(config=config)
        # One column per relevant dtype: float, int, good flag, bad flag.
        # Which source columns are used is unimportant; only the types
        # matter here.
        funcs = {
            'FloatColumn': HsmFwhm(dataset='meas'),
            'IntColumn': Column('base_InputCount_value', dataset='meas'),
            'GoodFlagColumn': Column('slot_GaussianFlux_flag', dataset='meas'),
            'BadFlagColumn': Column('slot_Centroid_flag', dataset='meas')
        }
        df = task.run(self.parq, funcs=funcs, dataId=self.dataId)
        self.assertIsInstance(df, pd.DataFrame)

        suffixes = ('FloatColumn', 'IntColumn',
                    'BadFlagColumn', 'GoodFlagColumn')
        for band in config.outputBands:
            for suffix in suffixes:
                self.assertIn(band + suffix, df.columns)

        # The excluded band is absent, and the missing band was filled
        # with the expected defaults.
        self.assertNotIn('gFloatColumn', df.columns)
        self.assertTrue(df['yFloatColumn'].isnull().all())
        self.assertTrue(df['iFloatColumn'].notnull().all())
        self.assertTrue(np.all(df['iIntColumn'].values >= 0))
        self.assertTrue(np.all(df['yIntColumn'].values < 0))
        self.assertTrue(np.all(~df['yGoodFlagColumn'].values))
        self.assertTrue(np.all(df['yBadFlagColumn'].values))

        # Filling the missing band must not have changed any dtype.
        for band in ('i', 'y'):
            self.assertEqual(df[band + 'FloatColumn'].dtype,
                             np.dtype(np.float64))
            self.assertEqual(df[band + 'IntColumn'].dtype,
                             np.dtype(np.int64))
            self.assertEqual(df[band + 'GoodFlagColumn'].dtype,
                             np.dtype(np.bool_))
            self.assertEqual(df[band + 'BadFlagColumn'].dtype,
                             np.dtype(np.bool_))
    def testNullFilter(self):
        """Requested bands missing from the input still get (null-filled)
        columns, while bands left out of outputBands do not appear at all.
        """
        config = TransformObjectCatalogConfig()
        # y band is requested although the input lacks it; g band exists
        # in the input but is deliberately excluded from the output.
        config.outputBands = ["r", "i", "y"]
        task = TransformObjectCatalogTask(config=config)
        funcs = {'Fwhm': HsmFwhm(dataset='meas')}
        df = task.run(self.parq, funcs=funcs, dataId=self.dataId)
        self.assertIsInstance(df, pd.DataFrame)

        # Reference coordinates are always carried through.
        self.assertIn('coord_ra', df.columns)
        self.assertIn('coord_dec', df.columns)

        for band in config.outputBands:
            self.assertIn(band + 'Fwhm', df.columns)

        self.assertNotIn('gFwhm', df.columns)
        # The absent band is all-null; a present band is fully populated.
        self.assertTrue(df['yFwhm'].isnull().all())
        self.assertTrue(df['iFwhm'].notnull().all())
# --- Example #8 (score: 0) ---
#!/usr/bin/env python
"""Command-line entry point for TransformObjectCatalogTask."""
from lsst.pipe.tasks.postprocess import TransformObjectCatalogTask

if __name__ == "__main__":
    # Guard the call so importing this module does not launch a run.
    TransformObjectCatalogTask.parseAndRun()
# --- Example #9 (score: 0) ---
class PostprocessTestCase(unittest.TestCase):

    catFilename = 'multilevel_test.parq'
    yamlFile = 'testFunc.yaml'

    def setUp(self):
        self.parq = MultilevelParquetTable(os.path.join(
            ROOT, self.catFilename))
        self.filters = self.parq.columnLevelNames['filter']
        self.task = TransformObjectCatalogTask()
        self.shortFilters = [
            f for k, f in self.task.config.filterMap.items()
            if k in self.filters
        ]
        self.task.config.functorFile = self.yamlFile
        self.funcs = self.task.getFunctors()
        self.columnNames = list(self.funcs.funcDict.keys())
        self.columnNames += list(PostprocessAnalysis._defaultFlags)
        self.noDupCols = [k for k, f in self.funcs.funcDict.items() if f.noDup]

    def tearDown(self):
        del self.parq

    def checkMultiLevelResults(self, df, dataId=None):
        assert type(df.columns) == pd.core.indexes.multi.MultiIndex

        assert len(df.columns.levels) == 2  # Make sure two levels
        assert df.columns.names == ['filter', 'column']

        # Make sure the correct columns are there
        assert all([f in df.columns.levels[0] for f in self.filters])
        assert all([c in df.columns.levels[1] for c in self.columnNames])

        if dataId is not None:
            for k in dataId.keys():
                assert all([k in df[f].columns for f in self.filters])

    def checkFlatResults(self, df, dataId=None):
        assert type(df.columns) == pd.core.indexes.base.Index

        noDupCols = list(self.noDupCols)  # Copy
        if dataId is not None:
            noDupCols += list(dataId.keys())

        assert all([c in df.columns for c in self.noDupCols])

        missing = []
        for filt, col in itertools.product(self.shortFilters,
                                           self.columnNames):
            if col not in self.noDupCols:
                mungedCol = '{0}_{1}'.format(filt, col)
                if mungedCol not in df.columns:
                    missing.append(mungedCol)

        assert len(missing) == 0

    def testRun(self):

        dataId = {'patch': '4,4'}

        # Test with multilevel output
        self.task.config.multilevelOutput = True

        df = self.task.run(self.parq)
        self.checkMultiLevelResults(df)

        df = self.task.run(self.parq, funcs=self.funcs)
        self.checkMultiLevelResults(df)

        df = self.task.run(self.parq, dataId=dataId)
        self.checkMultiLevelResults(df)

        # Test with flat output
        self.task.config.multilevelOutput = False

        df = self.task.run(self.parq)
        self.checkFlatResults(df)

        df = self.task.run(self.parq, funcs=self.funcs)
        self.checkFlatResults(df)

        df = self.task.run(self.parq, dataId=dataId)
        self.checkFlatResults(df)