Beispiel #1
0
 def setUp(self):
     """Load the multilevel CSV fixture and expose it as ``self.parq``.

     The gzipped CSV is read with a three-row column header and the first
     column as the index, written to a temporary uncompressed Parquet file,
     and wrapped in a ``MultilevelParquetTable`` inside the
     ``getTempFilePath`` context. ``self.filters`` lists the filter names
     used by the tests.
     """
     csvPath = os.path.join(ROOT, 'data', 'test_multilevel_parq.csv.gz')
     catalog = pd.read_csv(csvPath, header=[0, 1, 2], index_col=0)
     with lsst.utils.tests.getTempFilePath('*.parq') as tmpPath:
         pq.write_table(pa.Table.from_pandas(catalog), tmpPath,
                        compression='none')
         self.parq = MultilevelParquetTable(tmpPath)
     self.filters = ['HSC-G', 'HSC-R']
    def setUp(self):
        """Build ``self.parq`` from the CSV fixture and set ``self.dataId``.

        The fixture is converted to a temporary uncompressed Parquet file,
        which is opened as a ``MultilevelParquetTable`` inside the
        ``getTempFilePath`` context.
        """
        # Note that this test input includes HSC-G, HSC-R, and HSC-I data
        fixture = os.path.join(ROOT, 'data', 'test_multilevel_parq.csv.gz')
        frame = pd.read_csv(fixture, header=[0, 1, 2], index_col=0)
        with lsst.utils.tests.getTempFilePath('*.parq') as parqPath:
            arrowTable = pa.Table.from_pandas(frame)
            pq.write_table(arrowTable, parqPath, compression='none')
            self.parq = MultilevelParquetTable(parqPath)

        self.dataId = dict(tract=9615, patch="4,4")
Beispiel #3
0
 def setUp(self):
     """Open the test catalog and prepare the transform task and functors.

     Sets ``self.parq``, ``self.filters``, ``self.task``,
     ``self.shortFilters``, ``self.funcs``, ``self.columnNames`` and
     ``self.noDupCols`` from ``self.catFilename`` and ``self.yamlFile``.
     """
     catalogPath = os.path.join(ROOT, self.catFilename)
     self.parq = MultilevelParquetTable(catalogPath)
     self.filters = self.parq.columnLevelNames['filter']

     self.task = TransformObjectCatalogTask()
     # Keep only the mapped names whose key is a filter present in the table.
     self.shortFilters = [
         shortName
         for fullName, shortName in self.task.config.filterMap.items()
         if fullName in self.filters
     ]

     # The functor file must be configured before the functors are loaded.
     self.task.config.functorFile = self.yamlFile
     self.funcs = self.task.getFunctors()

     self.columnNames = [
         *self.funcs.funcDict.keys(),
         *PostprocessAnalysis._defaultFlags,
     ]
     self.noDupCols = [
         name for name, functor in self.funcs.funcDict.items()
         if functor.noDup
     ]
Beispiel #4
0
    def simulateMultiParquet(self, dataDict):
        """Create a simple test MultilevelParquetTable.

        A frame is built from ``dataDict`` and copied once for every
        (dataset, filter) pair drawn from ``self.datasets`` and
        ``self.filters``. Each copy gains ``dataset`` and ``filter`` columns
        and a three-level column index named
        ``('dataset', 'filter', 'column')``; the copies are then joined on
        their row index into a single wide frame.

        Parameters
        ----------
        dataDict : `dict`
            Data passed directly to `pandas.DataFrame`.

        Returns
        -------
        parq : `MultilevelParquetTable`
            Table constructed from the joined frame.
        """
        simpleDF = pd.DataFrame(dataDict)
        # ``reindex`` returns a new frame instead of reordering in place, so
        # the result must be kept for the sorted column order to take effect.
        # The ordering does not depend on the dataset/filter pair, so it is
        # done once here rather than inside the loops.
        simpleDF = simpleDF.reindex(sorted(simpleDF.columns), axis=1)

        dfFilterDSCombos = []
        for ds in self.datasets:
            for filterName in self.filters:
                df = simpleDF.copy()
                df['dataset'] = ds
                df['filter'] = filterName
                df.columns = pd.MultiIndex.from_tuples(
                    [(ds, filterName, c) for c in df.columns],
                    names=('dataset', 'filter', 'column'))
                dfFilterDSCombos.append(df)

        # Column tuples are unique per (dataset, filter), so a plain index
        # join concatenates the per-combination frames side by side.
        df = functools.reduce(lambda d1, d2: d1.join(d2), dfFilterDSCombos)

        return MultilevelParquetTable(dataFrame=df)
Beispiel #5
0
 def getParq(self, filename, df):
     """Return a file-backed and a DataFrame-backed table, in that order."""
     fileTable = MultilevelParquetTable(filename)
     frameTable = MultilevelParquetTable(dataFrame=df)
     return fileTable, frameTable
Beispiel #6
0
 def getParq(self, filename, df):
     """Build one table from ``filename`` and one from ``df``.

     Returns a 2-tuple ``(fromFile, fromDf)``.
     """
     return (
         MultilevelParquetTable(filename),
         MultilevelParquetTable(dataFrame=df),
     )
Beispiel #7
0
class FunctorTestCase(unittest.TestCase):
    """Exercise functors against a table built from the bundled CSV fixture."""

    def setUp(self):
        """Load the CSV fixture into ``self.parq`` via a temporary Parquet file."""
        df = pd.read_csv(os.path.join(ROOT, 'data',
                                      'test_multilevel_parq.csv.gz'),
                         header=[0, 1, 2],
                         index_col=0)
        with lsst.utils.tests.getTempFilePath('*.parq') as filename:
            table = pa.Table.from_pandas(df)
            pq.write_table(table, filename, compression='none')
            self.parq = MultilevelParquetTable(filename)
        self.filters = ['HSC-G', 'HSC-R']

    def _funcVal(self, functor):
        """Run common checks on a functor and return its ``dropna`` result.

        Checks that ``name`` and ``shortname`` are strings, that calling the
        functor on ``self.parq`` yields a `pandas.Series`, and that calling
        it with ``dropna=True`` yields no nulls.

        Returns
        -------
        val : `pandas.Series`
            Result of ``functor(self.parq, dropna=True)``.
        """
        self.assertIsInstance(functor.name, str)
        self.assertIsInstance(functor.shortname, str)

        val = functor(self.parq)
        self.assertIsInstance(val, pd.Series)

        val = functor(self.parq, dropna=True)
        self.assertEqual(val.isnull().sum(), 0)

        return val

    def testColumn(self):
        """A plain ``Column`` functor passes the common checks."""
        func = Column('base_FootprintArea_value', filt='HSC-G')
        self._funcVal(func)

    def testCustom(self):
        """``CustomFunctor`` evaluates its expression on the named column."""
        func = CustomFunctor('2*base_FootprintArea_value', filt='HSC-G')
        val = self._funcVal(func)

        func2 = Column('base_FootprintArea_value', filt='HSC-G')

        # The comparison result must be asserted; a bare ``np.allclose`` call
        # computes the answer and throws it away, so the test could not fail.
        self.assertTrue(
            np.allclose(val.values,
                        2 * func2(self.parq).values,
                        atol=1e-13,
                        rtol=0))

    def testCoords(self):
        """RA/Dec functors match the ``ref`` coordinates scaled by 180/pi."""
        ra = self._funcVal(RAColumn())
        dec = self._funcVal(DecColumn())

        columnDict = {
            'dataset': 'ref',
            'filter': 'HSC-G',
            'column': ['coord_ra', 'coord_dec']
        }
        coords = self.parq.toDataFrame(columns=columnDict) / np.pi * 180.

        self.assertTrue(np.allclose(ra, coords['coord_ra'], atol=1e-13,
                                    rtol=0))
        self.assertTrue(
            np.allclose(dec, coords['coord_dec'], atol=1e-13, rtol=0))

    def testMag(self):
        """``Color`` equals the difference of two ``Mag`` values per dataset."""
        fluxName = 'base_PsfFlux'

        # Check that things work when you provide dataset explicitly
        for dataset in ['forced_src', 'meas']:
            psfMag_G = self._funcVal(
                Mag(fluxName, dataset=dataset, filt='HSC-G'))
            psfMag_R = self._funcVal(
                Mag(fluxName, dataset=dataset, filt='HSC-R'))

            psfColor_GR = self._funcVal(
                Color(fluxName, 'HSC-G', 'HSC-R', dataset=dataset))

            self.assertTrue(
                np.allclose((psfMag_G - psfMag_R).dropna(),
                            psfColor_GR,
                            rtol=0,
                            atol=1e-13))

        # Check that behavior as expected when dataset not provided;
        #  that is, that the color comes from forced and default Mag is meas
        psfMag_G = self._funcVal(Mag(fluxName, filt='HSC-G'))
        psfMag_R = self._funcVal(Mag(fluxName, filt='HSC-R'))

        psfColor_GR = self._funcVal(Color(fluxName, 'HSC-G', 'HSC-R'))

        # These should *not* be equal.
        self.assertFalse(
            np.allclose((psfMag_G - psfMag_R).dropna(), psfColor_GR))

    def testMagDiff(self):
        """``MagDiff`` equals the difference of the two ``Mag`` functors."""
        for filt in self.filters:
            # NOTE(review): the loop variable is overwritten here, so only
            # HSC-G is ever exercised (once per entry in ``self.filters``).
            # Confirm whether HSC-R is meant to be covered before removing
            # this line; it is kept so the test's behavior is unchanged.
            filt = 'HSC-G'
            val = self._funcVal(
                MagDiff('base_PsfFlux', 'modelfit_CModel', filt=filt))

            mag1 = self._funcVal(Mag('modelfit_CModel', filt=filt))
            mag2 = self._funcVal(Mag('base_PsfFlux', filt=filt))
            self.assertTrue(
                np.allclose((mag2 - mag1).dropna(), val, rtol=0, atol=1e-13))

    def testLabeller(self):
        """``StarGalaxyLabeller`` passes the common checks."""
        # Covering the code is better than nothing
        self._funcVal(StarGalaxyLabeller())

    def testOther(self):
        """Each size/moment functor passes the common checks per filter."""
        # Covering the code is better than nothing
        for filt in self.filters:
            for Func in [
                    DeconvolvedMoments, SdssTraceSize, PsfSdssTraceSizeDiff,
                    HsmTraceSize, PsfHsmTraceSizeDiff, HsmFwhm
            ]:
                self._funcVal(Func(filt=filt))

    def _compositeFuncVal(self, functor):
        """Run common checks on a composite functor and return its result.

        Checks that ``functor`` is a ``CompositeFunctor``, that calling it on
        ``self.parq`` yields a `pandas.DataFrame` with one column per key of
        ``functor.funcDict``, and that the ``dropna=True`` result has no
        nulls.

        Returns
        -------
        df : `pandas.DataFrame`
            Result of ``functor(self.parq, dropna=True)``.
        """
        self.assertIsInstance(functor, CompositeFunctor)

        df = functor(self.parq)

        self.assertIsInstance(df, pd.DataFrame)
        # Checking key by key names the missing column on failure.
        for key in functor.funcDict:
            self.assertIn(key, df.columns)

        df = functor(self.parq, dropna=True)

        # Check that there are no nulls
        self.assertFalse(df.isnull().any(axis=None))

        return df

    def testComposite(self):
        """Composite functors agree for per-functor and global filters."""
        filt = 'HSC-G'
        funcDict = {
            'psfMag_ref': Mag('base_PsfFlux', dataset='ref'),
            'ra': RAColumn(),
            'dec': DecColumn(),
            'psfMag': Mag('base_PsfFlux', filt=filt),
            'cmodel_magDiff': MagDiff('base_PsfFlux',
                                      'modelfit_CModel',
                                      filt=filt)
        }
        func = CompositeFunctor(funcDict)
        df = self._compositeFuncVal(func)

        # Repeat same, but define filter globally instead of individually
        funcDict2 = {
            'psfMag_ref': Mag('base_PsfFlux', dataset='ref'),
            'ra': RAColumn(),
            'dec': DecColumn(),
            'psfMag': Mag('base_PsfFlux'),
            'cmodel_magDiff': MagDiff('base_PsfFlux', 'modelfit_CModel')
        }

        func2 = CompositeFunctor(funcDict2, filt=filt)
        df2 = self._compositeFuncVal(func2)
        self.assertTrue(df.equals(df2))

        # Changing the filter on the composite must change the output.
        func2.filt = 'HSC-R'
        df3 = self._compositeFuncVal(func2)
        self.assertFalse(df2.equals(df3))

        # Make sure things work with passing list instead of dict
        funcs = [
            Mag('base_PsfFlux', dataset='ref'),
            RAColumn(),
            DecColumn(),
            Mag('base_PsfFlux', filt=filt),
            MagDiff('base_PsfFlux', 'modelfit_CModel', filt=filt)
        ]

        self._compositeFuncVal(CompositeFunctor(funcs))

    def testCompositeColor(self):
        """A composite mixing ``Mag`` and ``Color`` passes the common checks."""
        funcDict = {
            'a': Mag('base_PsfFlux', dataset='meas', filt='HSC-G'),
            'b': Mag('base_PsfFlux', dataset='forced_src', filt='HSC-G'),
            'c': Color('base_PsfFlux', 'HSC-G', 'HSC-R')
        }
        # Covering the code is better than nothing
        self._compositeFuncVal(CompositeFunctor(funcDict))