Esempio n. 1
0
def sample_filter(ref_col_name, frac, random_state=None):
    """
    Returns a GCRQuery object to be used in the `filters` argument of get_quantities()
    to randomly sample the object catalog by a given fraction (*frac*).

    *ref_col_name* must be a column of integer values.

    Optionally, provide *random_state* (int or np.random.RandomState) to fix random state.

    Raises ValueError if *frac* is not in [0, 1].
    """
    # pylint: disable=no-member

    frac = float(frac)
    if frac > 1 or frac < 0:
        raise ValueError("`frac` must be a float number in [0, 1].")
    if frac == 1:
        # Trivial case: select everything.
        return GCRQuery()
    if frac == 0:
        # Trivial case: select nothing.
        # BUGFIX: `np.bool` was removed in NumPy 1.24; use the builtin `bool`.
        return GCRQuery(
            (lambda a: np.zeros_like(a, dtype=bool), ref_col_name))

    if not isinstance(random_state, np.random.RandomState):
        random_state = np.random.RandomState(random_state)
    seed = random_state.randint(2**32)

    def _sampler(arr, frac=frac, seed=seed):
        # Seed per-chunk on the first element so the selection is reproducible
        # regardless of how the catalog is chunked.
        size = len(arr)  # arr is a numpy array of integers
        if size:
            return np.random.RandomState(
                (int(arr[0]) + seed) % (2**32)).rand(size) < frac
        return np.zeros(0, dtype=bool)

    return GCRQuery((_sampler, ref_col_name))
Esempio n. 2
0
def partition_filter(partition_name, ids, id_high=None):
    """
    Returns a GCRQuery object to be used in the `native_filters` argument of get_quantities(),
    to select a subset of partitions.

    *partition_name* must be a "native filter quantity" in GCR,
    and the partitions must be specified by integer IDs.
    Existing examples include "tract" for object catalogs and "healpix_pixel" for cosmoDC2.

    If *ids* is a single integer, select only that partition.
    If *ids* and *id_high* are both given as single integers, select [ids, id_high]
    (inclusive on both ends!).
    If *ids* is a list, select partitions in that list (*id_high* is ignored).

    Raises ValueError for an invalid *id_high* or an empty *ids* list.
    """
    if isinstance(ids, int):
        if id_high is None:
            return GCRQuery(f"{partition_name} == {ids}")
        elif isinstance(id_high, int):
            # Range is inclusive on both ends.
            return GCRQuery(f"{partition_name} >= {ids}",
                            f"{partition_name} <= {id_high}")
        raise ValueError(
            f"When `{partition_name}s` is an integer, `{partition_name}_high` must be an integer or None."
        )

    # BUGFIX: `np.int` was removed in NumPy 1.24; use the builtin `int`.
    ids = np.unique(np.asarray(ids, dtype=int))
    if not ids.size:
        raise ValueError(f"Must select at least one {partition_name}.")

    def _partition_selector(partition_ids, ids_to_select=ids):
        # NOTE(review): assume_unique also asserts *partition_ids* are unique;
        # this holds for native-filter values but is worth confirming.
        return np.isin(partition_ids, ids_to_select, assume_unique=True)

    return GCRQuery((_partition_selector, partition_name))
Esempio n. 3
0
    def create_test_sample(self, catalog_data, test_sample):
        """Cut the catalog data down to the bounds of a test sample.

        Override in subclasses when more elaborate selections (e.g. color
        cuts) are required.

        Parameters
        ----------
        catalog_data : a GenericCatalogReader catalog instance
        test_sample : dictionary of dictionaries
            Maps each quantity name to its cut bounds, e.g.
                {mag: {min: -23,    max: -22}
                 z:   {min: 0.1031, max: 0.2452}}

        Returns
        -------
        A GenericCatalogReader catalog instance cut to the requested bounds.
        """
        cut_strings = []
        for quantity, bounds in test_sample.items():
            # Upper bound is exclusive, lower bound inclusive.
            cut_strings.append('{} < {}'.format(quantity, bounds['max']))
            cut_strings.append('{} >= {}'.format(quantity, bounds['min']))
        return GCRQuery(*cut_strings).filter(catalog_data)
Esempio n. 4
0
    def create_test_sample(catalog_data, test_sample):
        """Cut the catalog data down to the bounds of a test sample.

        Override in subclasses when more elaborate selections (e.g. color
        cuts) are required.

        Parameters
        ----------
        catalog_data : a GenericCatalogReader catalog instance
        test_sample : dictionary of dictionaries
            Maps each quantity name either to a dict of cut bounds, e.g.
                {mag: {min: -23,    max: -22}
                 z:   {min: 0.1031, max: 0.2452}}
            or to a pre-formed filter string.

        Returns
        -------
        A GenericCatalogReader catalog instance cut to the requested bounds.
        """
        query_list = []
        for quantity, spec in test_sample.items():
            if not isinstance(spec, dict):
                # A ready-made filter string supplied by the caller.
                query_list.append(spec)
                continue
            # Upper bound is exclusive, lower bound inclusive; either may be absent.
            if 'max' in spec:
                query_list.append('{} < {}'.format(quantity, spec['max']))
            if 'min' in spec:
                query_list.append('{} >= {}'.format(quantity, spec['min']))
        return GCRQuery(*query_list).filter(catalog_data)
Esempio n. 5
0
    def _init_data_indices(self):
        """
        Do the spatial filtering of extragalactic catalog data.

        Populates ``self._data_indices`` with the indices of catalog rows
        inside the observation's field of view (all rows when no bound is
        set) and ``self._native_filters`` with an optional healpix
        pre-filter.  Also defaults ``self._chunk_size`` when unset.
        """

        self._native_filters = None
        descqa_catalog = self._descqa_obj._catalog

        # No pointing / field-of-view information available: keep every row.
        if self._obs_metadata is None or self._obs_metadata._boundLength is None:
            self._data_indices = np.arange(
                self._descqa_obj._catalog['raJ2000'].size)

        else:
            # _boundLength may be indexable (rectangular bound) or a scalar;
            # use the larger extent as the search radius.
            # NOTE(review): values appear to be in radians -- confirm units.
            try:
                radius_rad = max(self._obs_metadata._boundLength[0],
                                 self._obs_metadata._boundLength[1])
            except (TypeError, IndexError):
                radius_rad = self._obs_metadata._boundLength

            if 'healpix_pixel' in descqa_catalog._native_filter_quantities:
                # Unit vector toward the pointing, as required by healpy.
                ra_rad = self._obs_metadata._pointingRA
                dec_rad = self._obs_metadata._pointingDec
                vv = np.array([
                    np.cos(dec_rad) * np.cos(ra_rad),
                    np.cos(dec_rad) * np.sin(ra_rad),
                    np.sin(dec_rad)
                ])
                # NOTE(review): nside=8 is hard-coded -- confirm it matches the
                # catalog's healpix partitioning.
                healpix_list = healpy.query_disc(8,
                                                 vv,
                                                 radius_rad,
                                                 inclusive=True,
                                                 nest=False)

                # OR together one query per overlapping healpix pixel.
                healpix_filter = None
                for hh in healpix_list:
                    local_filter = GCRQuery('healpix_pixel==%d' % hh)
                    if healpix_filter is None:
                        healpix_filter = local_filter
                    else:
                        healpix_filter |= local_filter

                if healpix_filter is not None:
                    if self._native_filters is None:
                        self._native_filters = [healpix_filter]
                    else:
                        self._native_filters.append(healpix_filter)

            # Fetch positions (pre-filtered by healpix when available), then
            # keep only rows within the angular radius of the pointing.
            ra_dec = descqa_catalog.get_quantities(
                ['raJ2000', 'decJ2000'], native_filters=self._native_filters)

            ra = ra_dec['raJ2000']
            dec = ra_dec['decJ2000']

            self._data_indices = np.where(_angularSeparation(ra, dec, \
                    self._obs_metadata._pointingRA, \
                    self._obs_metadata._pointingDec) < radius_rad)[0]

        # Default the chunk size to "everything" when not specified.
        if self._chunk_size is None:
            self._chunk_size = self._data_indices.size
    def get_catalog_data(gc, quantities, filters=None):
        """
        Fetch *quantities* from catalog *gc*, keeping only rows where every
        requested column is finite.

        Returns the filtered data dict, or a skipped TestResult when the
        catalog lacks any of the requested quantities.
        """
        # (Removed a dead `data = {}` assignment that was immediately overwritten.)
        if not gc.has_quantities(quantities):
            return TestResult(skipped=True, summary='Missing requested quantities')

        data = gc.get_quantities(quantities, filters=filters)
        # Make sure data entries are all finite.
        data = GCRQuery(*((np.isfinite, col) for col in data)).filter(data)

        return data
Esempio n. 7
0
    def run_on_single_catalog(self, catalog_instance, catalog_name, output_dir):
        """
        Accumulate red-sequence color histograms for central and satellite
        galaxies (optionally split by redmapper membership) and make the
        summary plot.
        """
        prepared = self.prepare_galaxy_catalog(catalog_instance)
        if prepared is None:
            return TestResult(skipped=True)

        if self.use_redmapper:
            # BUGFIX: bare `except:` also swallowed KeyboardInterrupt/SystemExit;
            # catch Exception and record why the test was skipped.
            try:
                redmapper = GCRCatalogs.load_catalog(catalog_name + '_redmapper')
            except Exception:
                return TestResult(skipped=True,
                                  summary='Could not load {}_redmapper'.format(catalog_name))

            redmapper = redmapper.get_quantities(['galaxy_id'])

        absolute_magnitude_field, mag_fields, quantities_needed = prepared
        bins = (self.z_bins, self.c_bins, self.mass_bins)
        # Histogram axes: (redshift, color, halo mass, color index).
        hist_cen = np.zeros((self.n_z_bins, self.n_c_bins, self.n_mass_bins, self.n_bands-1))
        hist_sat = np.zeros_like(hist_cen)
        hist_mem_cen = np.zeros_like(hist_cen)
        hist_mem_sat = np.zeros_like(hist_cen)

        print(absolute_magnitude_field)
        # Bright (absolute magnitude < -19) centrals / satellites.
        cen_query = GCRQuery('is_central & ({} < -19)'.format(absolute_magnitude_field))
        sat_query = GCRQuery('(~is_central) & ({} < -19)'.format(absolute_magnitude_field))

        if 'r_host' in quantities_needed and 'r_vir' in quantities_needed:
            # Keep only satellites inside their host's virial radius.
            sat_query &= GCRQuery('r_host < r_vir')

        for data in catalog_instance.get_quantities(quantities_needed, return_iterator=True):
            cen_mask = cen_query.mask(data)
            sat_mask = sat_query.mask(data)
            if self.use_redmapper:
                mem_mask = np.in1d(data['galaxy_id'], redmapper['galaxy_id'])

            for i in range(self.n_bands-1):
                color = data[mag_fields[i]] - data[mag_fields[i+1]]

                hdata = np.stack([data['redshift_true'], color, data['halo_mass']]).T
                hist_cen[:,:,:,i] += np.histogramdd(hdata[cen_mask], bins)[0]
                hist_sat[:,:,:,i] += np.histogramdd(hdata[sat_mask], bins)[0]
                if self.use_redmapper:
                    hist_mem_cen[:,:,:,i] += np.histogramdd(hdata[mem_mask & cen_mask], bins)[0]
                    hist_mem_sat[:,:,:,i] += np.histogramdd(hdata[mem_mask & sat_mask], bins)[0]

        # Release the (potentially large) per-chunk arrays.
        data = cen_mask = sat_mask = mem_mask = None

        rs_mean, rs_scat, red_frac_sat, red_frac_cen = self.compute_summary_statistics(
            hist_sat, hist_cen, hist_mem_sat, hist_mem_cen)

        red_seq = {'rs_mean': rs_mean,
                   'rs_scat': rs_scat,
                   'red_frac_sat': red_frac_sat,
                   'red_frac_cen': red_frac_cen}

        self.make_plot(red_seq, hist_cen, hist_sat, hist_mem_cen, hist_mem_sat, catalog_name, os.path.join(output_dir, 'red_sequence.png'))

        return TestResult(inspect_only=True)
Esempio n. 8
0
    def get_smass(catalog_instance):
        """
        Apply CMASS-like color and magnitude cuts and return stellar masses.

        Parameters
        ----------
        catalog_instance = Catalogue to use

        Returns
        -------
        - log10 of stellar mass with CMASS color and magnitude cuts applied
        - number density of galaxies (galaxies per square degree)
        """
        gc = catalog_instance
        sky_area = float(gc.sky_area)

        # Resolve each needed quantity; missing ones map to a falsy value.
        needed = {
            "smass": "stellar_mass",
            "g": "mag_true_g_lsst",
            "r": "mag_true_r_lsst",
            "i": "mag_true_i_lsst",
        }
        cols = {label: gc.first_available(q) for label, q in needed.items()}
        if not all(cols.values()):
            raise KeyError("Not all needed quantities exist!!")

        valid_smass = GCRQuery("{smass} > 0".format(**cols))
        # CMASS-style color/magnitude selection.
        cmass_cuts = GCRQuery(
            "({r} - {i}) - ({g} - {r}) / 8 > 0.55".format(**cols),
            "{i} < 19.86 + 1.6 * (({r} - {i}) - ({g} - {r}) / 8 - 0.8)".format(**cols),
            "{i} < 19.9".format(**cols),
            "{i} > 17.5".format(**cols),
            "{r} - {i} < 2".format(**cols),
        )

        smass_col = cols["smass"]
        selected = gc.get_quantities([smass_col], filters=[valid_smass, cmass_cuts])
        log_smass_cmass = np.log10(selected[smass_col])

        print()
        print("minimum cmass-cut = ", np.min(log_smass_cmass))
        print("maximum cmass-cut = ", np.max(log_smass_cmass))
        print()

        numDen = len(log_smass_cmass) / sky_area
        return log_smass_cmass, numDen
Esempio n. 9
0
    def create_test_sample(catalog_data, test_sample, h=1):
        """Cut the catalog data down to the bounds of a test sample.

        Override in subclasses when more elaborate selections (e.g. color
        cuts) are required.

        Parameters
        ----------
        catalog_data : a GenericCatalogReader catalog instance
        test_sample : dictionary of dictionaries
            Maps each quantity name either to a dict of cut bounds, e.g.
                {Mag: {min: -23,    max: -22}
                 z:   {min: 0.1031, max: 0.2452}}
            or to a pre-formed filter string (which may contain the token
            ``Mag_shift``).
        h : Hubble parameter used to shift magnitudes to h=1 units.

        Returns
        -------
        A GenericCatalogReader catalog instance cut to the requested bounds.
        """
        filters = []
        # Magnitude shift to adjust for h=1 units in data (eg Zehavi et. al.)
        Mag_shift = 5 * np.log10(h)
        for key, condition in test_sample.items():
            if isinstance(condition, dict):
                # Shift only absolute-magnitude quantities ('Mag' in the key).
                shift = Mag_shift if 'Mag' in key else 0
                if 'max' in condition:
                    filters.append('{} < {}'.format(key, condition['max'] + shift))
                if 'min' in condition:
                    filters.append('{} >= {}'.format(key, condition['min'] + shift))
            else:  # customized filter string
                if 'Mag_shift' in condition:
                    # Substitute the literal token with the numeric shift.
                    condition = re.sub('Mag_shift',
                                       '{:0.2f}'.format(Mag_shift), condition)
                    print('Substituted filter to adjust for Mag shifts: {}'.
                          format(condition))
                filters.append(condition)
        print('Test sample filters for {}'.format(test_sample), filters)

        return GCRQuery(*filters).filter(catalog_data)
Esempio n. 10
0
    def run_on_single_catalog(self, catalog_instance, catalog_name,
                              output_dir):
        """
        Load positions and per-band magnitudes for the catalog, applying the
        configured selection cuts, and cache them on self for later plotting.
        """
        # Map whichever magnitude exists (observed cModel preferred, else true)
        # onto uniform 'mag_<band>' keys.
        mags = {
            catalog_instance.first_available('mag_{}_cModel'.format(b),
                                             'mag_true_{}'.format(b)):
            'mag_{}'.format(b)
            for b in self.bands
        }
        qs = ['ra', 'dec']
        qs = qs + list(mags)
        # Trick to read both true and observed magnitudes by @Yao
        filters = [GCRQuery(self.selection_cuts)]
        data = catalog_instance.get_quantities(qs, filters=filters)
        data = {mags.get(k, k): v for k, v in data.items()}
        # BUGFIX: len(data) was the number of columns, not objects; count rows.
        print('Selected %d objects for catalog %s' % (len(data['ra']), catalog_name))
        self.ra[catalog_name] = data['ra']
        self.dec[catalog_name] = data['dec']

        for band in self.bands:
            self.magnitude[(catalog_name, band)] = data['mag_%s' % band]
        return TestResult(inspect_only=True)
Esempio n. 11
0
    def run_on_single_catalog(self, catalog_instance, catalog_name,
                              output_dir):
        """
        Build conditional luminosity functions (CLF) for red-sequence central
        and satellite galaxies, normalized per halo, and plot them.
        """
        prepared = self.prepare_galaxy_catalog(catalog_instance)
        if prepared is None:
            return TestResult(skipped=True)

        absolute_magnitude1_field, absolute_magnitude2_field, quantities_needed = prepared
        colnames = [absolute_magnitude2_field, 'halo_mass', 'redshift_true']
        bins = (self.magnitude_bins, self.mass_bins, self.z_bins)
        hist_cen = np.zeros(
            (self.n_magnitude_bins, self.n_mass_bins, self.n_z_bins))
        hist_sat = np.zeros_like(hist_cen)

        red_query = GCRQuery((self.color_cut, absolute_magnitude1_field,
                              absolute_magnitude2_field, 'redshift_true'))

        cen_query = GCRQuery('is_central') & red_query
        sat_query = ~GCRQuery('is_central') & red_query

        if 'r_host' in quantities_needed and 'r_vir' in quantities_needed:
            # Keep only satellites inside their host's virial radius.
            sat_query &= GCRQuery('r_host < r_vir')

        for data in catalog_instance.get_quantities(quantities_needed,
                                                    return_iterator=True):
            cen_mask = cen_query.mask(data)
            sat_mask = sat_query.mask(data)

            # BUGFIX: np.stack requires a sequence of arrays; passing a
            # generator raises TypeError on modern NumPy.
            data = np.stack([data[k] for k in colnames]).T
            hist_cen += np.histogramdd(data[cen_mask], bins)[0]
            hist_sat += np.histogramdd(data[sat_mask], bins)[0]

        data = cen_mask = sat_mask = None

        # One central per halo, so summing centrals over magnitude gives the
        # halo count per (mass, z) bin.
        halo_counts = hist_cen.sum(axis=0)
        clf = dict()
        clf['sat'] = hist_sat / halo_counts
        clf['cen'] = hist_cen / halo_counts
        clf['tot'] = clf['sat'] + clf['cen']

        self.make_plot(clf, catalog_name, os.path.join(output_dir, 'clf.png'))

        return TestResult(passed=True, score=0)
Esempio n. 12
0
    def run_on_single_catalog(self, catalog_instance, catalog_name,
                              output_dir):
        """
        Compare mock galaxy color distributions against the validation catalog
        with the Cramer-von Mises statistic; write plots and a summary file.
        """

        # Every band that appears in any requested color (e.g. 'g-r' -> g, r).
        bands = set(sum((c.split('-') for c in self.colors), []))
        possible_names = ('mag_{}_sdss', 'mag_{}_des', 'mag_true_{}_sdss',
                          'mag_true_{}_des')
        labels = {
            band: catalog_instance.first_available(*(n.format(band)
                                                     for n in possible_names))
            for band in bands
        }
        labels = {k: v for k, v in labels.items() if v}
        if len(labels) < 2:
            return TestResult(
                skipped=True,
                summary=
                'magnitudes in mock catalog do not have at least two needed bands.'
            )
        # All magnitudes must come from the same photometric system.
        filters = set((v.rpartition('_')[-1] for v in labels.values()))
        if len(filters) > 1:
            return TestResult(
                skipped=True,
                summary='magnitudes in mock catalog have mixed filters.')
        filter_this = filters.pop()

        labels['redshift'] = 'redshift_true'
        if not catalog_instance.has_quantity(labels['redshift']):
            return TestResult(skipped=True,
                              summary='mock catalog does not have redshift.')

        # Load mock catalog data
        filters = [
            '{} > {}'.format(labels['redshift'], self.zlo),
            '{} < {}'.format(labels['redshift'], self.zhi)
        ]
        data = catalog_instance.get_quantities(list(labels.values()), filters)
        data = {k: data[v] for k, v in labels.items()}

        # Color transformation
        # BUGFIX: initialize before the conditional so `color_trans` is always
        # bound (previously a NameError when color_transformation_q is False).
        color_trans = None
        if self.color_transformation_q:
            if self.validation_catalog == 'DEEP2':
                color_trans = color_transformation['{}2cfht'.format(
                    filter_this)]
            elif self.validation_catalog == 'SDSS' and filter_this == 'des':
                color_trans = color_transformation['des2sdss']

        if color_trans:
            data_transformed = {}
            for band in bands:
                try:
                    data_transformed[band] = ne.evaluate(color_trans[band],
                                                         local_dict=data,
                                                         global_dict={})
                except KeyError:
                    # No transformation defined for this band; leave it out.
                    continue
            data_transformed['redshift'] = data['redshift']
            data = data_transformed
            del data_transformed

        # Apply the observed r-band magnitude limit.
        data = GCRQuery('r < {}'.format(self.obs_r_mag_limit)).filter(data)

        # Compute color distribution (PDF, CDF etc.)
        mock_color_dist = self.get_color_dist(data)

        # Calculate Cramer-von Mises statistic
        color_shift = {}
        cvm_omega = {}
        cvm_omega_shift = {}
        for color in self.colors:
            if not ((color in self.obs_color_dist) and
                    (color in mock_color_dist)):
                continue
            color_shift[color] = self.obs_color_dist[color][
                'median'] - mock_color_dist[color]['median']
            cvm_omega[color] = CvM_statistic(
                mock_color_dist[color]['nsample'],
                self.obs_color_dist[color]['nsample'],
                mock_color_dist[color]['binctr'],
                mock_color_dist[color]['cdf'],
                self.obs_color_dist[color]['binctr'],
                self.obs_color_dist[color]['cdf'])
            # Same statistic after shifting the mock to the observed median.
            cvm_omega_shift[color] = CvM_statistic(
                mock_color_dist[color]['nsample'],
                self.obs_color_dist[color]['nsample'],
                mock_color_dist[color]['binctr'] + color_shift[color],
                mock_color_dist[color]['cdf'],
                self.obs_color_dist[color]['binctr'],
                self.obs_color_dist[color]['cdf'])

        self.make_plots(mock_color_dist, color_shift, cvm_omega,
                        cvm_omega_shift, catalog_name, output_dir)

        # Write to summary file
        fn = os.path.join(output_dir, self.summary_output_file)
        with open(fn, 'a') as f:
            f.write('%2.3f < z < %2.3f\n' % (self.zlo, self.zhi))
            f.write('r_mag < %2.3f\n\n' % (self.obs_r_mag_limit))
            for color in self.colors:
                if not ((color in self.obs_color_dist) and
                        (color in mock_color_dist)):
                    continue
                f.write("Median " + color +
                        " difference (obs - mock) = %2.3f\n" %
                        (color_shift[color]))
                f.write(color + ": {} = {:2.6f}\n".format(
                    'CvM statistic', cvm_omega[color]))
                f.write(color + " (shifted): {} = {:2.6f}\n".format(
                    'CvM statistic', cvm_omega_shift[color]))
                f.write("\n")

        return TestResult(inspect_only=True)
Esempio n. 13
0
# Solution for Challenge 2 of DC2 Coadd Run1.1p GCR tutorial Part III: Guided Challenges
import numpy as np
import matplotlib.pyplot as plt
import GCRCatalogs
from GCR import GCRQuery

catalog = GCRCatalogs.load_catalog('dc2_coadd_run1.1p')

# Quality cuts for a clean, unblended point-source (star) sample.
filters=[
         GCRQuery('extendedness == 0'),
         GCRQuery('clean'), 
         GCRQuery('blendedness < 10**(-0.375)'),
        ~GCRQuery('I_flag'),  # NOTE(review): capital 'I' looks inconsistent with 'i_SNR' below -- confirm column name
         GCRQuery('i_SNR > 21')
]

# Ellipticity components and size from second moments:
#   g1 = (Ixx - Iyy) / (Ixx + Iyy)
#   g2 = 2*Ixy / (Ixx + Iyy)
#   sigma = (Ixx*Iyy - Ixy^2)^(1/4)
g1_modif = lambda ixx,iyy,ixy: (ixx-iyy)/(ixx+iyy)
g2_modif = lambda ixx,iyy,ixy:  2.*ixy/(ixx+iyy)
sigma_modif = lambda ixx,iyy,ixy: (ixx*iyy - ixy**2)**0.25


# Register derived quantities for the object moments...
catalog.add_derived_quantity('g1', g1_modif, 'Ixx', 'Iyy', 'Ixy')
catalog.add_derived_quantity('g2', g2_modif, 'Ixx', 'Iyy', 'Ixy')
catalog.add_derived_quantity('sigma', sigma_modif, 'Ixx', 'Iyy', 'Ixy')

# ...and the same definitions for the PSF moments.
catalog.add_derived_quantity('psf_g1', g1_modif, 'IxxPSF', 'IyyPSF', 'IxyPSF')
catalog.add_derived_quantity('psf_g2', g2_modif, 'IxxPSF', 'IyyPSF', 'IxyPSF')
catalog.add_derived_quantity('psf_sigma', sigma_modif, 'IxxPSF', 'IyyPSF', 'IxyPSF')

quantities = ['ra', 'dec', 
              'mag_i', 'i_SNR', 'psf_fwhm_i',
Esempio n. 14
0
    def run_on_single_catalog(self, catalog_instance, catalog_name,
                              output_dir):
        """
        Measure galaxy ellipticity distributions for each morphology cut,
        accumulating histograms over catalog chunks, then compare against
        validation data and write plots plus txt summaries.
        """
        #update color and marker to preserve catalog colors and markers across tests
        catalog_color = next(self._color_iterator)

        #add quantities to catalog if needed
        for band in self.native_luminosities:
            if catalog_instance.has_quantity(self.native_luminosities[band]):
                catalog_instance.add_quantity_modifier(
                    'Mag_true_{}_z0'.format(band),
                    (lambda x: -2.5 * np.log10(x),
                     self.native_luminosities[band]))

        print('Checking for required quantities')
        #check catalog data for required quantities
        required_quantities = []
        for pgroup in self.possible_quantities:
            found_quantity = catalog_instance.first_available(*pgroup)
            if found_quantity is not None:
                required_quantities.append(found_quantity)
        if not catalog_instance.has_quantities(required_quantities +
                                               self.filter_quantities):
            return TestResult(
                skipped=True,
                summary='Missing some required quantities: {}'.format(
                    ', '.join(required_quantities)))
        if self.ancillary_quantities is not None and not catalog_instance.has_quantities(
                self.ancillary_quantities):
            return TestResult(
                skipped=True,
                summary='Missing some ancillary quantities: {}'.format(
                    ', '.join(self.ancillary_quantities)))

        mag_field = catalog_instance.first_available(*self.possible_mag_fields)
        if not mag_field:
            return TestResult(
                skipped=True,
                summary='Missing needed quantities to make magnitude cuts')
        Mag_field = catalog_instance.first_available(*self.possible_Mag_fields)
        if not Mag_field:
            return TestResult(
                skipped=True,
                summary='Missing needed quantities to make magnitude cuts')
        all_quantities = required_quantities + [mag_field, Mag_field
                                                ] + self.filter_quantities
        if self.ancillary_quantities is not None:
            all_quantities = all_quantities + self.ancillary_quantities
        print('Fetching quantities', all_quantities)

        mag_filtername = str(mag_field.split('_')[-2])
        Mag_filtername = str(Mag_field.split('_')[2])
        filelabel = '_'.join(('m', mag_filtername, 'M', Mag_filtername))

        #setup plots
        fig, ax = plt.subplots(self.nrows, self.ncolumns, sharex='col')
        fig.text(self.yaxis_xoffset,
                 self.yaxis_yoffset,
                 self.yaxis,
                 va='center',
                 rotation='vertical')  #setup a common axis label

        #initialize arrays for storing histogram sums
        # BUGFIX: `np.int` was removed in NumPy 1.24; use the builtin `int`.
        N_array = np.zeros((self.nrows, self.ncolumns, len(self.ebins) - 1),
                           dtype=int)
        sume_array = np.zeros((self.nrows, self.ncolumns, len(self.ebins) - 1))
        sume2_array = np.zeros(
            (self.nrows, self.ncolumns, len(self.ebins) - 1))

        #get catalog data by looping over data iterator (needed for large catalogs) and aggregate histograms
        for catalog_data in catalog_instance.get_quantities(
                all_quantities, filters=self.filters, return_iterator=True):
            catalog_data = GCRQuery(
                *((np.isfinite, col)
                  for col in catalog_data)).filter(catalog_data)
            for morphology, N, sume, sume2 in zip_longest(
                    self.morphology,
                    N_array.reshape(
                        -1, N_array.shape[-1]
                    ),  #flatten all but last dimension of array
                    sume_array.reshape(-1, sume_array.shape[-1]),
                    sume2_array.reshape(-1, sume2_array.shape[-1]),
            ):
                #make cuts
                if morphology is not None:
                    mask = (catalog_data[mag_field] <
                            self.mag_lo.get(morphology))
                    mask &= (
                        self.Mag_hi.get(morphology) < catalog_data[Mag_field]
                    ) & (catalog_data[Mag_field] < self.Mag_lo.get(morphology))
                    if self.ancillary_quantities is not None:
                        for aq, key in zip_longest(
                                self.ancillary_quantities,
                                self.validation_data['cuts'].get(
                                    'ancillary_keys')):
                            mask &= (self.validation_data['cuts'][morphology].get(key+'_min') < catalog_data[aq]) &\
                                    (catalog_data[aq] < self.validation_data['cuts'][morphology].get(key+'_max'))

                    print(
                        'Number of {} galaxies passing selection cuts for morphology {} = {}'
                        .format(catalog_name, morphology, np.sum(mask)))
                    #compute ellipticity from definition
                    e_this = self.ellipticity_function(
                        *[catalog_data[q][mask] for q in required_quantities])
                    #print('mm', np.min(e_this), np.max(e_this))
                    del mask

                    #accumulate histograms
                    N += np.histogram(e_this, bins=self.ebins)[0]
                    sume += np.histogram(e_this,
                                         bins=self.ebins,
                                         weights=e_this)[0]
                    sume2 += np.histogram(e_this,
                                          bins=self.ebins,
                                          weights=e_this**2)[0]

        #check that catalog has entries for quantity to be plotted
        if not np.asarray([N.sum() for N in N_array]).sum():
            raise ValueError('No data found for quantities {}'.format(
                ', '.join(required_quantities)))

        #make plots
        results = {}
        for n, (ax_this, summary_ax_this, morphology, N, sume,
                sume2) in enumerate(
                    zip_longest(
                        ax.flat,
                        self.summary_ax.flat,
                        self.morphology,
                        N_array.reshape(
                            -1, N_array.shape[-1]
                        ),  #flatten all but last dimension of array
                        sume_array.reshape(-1, sume_array.shape[-1]),
                        sume2_array.reshape(-1, sume2_array.shape[-1]),
                    )):
            if morphology is not None:
                #get labels
                cutlabel = '${} < {} < {}$; ${} < {}$; {}'.format(str(self.Mag_hi.get(morphology)), Mag_filtername, str(self.Mag_lo.get(morphology)),\
                                                              mag_filtername, str(self.mag_lo.get(morphology)), morphology)
                ancillary_label = []
                if self.ancillary_quantities is not None:
                    for key in self.validation_data['cuts'].get(
                            'ancillary_keys'):
                        ancillary_label.append('${} <$ {} $< {}$'.format(str(self.validation_data['cuts'][morphology].get(key+'_min')),\
                                               key, str(self.validation_data['cuts'][morphology].get(key+'_max'))))
                ancillary_label = '; '.join(ancillary_label)
                catalog_label = '; '.join((catalog_name, ancillary_label))
                validation_label = ' '.join(
                    (self.validation_data.get('label', ''), morphology))
                reskey = cutlabel.replace('$', '')

                #get points to be plotted
                e_values = sume / N
                sumN = N.sum()
                total = '(# of galaxies = {})'.format(sumN)
                Nerrors = np.sqrt(N)
                if self.normed:
                    binwidths = self.ebins[1:] - self.ebins[:-1]
                    N = N / sumN / binwidths
                    Nerrors = Nerrors / sumN / binwidths

                results[reskey] = {'catalog':{'e_ave':e_values, 'N':N, 'N+':N+Nerrors, 'N-':N-Nerrors,\
                                   'total':total, 'xtralabel':ancillary_label.replace('$', '')}}
                self.catalog_subplot(ax_this, e_values, N, catalog_color,
                                     catalog_label)
                results[reskey]['validation'] = self.validation_subplot(
                    ax_this, self.validation_data.get(morphology),
                    validation_label)
                self.decorate_subplot(ax_this, n, label=cutlabel)

                #add curve for this catalog to summary plot
                self.catalog_subplot(summary_ax_this,
                                     e_values,
                                     N,
                                     catalog_color,
                                     catalog_label,
                                     errors=Nerrors)
                if self.first_pass:  #add validation data if evaluating first catalog
                    self.validation_subplot(
                        summary_ax_this, self.validation_data.get(morphology),
                        validation_label)
                self.decorate_subplot(summary_ax_this, n, label=cutlabel)

            else:
                #make empty subplots invisible
                ax_this.set_visible(False)
                summary_ax_this.set_visible(False)

        #save results for catalog and validation data in txt files
        for filename, dkey, dtype, info in zip_longest(
            (catalog_name, self.observation), ('catalog', 'validation'),
            ('N', 'data'), ('total', )):
            if filename:
                with open(
                        os.path.join(
                            output_dir, ''.join([
                                'Nvs', self.file_label, '_', filelabel + '.txt'
                            ])), 'ab') as f_handle:  #open file in append mode
                    #loop over cuts in results dict
                    for key, value in results.items():
                        self.save_quantities(dtype,
                                             value[dkey],
                                             f_handle,
                                             comment=' '.join(
                                                 (key, value[dkey].get(
                                                     'xtralabel', ''),
                                                  value[dkey].get(info, ''))))

        if self.first_pass:  #turn off validation data plot in summary for remaining catalogs
            self.first_pass = False

        #make final adjustments to plots and save figure
        self.post_process_plot(fig)
        fig.savefig(
            os.path.join(
                output_dir,
                ''.join(['Nvs', self.file_label, '_', filelabel + '.png'])))
        plt.close(fig)
        return TestResult(inspect_only=True)
Esempio n. 15
0
def do_fitting(cat, component, healpix, lim, n_threads):
    """
    Fit a set of components to SEDs, Av, Rv, magNorm using sed_from_galacticus_mags

    Parameters
    ----------
    cat -- the result of GCRCatalogs.load_catalog('catalog_name')

    component -- a string; either 'disk' or 'bulge'

    healpix -- an int indicating which healpixel to fit

    lim -- an int indicating how many objects to actually fit

    n_threads -- an int; the number of worker processes to spawn

    Returns
    -------
    numpy arrays of:
    redshift
    galaxy_id
    sed_name
    magNorm
    Av
    Rv
    lsst_fluxes
    """

    # column names of the catalog magnitudes to fit and of the LSST fluxes
    filter_data = sed_filter_names_from_catalog(cat)
    filter_names = filter_data[component]['filter_name']
    lsst_filter_names = filter_data[component]['lsst_fluxes']
    wav_min = filter_data[component]['wav_min']
    wav_width = filter_data[component]['wav_width']

    H0 = cat.cosmology.H0.value
    Om0 = cat.cosmology.Om0

    healpix_query = GCRQuery('healpix_pixel==%d' % healpix)

    qties = cat.get_quantities(list(filter_names) + list(lsst_filter_names) +
                               ['redshift', 'redshift_true', 'galaxy_id'],
                               native_filters=[healpix_query])

    # zero fluxes produce infinite magnitudes; suppress the expected warnings
    with np.errstate(divide='ignore', invalid='ignore'):
        mag_array = np.array(
            [-2.5 * np.log10(qties[ff][:lim]) for ff in filter_names])

        lsst_mag_array = np.array(
            [-2.5 * np.log10(qties[ff][:lim]) for ff in lsst_filter_names])

    redshift = qties['redshift'][:lim]
    redshift_true = qties['redshift_true'][:lim]

    # warm-up call on the first two objects; its outputs are discarded and
    # recomputed below -- presumably primes caches inside
    # sed_from_galacticus_mags before forking the workers (TODO confirm)
    (sed_names, mag_norms, av_arr,
     rv_arr) = sed_from_galacticus_mags(mag_array[:, :2], redshift[:2],
                                        redshift_true[:2], H0, Om0, wav_min,
                                        wav_width, lsst_mag_array[:, :2])

    mgr = multiprocessing.Manager()
    out_dict = mgr.dict()
    # per-worker chunk size; guard against 0 (a range() step of 0 raises
    # ValueError when there are fewer galaxies than threads)
    d_gal = max(1, len(redshift) // n_threads)
    p_list = []
    for i_start in range(0, len(redshift), d_gal):
        s = slice(i_start, i_start + d_gal)
        p = multiprocessing.Process(
            target=_parallel_fitting,
            args=(mag_array[:, s], redshift[s], redshift_true[s], H0, Om0,
                  wav_min, wav_width, lsst_mag_array[:, s], out_dict, i_start))
        p.start()
        p_list.append(p)

    for p in p_list:
        p.join()

    # reassemble the per-chunk worker outputs into full-length arrays,
    # keyed by each chunk's starting index
    sed_names = np.empty(len(redshift), dtype=(str, 200))
    mag_norms = np.zeros((6, len(redshift)), dtype=float)
    av_arr = np.zeros(len(redshift), dtype=float)
    rv_arr = np.zeros(len(redshift), dtype=float)
    lsst_fluxes = np.zeros((6, len(redshift)), dtype=float)

    for i_start in out_dict.keys():
        s = slice(i_start, i_start + d_gal)
        sed_names[s] = out_dict[i_start][0]
        mag_norms[:, s] = out_dict[i_start][1]
        av_arr[s] = out_dict[i_start][2]
        rv_arr[s] = out_dict[i_start][3]
        lsst_fluxes[:, s] = out_dict[i_start][4]

    return (redshift, qties['galaxy_id'][:lim], sed_names, mag_norms, av_arr,
            rv_arr, lsst_fluxes)
Esempio n. 16
0
                        default=0,
                        help='number of galaxies to randomly validate '
                        '(defaults to zero)')

    args = parser.parse_args()
    assert args.healpix is not None
    assert args.out_dir is not None
    assert args.out_name is not None
    if not os.path.isdir(args.out_dir):
        os.makedirs(args.out_dir)

    sed_dir = getPackageDir('sims_sed_library')

    print('loading %s' % args.catalog)
    cat = GCRCatalogs.load_catalog(args.catalog)
    h_query = GCRQuery('healpix_pixel==%d' % args.healpix)
    if args.lim is None or args.lim < 0:
        gid = cat.get_quantities('galaxy_id',
                                 native_filters=[h_query])['galaxy_id']
        args.lim = 2 * len(gid)

    out_file_name = os.path.join(args.out_dir, args.out_name)

    t_start = time.time()
    ########## actually fit SED, magNorm, and dust parameters to disks and bulges

    t0 = 1539899570.0
    print('starting %d at %.2f' % (args.healpix, time.time() - t0))

    (disk_redshift, disk_id, disk_sed_name, disk_magnorm, disk_av, disk_rv,
     disk_lsst_fluxes) = do_fitting(cat, 'disk', args.healpix, args.lim,
Esempio n. 17
0
        maskedmags = mags[mask][(snr[mask]>snrthreshold-1) & (snr[mask]<snrthreshold+1)]
        if len(maskedmags)>0:
            map_out[ind] = np.mean(maskedmags)
            map_var_out[ind] = np.std(maskedmags)
    
    dtype = [('out','float'), ('var_out','float')]
    rec_mp = np.rec.fromarrays([map_out, map_var_out], dtype=dtype)
    hsp_mp = hsp.HealSparseMap.makeEmpty(32, nsideSparse, dtype=dtype, primary='out')
    hsp_mp.updateValues(pix_uni, rec_mp)
    
    return hsp_mp

catalog = GCRCatalogs.load_catalog('dc2_object_run1.2i_all_columns_with_photoz')
band = 'i'

# Column names for the band being mapped.
mag_col = 'mag_%s_cModel' % band
snr_col = 'snr_%s_cModel' % band

# Quality cuts: clean primary detections with finite coordinates and photometry.
simple_cuts = [GCRQuery('clean'), GCRQuery('detect_isPrimary')]
simple_cuts += [GCRQuery((np.isfinite, col))
                for col in ('ra', 'dec', mag_col, snr_col)]

# Load only the columns needed for the depth map, with the cuts applied.
data_cut = catalog.get_quantities(['ra', 'dec', snr_col, mag_col],
                                  filters=simple_cuts)

# Build the depth map (mean magnitude around SNR = 5) and write it out.
hsp_mp = depth_map_meanSNRrange(data_cut['ra'], data_cut['dec'],
                                data_cut[mag_col], data_cut[snr_col],
                                5, 2048)
hsp_mp.write('depth_map.fits', clobber=True)
Esempio n. 18
0
    def run_on_single_catalog(self, catalog_instance, catalog_name,
                              output_dir):
        '''
        Loop over magnitude cuts and make plots

        For each redshift bin, bin galaxy sizes (converted from arcsec to
        kpc) by luminosity, save the binned values to a text file in
        *output_dir*, and plot them against the validation data in a 2x3
        panel figure.  Returns an inspect-only TestResult (no summary
        statistic is computed yet).
        '''
        # load catalog data
        # spline mapping redshift -> distance, used below to convert
        # angular sizes (arcsec) to physical sizes (kpc)
        spl = redshift2dist(catalog_instance.cosmology)

        colnames = dict()
        colnames['z'] = catalog_instance.first_available(
            'redshift', 'redshift_true')
        colnames['mag'] = catalog_instance.first_available(
            *self.possible_mag_fields)
        # 'onecomp' uses a single size column; 'twocomp' uses separate
        # bulge and disk size columns
        if self.observation == 'onecomp':
            colnames['size'] = catalog_instance.first_available(
                'size', 'size_true')
        elif self.observation == 'twocomp':
            colnames['size_bulge'] = catalog_instance.first_available(
                'size_bulge', 'size_bulge_true')
            colnames['size_disk'] = catalog_instance.first_available(
                'size_disk', 'size_disk_true')

        if not all(v for v in colnames.values()):
            return TestResult(skipped=True,
                              summary='Missing requested quantities')
        #Check whether the columns are finite or not
        filters = [(np.isfinite, c) for c in colnames.values()]

        #Select objects within maximum and minimum redshift of all the bins
        filters.extend((
            '{} < {}'.format(colnames['z'],
                             max(z_bin['z_max'] for z_bin in self.z_bins)),
            '{} >= {}'.format(colnames['z'],
                              min(z_bin['z_min'] for z_bin in self.z_bins)),
        ))
        catalog_data = catalog_instance.get_quantities(list(colnames.values()),
                                                       filters=filters)
        # re-key the loaded data by the short names used below ('z', 'mag', ...)
        catalog_data = {k: catalog_data[v] for k, v in colnames.items()}

        fig, axes = plt.subplots(2,
                                 3,
                                 figsize=(9, 6),
                                 sharex=True,
                                 sharey=True)
        try:
            # one panel per redshift bin, filled left-to-right, top-to-bottom
            col = 0
            row = 0
            for z_bin in self.z_bins:
                ax = axes[row, col]
                # filter catalog data for this bin
                filters = [
                    'z < {}'.format(z_bin['z_max']),
                    'z >= {}'.format(z_bin['z_min']),
                ]

                catalog_data_this = GCRQuery(*filters).filter(catalog_data)
                if len(catalog_data_this['z']) == 0:
                    continue
                z_mean = (z_bin['z_max'] + z_bin['z_min']) / 2.
                output_filepath = os.path.join(
                    output_dir,
                    self.output_filename_template.format(
                        z_bin['z_min'], z_bin['z_max']))
                colors = ['r', 'b']
                default_L_bin_edges = np.array([9, 9.5, 10, 10.5, 11, 11.5])
                default_L_bins = (default_L_bin_edges[1:] +
                                  default_L_bin_edges[:-1]) / 2.
                if self.observation == 'onecomp':
                    logL_G = self.ConvertAbsMagLuminosity(
                        catalog_data_this['mag'], 'g')
                    # angular size (arcsec) -> physical size (kpc) at each
                    # galaxy's redshift
                    size_kpc = catalog_data_this[
                        'size'] * self._ARCSEC_TO_RADIAN * interpolate.splev(
                            catalog_data_this['z'],
                            spl) / (1 + catalog_data_this['z'])
                    # mean and scatter of size in each luminosity bin
                    binned_size_kpc = binned_statistic(
                        logL_G,
                        size_kpc,
                        bins=default_L_bin_edges,
                        statistic='mean')[0]
                    binned_size_kpc_err = binned_statistic(
                        logL_G,
                        size_kpc,
                        bins=default_L_bin_edges,
                        statistic='std')[0]

                    np.savetxt(
                        output_filepath,
                        np.transpose((default_L_bins, binned_size_kpc,
                                      binned_size_kpc_err)))

                    # validation rows whose redshift (column 0) falls within
                    # +/- 0.25 of this bin's center
                    validation_this = self.validation_data[
                        (self.validation_data[:, 0] < z_mean + 0.25)
                        & (self.validation_data[:, 0] > z_mean - 0.25)]

                    ax.semilogy(validation_this[:, 1],
                                10**validation_this[:, 2],
                                label=self.label_template.format(
                                    z_bin['z_min'], z_bin['z_max']))
                    ax.fill_between(validation_this[:, 1],
                                    10**validation_this[:, 3],
                                    10**validation_this[:, 4],
                                    lw=0,
                                    alpha=0.2)
                    ax.errorbar(default_L_bins,
                                binned_size_kpc,
                                binned_size_kpc_err,
                                marker='o',
                                ls='')
                elif self.observation == 'twocomp':
                    logL_I = self.ConvertAbsMagLuminosity(
                        catalog_data_this['mag'], 'i')
                    # common arcsec -> kpc conversion factor for both components
                    arcsec_to_kpc = self._ARCSEC_TO_RADIAN * interpolate.splev(
                        catalog_data_this['z'],
                        spl) / (1 + catalog_data_this['z'])

                    binned_bulgesize_kpc = binned_statistic(
                        logL_I,
                        catalog_data_this['size_bulge'] * arcsec_to_kpc,
                        bins=default_L_bin_edges,
                        statistic='mean')[0]
                    binned_bulgesize_kpc_err = binned_statistic(
                        logL_I,
                        catalog_data_this['size_bulge'] * arcsec_to_kpc,
                        bins=default_L_bin_edges,
                        statistic='std')[0]
                    binned_disksize_kpc = binned_statistic(
                        logL_I,
                        catalog_data_this['size_disk'] * arcsec_to_kpc,
                        bins=default_L_bin_edges,
                        statistic='mean')[0]
                    binned_disksize_kpc_err = binned_statistic(
                        logL_I,
                        catalog_data_this['size_disk'] * arcsec_to_kpc,
                        bins=default_L_bin_edges,
                        statistic='std')[0]
                    # empty luminosity bins yield NaN; replace with 0 so the
                    # saved tables and error bars stay finite
                    binned_bulgesize_kpc = np.nan_to_num(binned_bulgesize_kpc)
                    binned_bulgesize_kpc_err = np.nan_to_num(
                        binned_bulgesize_kpc_err)
                    binned_disksize_kpc = np.nan_to_num(binned_disksize_kpc)
                    binned_disksize_kpc_err = np.nan_to_num(
                        binned_disksize_kpc_err)
                    np.savetxt(
                        output_filepath,
                        np.transpose(
                            (default_L_bins, binned_bulgesize_kpc,
                             binned_bulgesize_kpc_err, binned_disksize_kpc,
                             binned_disksize_kpc_err)))

                    validation_this = self.validation_data[
                        (self.validation_data[:, 0] < z_mean + 0.25)
                        & (self.validation_data[:, 0] > z_mean - 0.25)]

                    ax.text(
                        11, 0.3,
                        self.label_template.format(z_bin['z_min'],
                                                   z_bin['z_max']))
                    ax.semilogy(validation_this[:, 1],
                                validation_this[:, 2],
                                label='Bulge',
                                color=colors[0])
                    ax.fill_between(
                        validation_this[:, 1],
                        validation_this[:, 2] + validation_this[:, 4],
                        validation_this[:, 2] - validation_this[:, 4],
                        lw=0,
                        alpha=0.2,
                        facecolor=colors[0])
                    # disk points are offset by +0.2 in logL for visibility
                    ax.semilogy(validation_this[:, 1] + 0.2,
                                validation_this[:, 3],
                                label='Disk',
                                color=colors[1])
                    ax.fill_between(
                        validation_this[:, 1] + 0.2,
                        validation_this[:, 3] + validation_this[:, 5],
                        validation_this[:, 3] - validation_this[:, 5],
                        lw=0,
                        alpha=0.2,
                        facecolor=colors[1])

                    ax.errorbar(default_L_bins,
                                binned_bulgesize_kpc,
                                binned_bulgesize_kpc_err,
                                marker='o',
                                ls='',
                                c=colors[0])
                    ax.errorbar(default_L_bins + 0.2,
                                binned_disksize_kpc,
                                binned_disksize_kpc_err,
                                marker='o',
                                ls='',
                                c=colors[1])
                    ax.set_xlim([9, 13])
                    ax.set_ylim([1e-1, 25])
                    # NOTE(review): 'nonposy' was deprecated in matplotlib 3.3
                    # and removed in 3.5 in favor of 'nonpositive' -- confirm
                    # the pinned matplotlib version before upgrading
                    ax.set_yscale('log', nonposy='clip')
                del catalog_data_this

                # advance to the next panel in the 2x3 grid
                col += 1
                if col > 2:
                    col = 0
                    row += 1

                ax.legend(loc='best')

            # invisible full-figure axes used only to hold shared axis labels
            fig.add_subplot(111, frameon=False)
            # hide tick and tick label of the big axes
            plt.tick_params(labelcolor='none',
                            which='both',
                            top='off',
                            bottom='off',
                            left='off',
                            right='off')
            plt.grid(False)
            plt.xlabel(self.fig_xlabel)
            plt.ylabel(self.fig_ylabel)
            fig.subplots_adjust(hspace=0, wspace=0.2)
            fig.suptitle('{} ($M_V$) vs. {}'.format(catalog_name,
                                                    self.data_label),
                         fontsize='medium',
                         y=0.93)
        finally:
            # always save and close the figure, even if plotting failed partway
            fig.savefig(os.path.join(output_dir,
                                     '{:s}.png'.format(self.test_name)),
                        bbox_inches='tight')
            plt.close(fig)

        #TODO: calculate summary statistics
        return TestResult(inspect_only=True)
Esempio n. 19
0
# location of the pre-computed SED fit lookup files for cosmoDC2 v1.1.4
# (sed_fit_dir is defined earlier in the script)
sed_fit_dir = os.path.join(sed_fit_dir, 'DC2/cosmoDC2_v1.1.4/sedLookup')
assert os.path.isdir(sed_fit_dir)

hpid = 10069  # an example healpix pixel that has been fit

# the per-healpixel HDF5 file holding the SED fits
sed_fit_name = os.path.join(sed_fit_dir, 'sed_fit_%d.h5' % hpid)
assert os.path.isfile(sed_fit_name)

# load cosmoDC2
cat = GCRCatalogs.load_catalog('cosmoDC2_v1.1.4_image')

# get galaxy_id and redshift for crossmatching with SED fit files;
# we will also get the magnitudes that should be reproduced
# by our synthetic photometry
# (hp_query makes sure we only load the healpixel we are interested in)
hp_query = GCRQuery('healpix_pixel==%d' % hpid)
cosmoDC2_data = cat.get_quantities([
    'galaxy_id', 'redshift', 'ra', 'dec', 'mag_true_u_lsst', 'mag_true_g_lsst',
    'mag_true_r_lsst', 'mag_true_i_lsst', 'mag_true_z_lsst', 'mag_true_y_lsst',
    'mag_u_lsst', 'mag_g_lsst', 'mag_r_lsst', 'mag_i_lsst', 'mag_z_lsst',
    'mag_y_lsst', 'shear_1', 'shear_2', 'convergence'
],
                                   native_filters=[hp_query])

# make sure cosmoDC2_data is sorted by galaxy_id
# (presumably the SED fit file is ordered by galaxy_id so the two can be
# crossmatched by position -- TODO confirm against the fit-file writer)
sorted_dex = np.argsort(cosmoDC2_data['galaxy_id'])
for colname in cosmoDC2_data.keys():
    cosmoDC2_data[colname] = cosmoDC2_data[colname][sorted_dex]

# read in LSST bandpasses
lsst_bp_dict = sims_photUtils.BandpassDict.loadTotalBandpassesFromFiles()
Esempio n. 20
0
    def run_on_single_catalog(self, catalog_instance, catalog_name,
                              output_dir):
        """
        Compute the conditional luminosity function (CLF) of red galaxies,
        split into centrals and satellites, and save a summary plot.

        Parameters
        ----------
        catalog_instance : catalog reader providing get_quantities()
        catalog_name : str; used for plot labeling
        output_dir : str; 'clf.png' is written here

        Returns
        -------
        TestResult (inspect_only), or skipped if the catalog cannot be
        prepared.
        """
        prepared = self.prepare_galaxy_catalog(catalog_instance)
        if prepared is None:
            return TestResult(skipped=True)

        absolute_magnitude1_field, absolute_magnitude2_field, quantities_needed = prepared

        # find out color cut threshold: the color above which the reddest
        # `color_cut_fraction` of low-redshift galaxies lie
        color = []
        for data in catalog_instance.get_quantities(
            [
                absolute_magnitude1_field, absolute_magnitude2_field,
                'redshift_true'
            ],
                filters=['redshift_true < 0.2'],
                return_iterator=True,
        ):
            color.append(data[absolute_magnitude1_field] -
                         data[absolute_magnitude2_field])

        color_cut_percentile_at = 100.0 * (1 - self.color_cut_fraction)
        color_cut_thres = np.percentile(np.concatenate(color),
                                        color_cut_percentile_at)
        del color  # free the per-chunk color arrays

        colnames = [absolute_magnitude2_field, 'halo_mass', 'redshift_true']
        bins = (self.magnitude_bins, self.mass_bins, self.z_bins)
        hist_cen = np.zeros(
            (self.n_magnitude_bins, self.n_mass_bins, self.n_z_bins))
        hist_sat = np.zeros_like(hist_cen)

        cen_query = GCRQuery('is_central')
        sat_query = ~GCRQuery('is_central')

        # restrict satellites to within the virial radius when available
        if 'r_host' in quantities_needed and 'r_vir' in quantities_needed:
            sat_query &= GCRQuery('r_host < r_vir')

        # accumulate 3D (magnitude, mass, z) histograms over red galaxies only
        for data in catalog_instance.get_quantities(
                quantities_needed,
                filters=[
                    '{} - {} > {}'.format(absolute_magnitude1_field,
                                          absolute_magnitude2_field,
                                          color_cut_thres)
                ],
                return_iterator=True,
        ):
            cen_mask = cen_query.mask(data)
            sat_mask = sat_query.mask(data)

            # np.stack requires a sequence of arrays; passing a generator
            # raises TypeError on modern numpy, so build an explicit list
            data = np.stack([data[k] for k in colnames]).T
            hist_cen += np.histogramdd(data[cen_mask], bins)[0]
            hist_sat += np.histogramdd(data[sat_mask], bins)[0]

        data = cen_mask = sat_mask = None  # release the last chunk

        # normalize by the halo count (= number of centrals) per (mass, z)
        # bin; empty bins produce NaN via 0/0 as before, so just suppress
        # the expected warnings
        halo_counts = hist_cen.sum(axis=0)
        clf = dict()
        with np.errstate(divide='ignore', invalid='ignore'):
            clf['sat'] = hist_sat / halo_counts
            clf['cen'] = hist_cen / halo_counts
        clf['tot'] = clf['sat'] + clf['cen']

        self.make_plot(clf, catalog_name, os.path.join(output_dir, 'clf.png'))

        return TestResult(inspect_only=True)
Esempio n. 21
0
    def run_on_single_catalog(self, catalog_instance, catalog_name,
                              output_dir):
        """
        Accumulate N(z) histograms for a series of magnitude cuts, compare
        them against validation fits, and write plots and text tables.

        Parameters
        ----------
        catalog_instance : catalog reader providing get_quantities()
        catalog_name : str; used in plot labels
        output_dir : str; per-catalog .txt tables and a .png figure go here

        Returns
        -------
        TestResult; skipped if the required magnitude or redshift
        quantities are missing from the catalog.
        """
        #check catalog data for required quantities
        mag_field = catalog_instance.first_available(*self.possible_mag_fields)
        if not mag_field:
            return TestResult(skipped=True,
                              summary='Missing required mag_field option')
        if not catalog_instance.has_quantity(self.zlabel):
            return TestResult(skipped=True,
                              summary='Missing required {} quantity'.format(
                                  self.zlabel))
        filtername = mag_field.rpartition('_')[-1].upper()
        filelabel = '_'.join((filtername, self.band))

        #setup plots
        fig, ax = plt.subplots(self.nrows,
                               self.ncolumns,
                               figsize=(self.figx_p, self.figy_p),
                               sharex='col')
        catalog_color = next(self.colors)
        catalog_marker = next(self.markers)

        #initialize arrays for storing histogram sums
        # (np.int was deprecated and removed in numpy 1.24; use builtin int)
        N_array = np.zeros((self.nrows, self.ncolumns, len(self.zbins) - 1),
                           dtype=int)
        sumz_array = np.zeros((self.nrows, self.ncolumns, len(self.zbins) - 1))

        #get catalog data by looping over data iterator (needed for large catalogs) and aggregate histograms
        for catalog_data in catalog_instance.get_quantities(
            [self.zlabel, mag_field],
                filters=self.filters,
                return_iterator=True):
            # drop rows with non-finite redshift or magnitude
            catalog_data = GCRQuery(
                *((np.isfinite, col)
                  for col in catalog_data)).filter(catalog_data)
            for cut_lo, cut_hi, N, sumz in zip_longest(
                    self.mag_lo,
                    self.mag_hi,
                    N_array.reshape(
                        -1, N_array.shape[-1]
                    ),  #flatten all but last dimension of array
                    sumz_array.reshape(-1, sumz_array.shape[-1]),
            ):
                if cut_lo:
                    mask = (catalog_data[mag_field] < cut_lo)
                    if cut_hi:
                        mask &= (catalog_data[mag_field] >= cut_hi)
                    z_this = catalog_data[self.zlabel][mask]
                    del mask

                    #bin catalog_data and accumulate subplot histograms
                    N += np.histogram(z_this, bins=self.zbins)[0]
                    sumz += np.histogram(z_this,
                                         bins=self.zbins,
                                         weights=z_this)[0]

        #loop over magnitude cuts and make plots
        results = {}
        for n, (ax_this, summary_ax_this, cut_lo, cut_hi, N, sumz, z0,
                z0err) in enumerate(
                    zip_longest(
                        ax.flat,
                        self.summary_ax.flat,
                        self.mag_lo,
                        self.mag_hi,
                        N_array.reshape(-1, N_array.shape[-1]),
                        sumz_array.reshape(-1, sumz_array.shape[-1]),
                        self.validation_data.get('z0values', []),
                        self.validation_data.get('z0errors', []),
                    )):
            if cut_lo is None:  #cut_lo is None if self.mag_lo is exhausted
                ax_this.set_visible(False)
                summary_ax_this.set_visible(False)
            else:
                cut_label = '{} $< {}$'.format(self.band, cut_lo)
                if cut_hi:
                    cut_label = '${} <=$ '.format(
                        cut_hi
                    ) + cut_label  #also appears in txt file so don't use \leq

                if z0 is None and 'z0const' in self.validation_data:  #alternate format for some validation data
                    z0 = self.validation_data[
                        'z0const'] + self.validation_data['z0linear'] * cut_lo

                #empty z bins give 0/0 -> nan; suppress the expected warning
                with np.errstate(divide='ignore', invalid='ignore'):
                    meanz = sumz / N
                sumN = N.sum()
                total = '(# of galaxies = {})'.format(sumN)
                Nerrors = np.sqrt(N)  # Poisson errors
                if self.normed:
                    binwidths = self.zbins[1:] - self.zbins[:-1]
                    N = N / sumN / binwidths
                    Nerrors = Nerrors / sumN / binwidths

                #make subplot
                catalog_label = ' '.join(
                    (catalog_name,
                     cut_label.replace(self.band,
                                       filtername + ' ' + self.band)))
                validation_label = ' '.join(
                    (self.validation_data.get('label', ''), cut_label))
                key = cut_label.replace('$', '')
                results[key] = {
                    'meanz': meanz,
                    'total': total,
                    'N': N,
                    'N+-': Nerrors
                }
                self.catalog_subplot(ax_this, meanz, N, Nerrors, catalog_color,
                                     catalog_marker, catalog_label)
                if z0 and z0 > 0.:
                    fits = self.validation_subplot(ax_this, meanz, z0, z0err,
                                                   validation_label)
                    #only record fits when one was made; previously this
                    #update ran unconditionally and raised NameError (or
                    #reused a stale fit) whenever z0 was unset
                    results[key].update(fits)
                self.decorate_subplot(ax_this, n)

                #add curve for this catalog to summary plot
                self.catalog_subplot(summary_ax_this, meanz, N, Nerrors,
                                     catalog_color, catalog_marker,
                                     catalog_label)
                if self.first_pass and z0 and z0 > 0:
                    self.validation_subplot(
                        summary_ax_this, meanz, z0, z0err, validation_label
                    )  #add validation data if evaluating first catalog
                self.decorate_subplot(summary_ax_this, n)

        #save results for catalog and validation data in txt files
        for filename, dtype, comment, info in zip_longest(
            (filelabel, self.observation), ('N', 'fit'), (filtername, ),
            ('total', )):
            if filename:
                with open(
                        os.path.join(output_dir, 'Nvsz_' + filename + '.txt'),
                        'ab') as f_handle:  #open file in append mode
                    #loop over magnitude cuts in results dict
                    for key, value in results.items():
                        self.save_quantities(dtype,
                                             value,
                                             f_handle,
                                             comment=' '.join(
                                                 ((comment or ''), key,
                                                  value.get(info, ''))))

        if self.first_pass:  #turn off validation data plot in summary for remaining catalogs
            self.first_pass = False

        #make final adjustments to plots and save figure
        self.post_process_plot(fig)
        fig.savefig(os.path.join(output_dir, 'Nvsz_' + filelabel + '.png'))
        plt.close(fig)
        return TestResult(0, passed=True)
Esempio n. 22
0
def load_catalogs(ra_min=52.3, ra_max=57.9, dec_min=-32.2, dec_max=-27.3):
    """
    Load DC2 Run 1.2 object and truth catalogs within a coordinate box and
    return all objects plus the star subsets of each.

    Parameters
    ----------
    ra_min, ra_max, dec_min, dec_max : float
        Coordinate box in degrees.  The default box was chosen from
        looking at plots in general_analysis.ipynb.

    Returns
    -------
    (object_data, truth_data) : each a tuple of
        (all_table, n_all, star_table, n_stars)
    """
    truth = GCRCatalogs.load_catalog("dc2_truth_run1.2_static")
    objects = GCRCatalogs.load_catalog("dc2_object_run1.2p")

    coord_filter = [
        'ra >= {}'.format(ra_min),
        'ra < {}'.format(ra_max),
        'dec >= {}'.format(dec_min),
        'dec < {}'.format(dec_max),
    ]

    # basic quality cuts: clean detections with finite photometry
    object_filter = [
        GCRQuery('clean'),
        (np.isfinite, 'mag_r'),
        (np.isfinite, 'magerr_r'),
        (np.isfinite, 'mag_r_cModel'),
        (np.isfinite, 'mag_g'),
        (np.isfinite, 'magerr_g'),
        (np.isfinite, 'extendedness'),
    ]

    #star_thresh = 0.0164 # see object_gcr_1_intro.ipynb
    star_filter = [
        GCRQuery('extendedness == 0'),
        #GCRQuery('mag_r - mag_r_cModel < {}'.format(star_thresh)),
        #GCRQuery('magerr_r < 0.1'),
    ]

    object_all = objects.get_quantities([
        'objectId', 'ra', 'dec', 'mag_r', 'magerr_r', 'mag_r_cModel', 'mag_g',
        'magerr_g', 'mag_g_cModel', 'extendedness'
    ],
                                        filters=coord_filter + object_filter)
    object_stars = objects.get_quantities([
        'objectId', 'ra', 'dec', 'mag_r', 'magerr_r', 'mag_r_cModel', 'mag_g',
        'magerr_g', 'mag_g_cModel'
    ],
                                          filters=coord_filter +
                                          object_filter + star_filter)

    len_object_all = len(object_all['ra'])
    len_object_stars = len(object_stars['ra'])

    object_data = object_all, len_object_all, object_stars, len_object_stars

    truth_filters = [(np.isfinite, 'r'), (np.isfinite, 'g')]

    truth_all = truth.get_quantities(['ra', 'dec', 'mag_true_r', 'mag_true_g'],
                                     native_filters=coord_filter,
                                     filters=truth_filters)
    truth_stars = truth.get_quantities(
        ['ra', 'dec', 'mag_true_r', 'mag_true_g'],
        native_filters=coord_filter + ['star == 1'],
        filters=truth_filters)
    len_truth_all = len(truth_all['ra'])
    len_truth_stars = len(truth_stars['ra'])

    # fixed copy-paste bug: the last element was len_truth_all, which
    # misreported the truth star count (cf. object_data above)
    truth_data = truth_all, len_truth_all, truth_stars, len_truth_stars

    print("Coadd objects:", len_object_all, ", Coadd stars:", len_object_stars)
    print("Truth objects:", len_truth_all, ", Truth stars:", len_truth_stars)

    return object_data, truth_data
Esempio n. 23
0
def validate_agn_mags(cat_dir,
                      obsid,
                      agn_db,
                      opsim_db=os.path.join(
                          '/global/projecta/projectdirs/lsst',
                          'groups/SSim/DC2/',
                          'minion_1016_desc_dithered_v4_sfd.db')):
    """
    Validate AGN magnitudes in an instance catalog against the AGN
    variability model evaluated at the visit's MJD.

    Parameters
    ----------
    cat_dir is the parent dir of $obsid

    obsid is the obsHistID of the pointing (an int)

    agn_db is the database of AGN parameters

    opsim_db is the path to the cadence database

    Raises
    ------
    RuntimeError
        If any required file/dir is missing, if the instance-catalog,
        SQL, and GCR galaxy_id arrays disagree, or if the maximum
        magnitude discrepancy exceeds 1.0e-5 mag.
    """
    if not os.path.isfile(agn_db):
        raise RuntimeError('\n%s\nis not a file\n' % agn_db)

    inst_cat_dir = os.path.join(cat_dir, '%.8d' % obsid)
    if not os.path.isdir(inst_cat_dir):
        raise RuntimeError('\n%s\nis not a dir\n' % inst_cat_dir)

    agn_name = os.path.join(inst_cat_dir, 'agn_gal_cat_%d.txt.gz' % obsid)
    if not os.path.isfile(agn_name):
        raise RuntimeError('\n%s\nis not a file\n' % agn_name)

    phosim_name = os.path.join(inst_cat_dir, 'phosim_cat_%d.txt' % obsid)
    # Bug fix: this check previously re-tested agn_name, so a missing
    # phosim_cat file was never caught here (the message already referred
    # to phosim_name).
    if not os.path.isfile(phosim_name):
        raise RuntimeError('\n%s\nis not a file\n' % phosim_name)

    # Scan the phosim header for the bandpass index and the visit time.
    bandpass = None
    vistime = None
    with open(phosim_name, 'r') as in_file:
        for line in in_file:
            params = line.strip().split()
            if params[0] == 'filter':
                bandpass = int(params[1])
            elif params[0] == 'vistime':
                vistime = float(params[1])

            if (bandpass is not None and vistime is not None):
                # both header values found; stop scanning
                break

    if bandpass is None:
        raise RuntimeError("Did not read bandpass")

    if vistime is None:
        raise RuntimeError("Did not read vistime")

    if not os.path.isfile(opsim_db):
        raise RuntimeError('\n%s\nis not a file' % opsim_db)

    # Look up the visit's MJD and (dithered) pointing from the cadence db.
    with sqlite3.connect(opsim_db) as conn:
        c = conn.cursor()
        r = c.execute('SELECT expMJD, descDitheredRA, descDitheredDec '
                      'FROM Summary WHERE obsHistID==%d' % obsid).fetchall()
        mjd = float(r[0][0])
        pointing_ra = float(r[0][1])
        pointing_dec = float(r[0][2])

    agn_colnames = [
        'obj', 'uniqueID', 'ra', 'dec', 'magnorm', 'sed', 'redshift', 'g1',
        'g2', 'kappa', 'dra', 'ddec', 'src_type', 'dust_rest', 'dust_obs',
        'obs_av', 'obs_rv'
    ]

    agn_col_types = {
        'ra': float,
        'dec': float,
        'magnorm': float,
        'redshift': float,
        'sed': bytes,
        'uniqueID': int
    }

    agn_df = pd.read_csv(agn_name,
                         delimiter=' ',
                         compression='gzip',
                         names=agn_colnames,
                         dtype=agn_col_types,
                         nrows=None)

    # the instance-catalog uniqueID encodes galaxy_id in the high bits
    agn_df['galaxy_id'] = pd.Series(agn_df['uniqueID'] // 1024,
                                    index=agn_df.index)

    # Unit vector of the pointing (descDithered angles are presumably in
    # radians here -- TODO confirm against the opsim schema).
    vv = np.array([
        np.cos(pointing_dec) * np.cos(pointing_ra),
        np.cos(pointing_dec) * np.sin(pointing_ra),
        np.sin(pointing_dec)
    ])
    hp_list = healpy.query_disc(32,
                                vv,
                                np.radians(2.2),
                                nest=False,
                                inclusive=True)

    # Stream the AGN parameter table in chunks, keeping only galaxies that
    # appear in the instance catalog.
    chunk_size = 10000
    agn_gid = []
    agn_magnorm = []
    agn_varParamStr = []
    with sqlite3.connect(agn_db) as agn_params_conn:
        agn_params_cursor = agn_params_conn.cursor()
        query = 'SELECT galaxy_id, magNorm, varParamStr FROM agn_params'
        agn_query = agn_params_cursor.execute(query)
        agn_chunk = agn_query.fetchmany(size=chunk_size)
        while len(agn_chunk) > 0:
            agn_chunk = np.array(agn_chunk).transpose()
            chunk_gid = agn_chunk[0].astype(int)
            chunk_magnorm = agn_chunk[1].astype(float)
            chunk_varParamStr = agn_chunk[2]
            valid_agn = np.where(np.in1d(chunk_gid,
                                         agn_df['galaxy_id'].values))
            agn_gid.append(chunk_gid[valid_agn])
            agn_magnorm.append(chunk_magnorm[valid_agn])
            agn_varParamStr.append(chunk_varParamStr[valid_agn])
            agn_chunk = agn_query.fetchmany(size=chunk_size)

    agn_gid = np.concatenate(agn_gid)
    agn_magnorm = np.concatenate(agn_magnorm)
    agn_varParamStr = np.concatenate(agn_varParamStr)
    print('sql gave %d agn' % len(agn_gid))

    # Sort all three data sources by galaxy_id so they can be compared
    # element-wise below.
    sorted_dex = np.argsort(agn_gid)
    agn_gid = agn_gid[sorted_dex]
    agn_magnorm = agn_magnorm[sorted_dex]
    agn_varParamStr = agn_varParamStr[sorted_dex]

    instcat_gid = agn_df['galaxy_id'].values
    instcat_magnorm = agn_df['magnorm'].values
    instcat_z = agn_df['redshift'].values

    # IDs >= 1e11 are excluded (presumably sprinkled/synthetic objects --
    # verify against the sprinkler's ID convention).
    valid = np.where(instcat_gid < 1.0e11)
    instcat_gid = instcat_gid[valid]
    instcat_magnorm = instcat_magnorm[valid]
    instcat_z = instcat_z[valid]
    sorted_dex = np.argsort(instcat_gid)
    instcat_gid = instcat_gid[sorted_dex]
    instcat_magnorm = instcat_magnorm[sorted_dex]
    instcat_z = instcat_z[sorted_dex]

    cat = GCRCatalogs.load_catalog('cosmoDC2_v1.1.4_image')

    # Gather true redshifts from the extragalactic catalog, one healpixel
    # at a time, keeping only the AGN hosts.
    cat_q = {}
    cat_q['galaxy_id'] = []
    cat_q['redshift_true'] = []
    for hp in hp_list:
        hp_query = GCRQuery('healpix_pixel==%d' % hp)

        local_q = cat.get_quantities(['galaxy_id', 'redshift_true'],
                                     native_filters=[hp_query])

        valid = np.in1d(local_q['galaxy_id'], agn_df['galaxy_id'])
        if valid.any():
            for k in cat_q:
                cat_q[k].append(local_q[k][valid])

    for k in cat_q:
        cat_q[k] = np.concatenate(cat_q[k])

    print('we have %d agn' % len(cat_q['galaxy_id']))

    sorted_dex = np.argsort(cat_q['galaxy_id'])
    for k in cat_q:
        cat_q[k] = cat_q[k][sorted_dex]

    if not np.array_equal(cat_q['galaxy_id'], instcat_gid):
        msg = "GCR gid not equal to InstCat\n"
        msg += "len gcr %d\n" % len(cat_q['galaxy_id'])
        msg += "len instcat %d\n" % len(instcat_gid)
        msg += "other comparison %s\n" % str(
            np.array_equal(instcat_gid, agn_gid))
        raise RuntimeError(msg)

    if not np.array_equal(instcat_gid, agn_gid):
        raise RuntimeError("galaxy_id arrays are not equal")

    if len(instcat_gid) == 0:
        raise RuntimeError("no AGN to test")

    # Collect the variability parameters (JSON 'p' dicts) into arrays
    # keyed by parameter name.
    agn_params = None
    for var in agn_varParamStr:
        var_dict = json.loads(var)
        if agn_params is None:
            agn_params = {}
            for k in var_dict['p']:
                agn_params[k] = []
        for k in var_dict['p']:
            agn_params[k].append(var_dict['p'][k])

    for k in agn_params:
        agn_params[k] = np.array(agn_params[k])

    agn_simulator = ExtraGalacticVariabilityModels()
    agn_simulator._agn_threads = 3
    d_mag = agn_simulator.applyAgn([np.arange(len(agn_gid), dtype=int)],
                                   agn_params,
                                   mjd,
                                   redshift=cat_q['redshift_true'])

    # The instance catalog encodes AGN variability as an offset of magnorm
    # relative to the database value; compare with the simulated offset.
    d_mag_instcat = instcat_magnorm - agn_magnorm
    error = np.abs(d_mag[bandpass] - d_mag_instcat)
    max_error = error.max()
    violation = np.where(error > 1.0e-5)
    for ii in violation[0]:
        print("%e -- %e %e %e" % (error[ii], d_mag[bandpass][ii],
                                  d_mag_instcat[ii], instcat_magnorm[ii]))

        for k in agn_params:
            print('    %s: %e' % (k, agn_params[k][ii]))

    # (dead locals d_mag_valid / mag_valid removed; they were never used)
    if max_error > 1.0e-5:
        raise RuntimeError("\n%s\nAGN validation failed: max mag error %e" %
                           (agn_name, max_error))
Esempio n. 24
0
    def run_on_single_catalog(self, catalog_instance, catalog_name,
                              output_dir):
        """
        Run the emission-line test on a single mock catalog.

        Builds low-z and high-z magnitude-limited samples with an Halpha
        luminosity floor, draws a subsample matched to the SDSS comparison
        sample, stores the line luminosities on ``self``, and evaluates
        the test via ``self.makeplot``.
        """

        #=========================================
        # Begin Reading in Data
        #=========================================

        # check if needed quantities exist
        if not catalog_instance.has_quantities([
                'redshift_true', self.loz_band, self.hiz_band,
                'emissionLines/totalLineLuminosity:oxygenII3726',
                'emissionLines/totalLineLuminosity:oxygenII3729',
                'emissionLines/totalLineLuminosity:balmerAlpha6563',
                'emissionLines/totalLineLuminosity:balmerBeta4861',
                'emissionLines/totalLineLuminosity:nitrogenII6584',
                'emissionLines/totalLineLuminosity:oxygenIII4959',
                'emissionLines/totalLineLuminosity:oxygenIII5007',
                'emissionLines/totalLineLuminosity:sulfurII6716',
                'emissionLines/totalLineLuminosity:sulfurII6731'
        ]):
            return TestResult(skipped=True,
                              summary='Necessary quantities are not present')

        # redshift windows for the two samples
        loz_filter = GCRQuery((np.isfinite, 'redshift_true'),
                              'redshift_true > %f' % self.loz_lo,
                              'redshift_true < %f' % self.loz_hi)
        hiz_filter = GCRQuery((np.isfinite, 'redshift_true'),
                              'redshift_true > %f' % self.hiz_lo,
                              'redshift_true < %f' % self.hiz_hi)
        # apparent-magnitude cuts, one per sample
        loz_magcut_filter = GCRQuery(
            (np.isfinite, self.loz_band),
            self.loz_band + ' < %.1f' % self.loz_magcut)
        hiz_magcut_filter = GCRQuery(
            (np.isfinite, self.hiz_band),
            self.hiz_band + ' < %.1f' % self.hiz_magcut)
        # Halpha luminosity floor applied to the whole selection
        ha_fluxlim = GCRQuery(
            (np.isfinite, 'emissionLines/totalLineLuminosity:balmerAlpha6563'),
            (lambda x: x > self.ha_cut,
             'emissionLines/totalLineLuminosity:balmerAlpha6563'))

        data = catalog_instance.get_quantities(
            [
                'redshift_true',
                'emissionLines/totalLineLuminosity:oxygenII3726',
                'emissionLines/totalLineLuminosity:oxygenII3729',
                'emissionLines/totalLineLuminosity:balmerAlpha6563',
                'emissionLines/totalLineLuminosity:balmerBeta4861',
                'emissionLines/totalLineLuminosity:nitrogenII6584',
                'emissionLines/totalLineLuminosity:oxygenIII4959',
                'emissionLines/totalLineLuminosity:oxygenIII5007',
                'emissionLines/totalLineLuminosity:sulfurII6716',
                'emissionLines/totalLineLuminosity:sulfurII6731',
                self.loz_band, self.hiz_band
            ],
            # Bug fix: `&` binds tighter than `|` in Python, so without the
            # outer parentheses ha_fluxlim only applied to the high-z
            # branch.  Apply it to both samples, consistent with the
            # (commented-out) post-hoc Halpha cut below.
            filters=(((loz_filter & loz_magcut_filter) |
                      (hiz_filter & hiz_magcut_filter)) & ha_fluxlim))

        # data = data[data['emissionLines/totalLineLuminosity:balmerAlpha6563'] > self.ha_cut]

        z = data['redshift_true']
        # convert line luminosities from solar luminosities to Watts
        Halpha = (data['emissionLines/totalLineLuminosity:balmerAlpha6563'] *
                  3.839e26 * u.W).value
        Hbeta = (data['emissionLines/totalLineLuminosity:balmerBeta4861'] *
                 3.839e26 * u.W).value
        NII6584 = (data['emissionLines/totalLineLuminosity:nitrogenII6584'] *
                   3.839e26 * u.W).value
        OIII5007 = (data['emissionLines/totalLineLuminosity:oxygenIII5007'] *
                    3.839e26 * u.W).value
        OIII4959 = (data['emissionLines/totalLineLuminosity:oxygenIII4959'] *
                    3.839e26 * u.W).value
        OII3726 = (data['emissionLines/totalLineLuminosity:oxygenII3726'] *
                   3.839e26 * u.W).value
        OII3729 = (data['emissionLines/totalLineLuminosity:oxygenII3729'] *
                   3.839e26 * u.W).value
        SII6716 = (data['emissionLines/totalLineLuminosity:sulfurII6716'] *
                   3.839e26 * u.W).value
        SII6731 = (data['emissionLines/totalLineLuminosity:sulfurII6731'] *
                   3.839e26 * u.W).value
        # combined doublet/total luminosities used by the ratio diagnostics
        SIItot = SII6716 + SII6731
        OIIItot = OIII5007 + OIII4959
        OIItot = OII3726 + OII3729

        # Reduce the sample size by drawing self.sim_drawnum galaxies

        # indices = np.random.choice(np.arange(len(Halpha)), size=self.sim_drawnum, replace=False)

        indices = self.sdsscat.drawinds(z,
                                        size=self.sim_drawnum,
                                        catname=catalog_name)

        # stash the drawn subsample on self for makeplot() to consume
        self.z = z[indices]
        self.ha = Halpha[indices]
        self.hb = Hbeta[indices]
        self.oii = OIItot[indices]
        self.oiii = OIIItot[indices]
        self.nii6584 = NII6584[indices]
        self.oiii5007 = OIII5007[indices]
        self.oiii4959 = OIII4959[indices]
        self.oii3726 = OII3726[indices]
        self.oii3729 = OII3729[indices]
        self.sii6716 = SII6716[indices]
        self.sii6731 = SII6731[indices]
        self.siitot = SIItot[indices]

        #=========================================
        # End Reading in Data
        #=========================================

        #=========================================
        # Perform the Test and Return Results
        #=========================================

        if self.truncate_cat_name:
            thisfig, pvalue, medianshift = self.makeplot(
                catalog_name.split('_')[0])
        else:
            thisfig, pvalue, medianshift = self.makeplot(catalog_name)

        self.figlist.append(thisfig)
        self.runcat_name.append(catalog_name)

        # pass requires both a non-tiny p-value and a small median shift
        if np.log10(pvalue) >= -4. and np.linalg.norm(medianshift) <= 0.25:
            return TestResult(pvalue, passed=True)
        elif np.linalg.norm(medianshift) <= 0.25:
            return TestResult(pvalue,
                              passed=False,
                              summary='P-value must exceed 1e-4.')
        elif np.log10(pvalue) >= -4.:
            return TestResult(
                pvalue,
                passed=False,
                summary=
                'Total median shift must be less than or equal to 0.25 dex.')
        else:
            return TestResult(
                pvalue,
                passed=False,
                summary=
                'P-value must exceed 1e-4 and total median shift must be less than or equal to 0.25 dex.'
            )
Esempio n. 25
0
def validate_instance_catalog_magnitudes(cat_dir, obsid, seed=99, nrows=-1):
    """
    Validate galaxy magnitudes in an instance catalog against the
    extragalactic catalog (cosmoDC2_v1.1.4_image) truth magnitudes.

    Parameters
    ----------
    cat_dir is the parent dir of $obsid

    obsid is the obsHistID of the pointing

    seed is the seed for a random number generator

    nrows is the number of galaxies to test (if <0, test all of them)

    Raises
    ------
    RuntimeError if a required input is missing or the maximum magnitude
    discrepancy exceeds 1.0e-5 mag.
    """
    # Galaxies "sprinkled" by Twinkles (AGN/SNe replacements) must be
    # excluded from the comparison below.
    agn_dtype = np.dtype([('galaxy_id', int), ('twinkles_id', int)])
    agn_cache = np.genfromtxt(os.path.join(os.environ['TWINKLES_DIR'], 'data',
                                           'cosmoDC2_v1.1.4_agn_cache.csv'),
                              dtype=agn_dtype,
                              delimiter=',',
                              skip_header=1)

    sne_cache = np.genfromtxt(os.path.join(os.environ['TWINKLES_DIR'], 'data',
                                           'cosmoDC2_v1.1.4_sne_cache.csv'),
                              dtype=agn_dtype,
                              delimiter=',',
                              skip_header=1)

    sprinkled_gid = np.append(agn_cache['galaxy_id'], sne_cache['galaxy_id'])

    # column layout of the gzipped instance-catalog component files
    colnames = [
        'obj', 'uniqueID', 'ra', 'dec', 'magnorm', 'sed', 'redshift', 'g1',
        'g2', 'kappa', 'dra', 'ddec', 'src_type', 'major', 'minor',
        'positionAngle', 'sindex', 'dust_rest', 'rest_av', 'rest_rv',
        'dust_obs', 'obs_av', 'obs_rv'
    ]

    # columns not needed for the magnitude comparison
    to_drop = [
        'obj', 'g1', 'g2', 'kappa', 'dra', 'ddec', 'src_type', 'major',
        'minor', 'positionAngle', 'sindex', 'dust_rest', 'dust_obs'
    ]

    col_types = {
        'magnorm': float,
        'redshift': float,
        'rest_av': float,
        'rest_rv': float,
        'sed': bytes,
        'uniqueID': int
    }

    assert os.path.isdir(cat_dir)
    data_dir = os.path.join(cat_dir, '%.8d' % obsid)
    if not os.path.isdir(data_dir):
        raise RuntimeError('\n\n%s\nis not a dir\n\n' % data_dir)

    # Read the visit's bandpass letter from the phosim header file.
    phosim_file = os.path.join(data_dir, 'phosim_cat_%d.txt' % obsid)
    assert os.path.isfile(phosim_file)
    bandpass_name = None
    bandpass_name_list = 'ugrizy'
    with open(phosim_file, 'r') as in_file:
        for line in in_file:
            params = line.strip().split()
            if params[0] == 'filter':
                bandpass_name = bandpass_name_list[int(params[1])]

    assert bandpass_name is not None

    (tot_dict, hw_dict) = BandpassDict.loadBandpassesFromFiles()

    # compare against the hardware-only bandpass (no atmosphere)
    bandpass = hw_dict[bandpass_name]

    disk_file = os.path.join(data_dir, 'disk_gal_cat_%d.txt.gz' % obsid)
    if not os.path.isfile(disk_file):
        raise RuntimeError("%s is not a file" % disk_file)

    bulge_file = os.path.join(data_dir, 'bulge_gal_cat_%d.txt.gz' % obsid)
    assert os.path.isfile(bulge_file)

    knots_file = os.path.join(data_dir, 'knots_cat_%d.txt.gz' % obsid)
    assert os.path.isfile(knots_file)

    print('reading disks')
    disk_df = pd.read_csv(disk_file,
                          delimiter=' ',
                          compression='gzip',
                          names=colnames,
                          dtype=col_types,
                          nrows=None)
    disk_df.drop(labels=to_drop, axis='columns', inplace=True)
    print('read disks')

    # uniqueID encodes galaxy_id in the high bits
    disk_df['galaxy_id'] = pd.Series(disk_df['uniqueID'] // 1024,
                                     index=disk_df.index)
    disk_df = disk_df.set_index('galaxy_id')

    print('reading bulges')
    bulge_df = pd.read_csv(bulge_file,
                           delimiter=' ',
                           compression='gzip',
                           names=colnames,
                           dtype=col_types,
                           nrows=None)

    bulge_df.drop(labels=to_drop, axis='columns', inplace=True)
    print('read bulges')

    bulge_df['galaxy_id'] = pd.Series(bulge_df['uniqueID'] // 1024,
                                      index=bulge_df.index)
    bulge_df = bulge_df.set_index('galaxy_id')

    # suffix the knots columns so they do not collide on join
    for ii in range(len(colnames)):
        colnames[ii] = colnames[ii] + '_knots'
    for ii in range(len(to_drop)):
        to_drop[ii] = to_drop[ii] + '_knots'

    print('reading knots')
    knots_df = pd.read_csv(knots_file,
                           delimiter=' ',
                           compression='gzip',
                           names=colnames,
                           dtype=col_types,
                           nrows=None)
    knots_df.drop(labels=to_drop, axis='columns', inplace=True)
    print('read knots')

    knots_df['galaxy_id'] = pd.Series(knots_df['uniqueID_knots'] // 1024,
                                      index=knots_df.index)
    knots_df = knots_df.set_index('galaxy_id')

    wanted_col = [
        'sed', 'magnorm', 'redshift', 'rest_av', 'rest_rv', 'ra', 'dec'
    ]

    # outer-join the three components on galaxy_id
    galaxy_df = disk_df[wanted_col].join(bulge_df[wanted_col],
                                         how='outer',
                                         lsuffix='_disk',
                                         rsuffix='_bulge')

    for ii in range(len(wanted_col)):
        wanted_col[ii] = wanted_col[ii] + '_knots'
    galaxy_df = galaxy_df.join(knots_df[wanted_col],
                               how='outer',
                               rsuffix='_knots')

    # drop sprinkled galaxies
    valid_galaxies = np.where(
        np.logical_not(np.in1d(galaxy_df.index, sprinkled_gid)))

    galaxy_df = galaxy_df.iloc[valid_galaxies]

    # Find the healpixels covering the instance catalog's footprint.
    ra_center = np.nanmedian(galaxy_df['ra_disk'].values)
    dec_center = np.nanmedian(galaxy_df['dec_disk'].values)

    dd = angularSeparation(ra_center, dec_center, galaxy_df['ra_disk'].values,
                           galaxy_df['dec_disk'].values)
    radius_deg = np.nanmax(dd)
    ra_rad = np.radians(ra_center)
    dec_rad = np.radians(dec_center)
    vv = np.array([
        np.cos(ra_rad) * np.cos(dec_rad),
        np.sin(ra_rad) * np.cos(dec_rad),
        np.sin(dec_rad)
    ])

    healpix_list = healpy.query_disc(32,
                                     vv,
                                     np.radians(radius_deg),
                                     nest=False,
                                     inclusive=True)

    gal_id_values = galaxy_df.index.values

    cat = GCRCatalogs.load_catalog('cosmoDC2_v1.1.4_image')
    cat_qties = {}
    cat_qties['galaxy_id'] = []
    cat_qties['ra'] = []
    cat_qties['dec'] = []
    for hp in healpix_list:
        hp_query = GCRQuery('healpix_pixel==%d' % hp)
        local_qties = cat.get_quantities(['galaxy_id', 'ra', 'dec'],
                                         native_filters=[hp_query])
        valid = np.in1d(local_qties['galaxy_id'], gal_id_values)
        if valid.any():
            for k in local_qties:
                cat_qties[k].append(local_qties[k][valid])

    for k in cat_qties:
        cat_qties[k] = np.concatenate(cat_qties[k])

    cat_dexes = np.arange(len(cat_qties['galaxy_id']), dtype=int)

    # optionally subsample the galaxies under test
    if nrows > 0:
        rng = np.random.RandomState(seed)
        dexes = rng.choice(galaxy_df.index.values, size=nrows, replace=False)
        galaxy_df = galaxy_df.loc[dexes]

    galaxy_df = galaxy_df.sort_index()
    # NOTE: a mask of galaxies with non-finite magnorm_knots was computed
    # here via the long-removed np.float alias; it was never used, so the
    # dead (and crashing on NumPy >= 1.24) statement has been dropped.

    dd = angularSeparation(ra_center, dec_center, cat_qties['ra'],
                           cat_qties['dec'])

    # keep GCR galaxies within the footprint (small margin for edge effects)
    dd_cut = np.where(dd < (radius_deg + 0.05))
    gid = cat_qties['galaxy_id'][dd_cut]
    cat_dexes = cat_dexes[dd_cut]

    in1d_valid_dexes = np.where(
        np.in1d(gid, galaxy_df.index.values, assume_unique=True))
    valid_dexes = cat_dexes[in1d_valid_dexes]
    gid = gid[in1d_valid_dexes]

    sorted_dex = np.argsort(gid)
    valid_dexes = valid_dexes[sorted_dex]

    assert len(gid) == len(galaxy_df.index.values)
    np.testing.assert_array_equal(gid[sorted_dex], galaxy_df.index.values)

    # pull the truth magnitudes for the visit's band
    mag_name = 'mag_true_%s_lsst' % bandpass_name
    qties = {}
    qties['galaxy_id'] = []
    qties[mag_name] = []
    for hp in healpix_list:
        hp_query = GCRQuery('healpix_pixel==%d' % hp)
        local_qties = cat.get_quantities(['galaxy_id', mag_name],
                                         native_filters=[hp_query])

        valid = np.in1d(local_qties['galaxy_id'], gal_id_values)
        if valid.any():
            for k in local_qties:
                qties[k].append(local_qties[k][valid])

    for k in qties:
        qties[k] = np.concatenate(qties[k])

    np.testing.assert_array_equal(qties['galaxy_id'], cat_qties['galaxy_id'])

    mags = qties[mag_name][valid_dexes]
    gid = qties['galaxy_id'][valid_dexes]

    assert len(gid) == len(mags)
    assert len(mags) > 0
    if nrows > 0:
        assert len(mags) == nrows

    # Fan the comparison out over worker processes; each worker records
    # its maximum magnitude discrepancy in out_dict.
    t_start = time.time()
    n_proc = 3
    d_proc = len(gid) // n_proc
    mgr = multiprocessing.Manager()
    out_dict = mgr.dict()
    p_list = []
    for i_start in range(0, len(gid), d_proc):
        mag_true = mags[i_start:i_start + d_proc]
        galaxy_arr = galaxy_df.iloc[i_start:i_start + d_proc]
        p = multiprocessing.Process(target=validate_batch,
                                    args=(mag_true, galaxy_arr, bandpass,
                                          out_dict))
        p.start()
        p_list.append(p)

    for p in p_list:
        p.join()

    assert len(list(out_dict.keys())) > 0

    d_mag_max = 0.0
    for k in out_dict.keys():
        if out_dict[k] > d_mag_max:
            d_mag_max = out_dict[k]

    if d_mag_max > 1.0e-5:
        raise RuntimeError("\nobsHistID failed magnitude validation\n"
                           "d_mag_max %e" % d_mag_max)
Esempio n. 26
0
    def run_on_single_catalog(self, catalog_instance, catalog_name,
                              output_dir):
        """
        Compare color distributions of one mock catalog against the
        validation catalog via the Cramer-von Mises statistic.

        Resolves which magnitude columns the catalog provides, optionally
        applies a photometric-system color transformation and a magnitude
        limit, then plots and summarizes the per-color comparisons.
        """

        # every band that appears in any requested color (e.g. 'g-r')
        bands = set(sum((c.split('-') for c in self.colors), []))
        if self.rest_frame:
            possible_names = ('Mag_{}_lsst', 'Mag_{}_sdss',
                              'Mag_true_{}_lsst_z0', 'Mag_true_{}_sdss_z0')
        else:
            possible_lsst_names = (('mag_{}_noagn_lsst',
                                    'mag_true_{}_noagn_lsst')
                                   if self.exclude_agn else
                                   ('mag_{}_cModel', 'mag_{}_lsst',
                                    'mag_true_{}_lsst'))
            possible_non_lsst_names = ('mag_{}_sdss', 'mag_{}_des',
                                       'mag_true_{}_sdss', 'mag_true_{}_des')
            if self.use_lsst:
                print('Selecting lsst magnitudes if available')
                possible_names = possible_lsst_names + possible_non_lsst_names
            else:
                possible_names = possible_non_lsst_names + possible_lsst_names

        # map band letter -> first matching quantity name in the catalog
        labels = {
            band: catalog_instance.first_available(*(n.format(band)
                                                     for n in possible_names))
            for band in bands
        }
        labels = {k: v for k, v in labels.items() if v}

        if len(labels) < 2:
            return TestResult(
                skipped=True,
                summary=
                'magnitudes in mock catalog do not have at least two needed bands.'
            )
        # photometric system of each resolved name (lsst/sdss/des)
        filters = set(
            (v.split('_')[(-2 if 'z0' in v else -1)] for v in labels.values()))

        if len(filters) > 1:
            return TestResult(
                skipped=True,
                summary='magnitudes in mock catalog have mixed filters.')
        filter_this = filters.pop()

        if self.lightcone:
            labels['redshift'] = catalog_instance.first_available(
                'redshift_true_galaxy', 'redshift_true', 'redshift')
            if not labels['redshift']:
                return TestResult(
                    skipped=True,
                    summary='mock catalog does not have redshift.')

            # Load mock catalog data
            filters = [
                '{} > {}'.format(labels['redshift'], self.zlo),
                '{} < {}'.format(labels['redshift'], self.zhi)
            ]
        else:
            filters = None
            redshift = catalog_instance.redshift

        data = catalog_instance.get_quantities(list(labels.values()), filters)
        # filter catalog data further for matched object catalogs.
        # Bug fix: labels only contains a 'redshift' key for lightcone
        # catalogs; guard the lookup so snapshot catalogs do not raise
        # KeyError here.
        redshift_label = labels.get('redshift')
        if redshift_label is not None and np.ma.isMaskedArray(
                data[redshift_label]):
            galmask = np.ma.getmask(data[redshift_label])
            data = {k: data[v][galmask] for k, v in labels.items()}
        else:
            data = {k: data[v] for k, v in labels.items()}

        # Color transformation (e.g. des -> sdss) when comparing across
        # photometric systems
        color_trans = None
        if self.color_transformation_q:
            color_trans_name = None
            if self.validation_catalog == 'DEEP2' and (filter_this == 'sdss' or
                                                       filter_this == 'des'):
                color_trans_name = '{}2cfht'.format(filter_this)
            elif self.validation_catalog == 'SDSS' and filter_this == 'des':
                color_trans_name = 'des2sdss'
            if color_trans_name:
                color_trans = color_transformation[color_trans_name]

        filter_title = r'\mathrm{{{}}}'.format(filter_this.upper())
        if color_trans:
            data_transformed = {}
            for band in bands:
                try:
                    data_transformed[band] = ne.evaluate(color_trans[band],
                                                         local_dict=data,
                                                         global_dict={})
                except KeyError:
                    # no transformation defined for this band; skip it
                    continue

            filter_title = (r'{}\rightarrow\mathrm{{{}}}'.format(
                filter_title, self.validation_catalog)
                            if data_transformed else filter_title)
            # Bug fix: guard for snapshot catalogs, which carry no
            # redshift column (see above).
            if 'redshift' in data:
                data_transformed['redshift'] = data['redshift']
            data = data_transformed
            del data_transformed

        # optional magnitude limit in the r band
        if self.obs_r_mag_limit and not self.rest_frame:
            data = GCRQuery('r < {}'.format(self.obs_r_mag_limit)).filter(data)
        elif self.Mag_r_limit and self.rest_frame:
            data = GCRQuery('r < {}'.format(self.Mag_r_limit)).filter(data)

        # Compute color distribution (PDF, CDF etc.)
        mock_color_dist = self.get_color_dist(data)

        # Calculate Cramer-von Mises statistic, with and without shifting
        # the mock distribution to match the observed median
        color_shift = {}
        cvm_omega = {}
        cvm_omega_shift = {}
        if self.validation_catalog:
            for color in self.colors:
                if not ((color in self.obs_color_dist) and
                        (color in mock_color_dist)):
                    continue
                color_shift[color] = self.obs_color_dist[color][
                    'median'] - mock_color_dist[color]['median']
                cvm_omega[color] = CvM_statistic(
                    mock_color_dist[color]['nsample'],
                    self.obs_color_dist[color]['nsample'],
                    mock_color_dist[color]['binctr'],
                    mock_color_dist[color]['cdf'],
                    self.obs_color_dist[color]['binctr'],
                    self.obs_color_dist[color]['cdf'])
                cvm_omega_shift[color] = CvM_statistic(
                    mock_color_dist[color]['nsample'],
                    self.obs_color_dist[color]['nsample'],
                    mock_color_dist[color]['binctr'] + color_shift[color],
                    mock_color_dist[color]['cdf'],
                    self.obs_color_dist[color]['binctr'],
                    self.obs_color_dist[color]['cdf'])

        redshift_title = '{:.2f} < z < {:.2f}'.format(
            self.zlo,
            self.zhi) if self.lightcone else 'z = {:.2f}'.format(redshift)
        catalog_color = next(self.plot_colors)
        # per-catalog plots, then the cumulative summary plot
        self.make_plots(mock_color_dist, color_shift, cvm_omega,
                        cvm_omega_shift, catalog_name, output_dir,
                        filter_title, redshift_title, catalog_color)
        self.make_plots(mock_color_dist,
                        color_shift,
                        cvm_omega,
                        cvm_omega_shift,
                        catalog_name,
                        output_dir,
                        filter_title,
                        redshift_title,
                        catalog_color,
                        summary=True)

        # Write to summary file
        fn = os.path.join(output_dir, self.summary_output_file)
        with open(fn, 'a') as f:
            if color_trans:
                f.write('Color transformation: {}\n'.format(color_trans_name))
            else:
                f.write('No color transformation\n')
            f.write('{}\n'.format(redshift_title))
            if self.obs_r_mag_limit:
                f.write('r_mag < %2.3f\n\n' % (self.obs_r_mag_limit))
            elif self.Mag_r_limit:
                f.write('Mag_r < %2.3f\n\n' % (self.Mag_r_limit))
            if self.validation_catalog:
                for color in self.colors:
                    if self.validation_catalog and not (
                        (color in self.obs_color_dist) and
                        (color in mock_color_dist)):
                        continue
                    f.write("Median " + color +
                            " difference (obs - mock) = %2.3f\n" %
                            (color_shift[color]))
                    f.write(color + ": {} = {:2.6f}\n".format(
                        'CvM statistic', cvm_omega[color]))
                    f.write(color + " (shifted): {} = {:2.6f}\n".format(
                        'CvM statistic', cvm_omega_shift[color]))
                    f.write("\n")

        return TestResult(inspect_only=True)
    def run_on_single_catalog(self, catalog_instance, catalog_name,
                              output_dir):
        """
        Run the N(z) validation test on a single catalog.

        Accumulates redshift histograms for each configured magnitude cut
        (iterating the catalog in chunks), compares the binned distributions
        against the validation data, writes plots and ascii summaries to
        *output_dir*, and scores the catalog on the average chi-square per
        degree of freedom across all cuts.

        Returns a TestResult (skipped if required quantities are missing).
        """
        # Check catalog data for required quantities.
        mag_field = catalog_instance.first_available(*self.possible_mag_fields)
        if not mag_field:
            return TestResult(skipped=True,
                              summary='Missing required mag_field option')
        self.zlabel = catalog_instance.first_available(
            *self.possible_redshifts)
        if not self.zlabel:
            return TestResult(skipped=True,
                              summary='Missing required redshift option')
        # Restrict all reads to the configured redshift window.
        self.filters = [(lambda z:
                         (z > self.zlo) & (z < self.zhi), self.zlabel)]

        # ra/dec are only needed when jackknife errors are requested.
        jackknife_quantities = [self.zlabel, self.ra, self.dec
                                ] if self.jackknife else [self.zlabel]
        for jq in jackknife_quantities:
            if not catalog_instance.has_quantity(jq):
                return TestResult(
                    skipped=True,
                    summary='Missing required {} quantity'.format(jq))

        required_quantities = jackknife_quantities + [mag_field]
        # Extract the filter name from the magnitude field name.
        filtername = mag_field.split('_')[(-1 if mag_field.startswith('m') else
                                           -2)].upper()
        filelabel = '_'.join((filtername, self.band))

        # Setup plots; optionally shorten/rename the catalog name for labels.
        if self.truncate_cat_name:
            catalog_name = re.split('_', catalog_name)[0]
        if self.replace_cat_name:
            for k, v in self.replace_cat_name.items():
                catalog_name = re.sub(k, v, catalog_name)

        fig, ax = plt.subplots(self.nrows,
                               self.ncolumns,
                               figsize=(self.figx_p, self.figy_p),
                               sharex='col')
        catalog_color = next(self.colors)
        catalog_marker = next(self.markers)

        # Histogram accumulators: counts and sum-of-z per (row, col, z-bin).
        # Use the builtin `int` dtype: `np.int` was a deprecated alias of the
        # builtin and is removed in NumPy >= 1.24.
        N_array = np.zeros((self.nrows, self.ncolumns, len(self.zbins) - 1),
                           dtype=int)
        sumz_array = np.zeros((self.nrows, self.ncolumns, len(self.zbins) - 1))

        jackknife_data = {}
        # Get catalog data by looping over the data iterator (needed for large
        # catalogs) and aggregate histograms chunk by chunk.
        for catalog_data in catalog_instance.get_quantities(
                required_quantities, filters=self.filters,
                return_iterator=True):
            # Drop rows with non-finite values in any requested column.
            catalog_data = GCRQuery(
                *((np.isfinite, col)
                  for col in catalog_data)).filter(catalog_data)
            # Filter catalog data further for matched object catalogs.
            # NOTE(review): np.ma.getmask is True for *masked* entries, so
            # indexing with galmask keeps the masked rows — confirm intended.
            if np.ma.isMaskedArray(catalog_data[self.zlabel]):
                galmask = np.ma.getmask(catalog_data[self.zlabel])
                catalog_data = {k: v[galmask] for k, v in catalog_data.items()}

            for n, (cut_lo, cut_hi, N, sumz) in enumerate(
                    zip_longest(
                        self.mag_lo,
                        self.mag_hi,
                        # Flatten all but the last dimension so each subplot
                        # pairs with one magnitude cut.
                        N_array.reshape(-1, N_array.shape[-1]),
                        sumz_array.reshape(-1, sumz_array.shape[-1]),
                    )):
                if cut_lo:
                    mask = (catalog_data[mag_field] < cut_lo)
                    if cut_hi:
                        mask &= (catalog_data[mag_field] >= cut_hi)
                    z_this = catalog_data[self.zlabel][mask]

                    # Store all the jackknife data in numpy arrays for later
                    # processing.
                    if self.jackknife:
                        if str(n) not in jackknife_data:  # initialize sub-dict
                            # zip truncates at the shorter sequence, so
                            # mag_field (the extra entry in
                            # required_quantities) is excluded here.
                            jackknife_data[str(n)] = dict(
                                zip(required_quantities, [
                                    np.asarray([])
                                    for jq in jackknife_quantities
                                ]))
                        for jkey in jackknife_data[str(n)].keys():
                            jackknife_data[str(n)][jkey] = np.hstack(
                                (jackknife_data[str(n)][jkey],
                                 catalog_data[jkey][mask]))

                    del mask

                    # Bin catalog_data and accumulate subplot histograms
                    # (in-place += updates the views into N_array/sumz_array).
                    N += np.histogram(z_this, bins=self.zbins)[0]
                    sumz += np.histogram(z_this,
                                         bins=self.zbins,
                                         weights=z_this)[0]

        # Loop over magnitude cuts and make plots.
        results = {}
        # Default every score to the pass limit; bins that have validation
        # data overwrite their entry below.
        scores = np.array([self.pass_limit] * self.nplots)
        for n, (ax_this, summary_ax_this, cut_lo, cut_hi, N, sumz, z0,
                z0err) in enumerate(
                    zip_longest(
                        ax.flat,
                        self.summary_ax.flat,
                        self.mag_lo,
                        self.mag_hi,
                        N_array.reshape(-1, N_array.shape[-1]),
                        sumz_array.reshape(-1, sumz_array.shape[-1]),
                        self.validation_data.get('z0values', []),
                        self.validation_data.get('z0errors', []),
                    )):

            if cut_lo is None:  # cut_lo is None once self.mag_lo is exhausted
                # Hide any unused subplot panels.
                if ax_this is not None:
                    ax_this.set_visible(False)
                if summary_ax_this is not None:
                    summary_ax_this.set_visible(False)
            else:
                cut_label = '{} $< {}$'.format(self.band, cut_lo)
                if cut_hi:
                    cut_label = '${} \\leq $ {}'.format(
                        cut_hi, cut_label)  # also appears in txt file

                # Alternate format for some validation data: z0 is linear in
                # the magnitude cut.
                if z0 is None and 'z0const' in self.validation_data:
                    z0 = self.validation_data[
                        'z0const'] + self.validation_data['z0linear'] * cut_lo

                N = N.astype(np.float64)

                if self.jackknife:
                    covariance = self.get_jackknife_errors(
                        self.N_jack, jackknife_data[str(n)], N)
                else:
                    # Poisson errors: variance equals counts on the diagonal.
                    covariance = np.diag(N)

                meanz = sumz / N
                sumN = N.sum()
                total = '(# of galaxies = {})'.format(sumN)

                if self.normed:
                    # Normalize to a probability density over the z bins and
                    # propagate the scaling into the covariance.
                    scale = sumN * (self.zbins[1:] - self.zbins[:-1])
                    N /= scale
                    covariance /= np.outer(scale, scale)

                Nerrors = np.sqrt(np.diag(covariance))

                # Make subplot.
                catalog_label = ' '.join(
                    (catalog_name,
                     cut_label.replace(self.band,
                                       filtername + ' ' + self.band)))
                validation_label = ' '.join(
                    (self.validation_data.get('label', ''), cut_label))
                key = cut_label.replace('$', '').replace('\\leq', '<=')
                results[key] = {
                    'meanz': meanz,
                    'total': total,
                    'N': N,
                    'N+-': Nerrors
                }
                self.catalog_subplot(ax_this, meanz, N, Nerrors, catalog_color,
                                     catalog_marker, catalog_label)
                if z0 and z0 > 0:  # has validation data
                    fits = self.validation_subplot(ax_this, meanz, z0, z0err,
                                                   validation_label)
                    results[key].update(fits)
                    scores[n], inverse_cov = self.get_score(
                        N,
                        fits['fit'],
                        covariance,
                        use_diagonal_only=self.use_diagonal_only)
                    results[key]['score'] = 'Chi_sq/dof = {:11.4g}'.format(
                        scores[n])
                    if self.jackknife:
                        results[key]['inverse_cov_matrix'] = inverse_cov

                self.decorate_subplot(ax_this, n)

                # Add curve for this catalog to the summary plot.
                self.catalog_subplot(summary_ax_this, meanz, N, Nerrors,
                                     catalog_color, catalog_marker,
                                     catalog_label)
                if self.first_pass and z0 and z0 > 0:
                    # Add validation data only while evaluating the first
                    # catalog; subsequent catalogs reuse the same panel.
                    self.validation_subplot(
                        summary_ax_this, meanz, z0, z0err, validation_label)
                self.decorate_subplot(summary_ax_this, n)

        # Save results for catalog and validation data in txt files.
        for filename, dtype, comment, info, info2 in zip_longest(
            (filelabel, self.observation), ('N', 'fit'), (filtername, ),
            ('total', 'z0'), ('score', 'z0err')):
            if filename:
                with open(
                        os.path.join(output_dir, 'Nvsz_' + filename + '.txt'),
                        'ab') as f_handle:  # open file in append binary mode
                    # Loop over magnitude cuts in results dict.
                    for key, value in results.items():
                        self.save_quantities(dtype,
                                             value,
                                             f_handle,
                                             comment=' '.join(
                                                 ((comment or ''), key,
                                                  value.get(info, ''),
                                                  value.get(info2, ''))))

                if self.jackknife:
                    with open(
                            os.path.join(output_dir,
                                         'Nvsz_' + filename + '.txt'),
                            'a') as f_handle:  # open file in append mode
                        f_handle.write('\nInverse Covariance Matrices:\n')
                        for key in results.keys():
                            self.save_matrix(
                                results[key]['inverse_cov_matrix'],
                                f_handle,
                                comment=key)

        if self.first_pass:
            # Turn off validation-data plotting in the summary for the
            # remaining catalogs.
            self.first_pass = False

        # Make final adjustments to plots and save figure.
        self.post_process_plot(fig)
        fig.savefig(os.path.join(output_dir, 'Nvsz_' + filelabel + '.png'))
        plt.close(fig)

        # Compute final score: pass or fail on the average score rather than
        # demanding that all distributions pass.
        score_ave = np.mean(scores)
        return TestResult(score_ave, passed=score_ave < self.pass_limit)
Esempio n. 28
0
    0, '/global/homes/i/ihasan/python_stuff/lib/python3.7/site-packages/')
from pzblend import PhotozBlend
#sys.path.insert(0,"/global/cfs/cdirs/lsst/groups/PZ/PhotoZDC2/run2.2i_dr6_test/gcr-catalogs/lib/python3.7/site-packages/GCRCatalogs-0.18.1-py3.7.egg")
#sys.path.insert(0,'/global/homes/i/ihasan/python_stuff/lib/python3.7/site-packages/')
import GCRCatalogs
from GCR import GCRQuery

# Load the DC2 Run 2.2i DR6a object catalog with photo-z columns attached.
object_cat = GCRCatalogs.load_catalog('dc2_object_run2.2i_dr6a_with_photoz')

# Integer tract IDs to process (used as native filters on the object catalog).
tract_ids = [
    2731, 2904, 2906, 3081, 3082, 3084, 3262, 3263, 3265, 3448, 3450, 3831,
    3832, 3834, 4029, 4030, 4031, 2905, 3083, 3264, 3449, 3833
]

basic_cuts = [
    GCRQuery('extendedness > 0'),  # Extended objects
    GCRQuery(
        (np.isfinite, 'mag_i')),  # Select objects that have i-band magnitudes
    GCRQuery(
        'clean'
    ),  # The source has no flagged pixels (interpolated, saturated, edge, clipped...) 
    # and was not skipped by the deblender
    GCRQuery('xy_flag == 0'),  # Bad centroiding
    GCRQuery('snr_i_cModel >= 10'),
    GCRQuery('detect_isPrimary'),  # (from this and below) basic flag cuts 
    ~GCRQuery('deblend_skipped'),
    ~GCRQuery('base_PixelFlags_flag_edge'),
    ~GCRQuery('base_PixelFlags_flag_interpolatedCenter'),
    ~GCRQuery('base_PixelFlags_flag_saturatedCenter'),
    ~GCRQuery('base_PixelFlags_flag_crCenter'),
    ~GCRQuery('base_PixelFlags_flag_bad'),
Esempio n. 29
0
# Finalize and save the mass-vs-redshift figure built earlier in the script.
plt.legend(loc='best', framealpha=0.3)
plt.xlabel('z')
plt.ylabel('M ')
#plt.title('Halo ID:  {}\nHalo Mass:  {:.2e} h^-1 Msun'.format(cluster['halo_id'], cluster['halo_mass']))
plt.savefig(outpath + "mass_redshift.png", bbox_inches='tight')
plt.close()
print('********Plot saved********')

#number of clusters to debug
nmax = 3

# Make one ra/dec scatter plot of member galaxies per cluster, up to nmax.
cluster_data = Table(cluster_data)
for i, cluster in enumerate(cluster_data):
    if (i >= nmax):
        break  # plot only the first 3
    # Select this cluster's member galaxies by matching halo_id.
    members = GCRQuery('halo_id == {}'.format(
        cluster['halo_id'])).filter(galaxy_data)
    plt.figure()
    # Marker size scales with brightness: brighter i-band -> larger marker.
    plt.scatter(members['ra'],
                members['dec'],
                s=(24 - members['mag_i']) * 8,
                label='Galaxy Members [{}]'.format(len(members['ra'])))
    plt.plot(cluster['ra'], cluster['dec'], 'xr', label='Cluster Center')
    plt.legend(loc='best', framealpha=0.3)
    plt.xlabel(r'ra [deg]')
    plt.ylabel(r'dec [deg]')
    plt.title('Halo ID:  {}\nHalo Mass:  {:.2e} h^-1 Msun'.format(
        cluster['halo_id'], cluster['halo_mass']))
    # One output file per cluster, named by halo ID.
    plt.savefig(outpath + format(cluster['halo_id']) + ".png",
                bbox_inches='tight')
    plt.close()
    print('********Plot saved********')
Esempio n. 30
0
    def run_on_single_catalog(self, catalog_instance, catalog_name,
                              output_dir):
        '''
        Loop over magnitude cuts and make plots.

        For each configured magnitude bin: filter the catalog, build a
        treecorr data catalog (with comoving distances when a projected
        correlation is requested), compute the Landy-Szalay correlation
        against a uniform random catalog, write the treecorr output file,
        and overplot the measurement against the validation data.
        '''
        # load catalog data
        colnames = dict()
        colnames['z'] = catalog_instance.first_available(
            'redshift', 'redshift_true')
        colnames['ra'] = catalog_instance.first_available('ra', 'ra_true')
        colnames['dec'] = catalog_instance.first_available('dec', 'dec_true')
        colnames['mag'] = catalog_instance.first_available(
            *self.possible_mag_fields)

        # Skip if any of the required quantities is unavailable.
        if not all(v for v in colnames.values()):
            return TestResult(skipped=True,
                              summary='Missing requested quantities')

        # Pre-filter at load time to the union of all bins (finite values,
        # overall magnitude range, and — if distances are needed — the
        # overall redshift range implied by the cz limits).
        filters = [(np.isfinite, c) for c in colnames.values()]
        filters.extend((
            '{} < {}'.format(
                colnames['mag'],
                max(mag_bin['mag_max'] for mag_bin in self.mag_bins)),
            '{} >= {}'.format(
                colnames['mag'],
                min(mag_bin['mag_min'] for mag_bin in self.mag_bins)),
        ))
        if self.need_distance:
            filters.extend((
                '{} < {}'.format(
                    colnames['z'],
                    max(mag_bin['cz_max']
                        for mag_bin in self.mag_bins) / self._C),
                '{} >= {}'.format(
                    colnames['z'],
                    min(mag_bin['cz_min']
                        for mag_bin in self.mag_bins) / self._C),
            ))
        catalog_data = catalog_instance.get_quantities(list(colnames.values()),
                                                       filters=filters)
        # Rename catalog-specific column names to the short keys used below.
        catalog_data = {k: catalog_data[v] for k, v in colnames.items()}

        # create random
        # Uniform randoms restricted to the healpixel footprint actually
        # covered by the data, oversampled by self.random_mult.
        rand_ra, rand_dec = generate_uniform_random_ra_dec_footprint(
            catalog_data['ra'].size * self.random_mult,
            get_healpixel_footprint(catalog_data['ra'], catalog_data['dec'],
                                    self.random_nside),
            self.random_nside,
        )

        # For angular correlations the same random catalog (and its RR
        # counts) can be shared by every magnitude bin; with distances the
        # randoms are rebuilt per bin inside the loop below.
        if not self.need_distance:
            rand_cat = treecorr.Catalog(ra=rand_ra,
                                        dec=rand_dec,
                                        ra_units='deg',
                                        dec_units='deg')
            del rand_ra, rand_dec
            rr = treecorr.NNCorrelation(**self._treecorr_config)
            rr.process(rand_cat)

        fig, ax = plt.subplots()
        try:
            # One color per magnitude bin, drawn from the plasma_r colormap.
            for mag_bin, color in zip(
                    self.mag_bins,
                    plt.cm.plasma_r(np.linspace(0.1, 1, len(self.mag_bins)))):  #pylint: disable=E1101

                # filter catalog data for this bin
                filters = [
                    'mag < {}'.format(mag_bin['mag_max']),
                    'mag >= {}'.format(mag_bin['mag_min']),
                ]
                if self.need_distance:
                    filters.extend((
                        'z < {}'.format(mag_bin['cz_max'] / self._C),
                        'z >= {}'.format(mag_bin['cz_min'] / self._C),
                    ))

                catalog_data_this = GCRQuery(*filters).filter(catalog_data)

                # Data catalog for this bin; r= comoving distance only when
                # a projected (3D) correlation is requested.
                cat = treecorr.Catalog(
                    ra=catalog_data_this['ra'],
                    dec=catalog_data_this['dec'],
                    ra_units='deg',
                    dec_units='deg',
                    r=(redshift2dist(catalog_data_this['z'],
                                     catalog_instance.cosmology)
                       if self.need_distance else None),
                )

                del catalog_data_this

                # Per-bin treecorr config; pi_max sets the line-of-sight
                # integration limits for wp(rp).
                treecorr_config = self._treecorr_config.copy()
                if 'pi_max' in mag_bin:
                    treecorr_config['min_rpar'] = -mag_bin['pi_max']
                    treecorr_config['max_rpar'] = mag_bin['pi_max']

                if self.need_distance:
                    # Rebuild randoms with distances drawn uniformly within
                    # this bin's comoving-distance range, then recompute RR.
                    rand_cat = treecorr.Catalog(
                        ra=rand_ra,
                        dec=rand_dec,
                        ra_units='deg',
                        dec_units='deg',
                        r=generate_uniform_random_dist(
                            rand_ra.size,
                            *redshift2dist(
                                np.array(
                                    [mag_bin['cz_min'], mag_bin['cz_max']]) /
                                self._C, catalog_instance.cosmology)),
                    )
                    rr = treecorr.NNCorrelation(treecorr_config)
                    rr.process(rand_cat)

                # DD, DR, RD pair counts for the Landy-Szalay estimator.
                dd = treecorr.NNCorrelation(treecorr_config)
                dr = treecorr.NNCorrelation(treecorr_config)
                rd = treecorr.NNCorrelation(treecorr_config)

                dd.process(cat)
                dr.process(rand_cat, cat)
                rd.process(cat, rand_cat)

                # Persist the raw pair counts / correlation for this bin.
                output_filepath = os.path.join(
                    output_dir,
                    self.output_filename_template.format(
                        mag_bin['mag_min'], mag_bin['mag_max']))
                dd.write(output_filepath, rr, dr, rd)

                xi, var_xi = dd.calculateXi(rr, dr, rd)
                xi_rad = np.exp(dd.meanlogr)
                xi_sig = np.sqrt(var_xi)

                # Plot the validation curve for this bin.
                ax.loglog(self.validation_data[:, 0],
                          self.validation_data[:, mag_bin['data_col']],
                          c=color,
                          label=self.label_template.format(
                              mag_bin['mag_min'], mag_bin['mag_max']))
                if 'data_err_col' in mag_bin:
                    # Shaded error band around the validation curve; clamp
                    # the lower edge so it stays visible on the log axis.
                    y1 = self.validation_data[:, mag_bin[
                        'data_col']] + self.validation_data[:, mag_bin[
                            'data_err_col']]
                    y2 = self.validation_data[:, mag_bin[
                        'data_col']] - self.validation_data[:, mag_bin[
                            'data_err_col']]
                    y2[y2 <= 0] = self.fig_ylim[0] * 0.9
                    ax.fill_between(self.validation_data[:, 0],
                                    y1,
                                    y2,
                                    lw=0,
                                    color=color,
                                    alpha=0.2)
                # wp(rp) integrates over 2*pi_max along the line of sight.
                scale_wp = mag_bin[
                    'pi_max'] * 2.0 if 'pi_max' in mag_bin else 1.0
                ax.errorbar(xi_rad,
                            xi * scale_wp,
                            xi_sig * scale_wp,
                            marker='o',
                            ls='',
                            c=color)

            ax.legend(loc='best')
            ax.set_xlabel(self.fig_xlabel)
            ax.set_ylim(*self.fig_ylim)
            ax.set_ylabel(self.fig_ylabel)
            ax.set_title('{} vs. {}'.format(catalog_name, self.data_label),
                         fontsize='medium')

        finally:
            # Always save and close the figure, even if a bin failed.
            fig.savefig(os.path.join(output_dir,
                                     '{:s}.png'.format(self.test_name)),
                        bbox_inches='tight')
            plt.close(fig)

        #TODO: calculate summary statistics
        return TestResult(inspect_only=True)