Esempio n. 1
0
        final['CO'] = hh[1]['CO'][:]
        hh.close()
    else:
        binSizeP = (final['RPMAX'] - final['RPMIN']) / final['NP']
        binSizeT = (final['RTMAX'] - 0.) / final['NT']
        if not args.do_not_smooth_cov:
            print('INFO: The covariance will be smoothed')
            final['CO'] = smooth_cov(final['DA'],
                                     final['WE'],
                                     final['RP'],
                                     final['RT'],
                                     delta_r_trans=binSizeT,
                                     delta_r_par=binSizeP)
        else:
            print('INFO: The covariance will not be smoothed')
            final['CO'] = compute_cov(final['DA'], final['WE'])

    ### Test covariance matrix
    try:
        scipy.linalg.cholesky(final['CO'])
    except scipy.linalg.LinAlgError:
        print('WARNING: Matrix is not positive definite')

    ### Measurement
    final['DA'] = (final['DA'] * final['WE']).sum(axis=0)
    final['WE'] = final['WE'].sum(axis=0)
    w = final['WE'] > 0.
    final['DA'][w] /= final['WE'][w]

    ### Distortion matrix
    if args.dmat is not None:
Esempio n. 2
0
def main(cmdargs):
    """Export auto and cross-correlation for the fitter.

    Reads the sub-sampled correlation given by ``--data``, optionally
    subtracts a shuffled correlation, obtains the covariance matrix (read from
    ``--cov``, built from the correlation matrix in ``--cor`` rescaled by the
    sub-sampling variance, or computed by sub-sampling with optional
    smoothing), co-adds the sub-samples, attaches the distortion matrix
    (identity if ``--dmat`` is not given) and writes the result to ``--out``.

    Arguments:
        cmdargs: list of str
            Command-line arguments, parsed with argparse.

    Exits with status 1 if blinded and non-blinded correlation/distortion
    matrix files are mixed.
    """
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        description='Export auto and cross-correlation for the fitter.')

    parser.add_argument(
        '--data',
        type=str,
        default=None,
        required=True,
        help='Correlation produced via picca_cf.py, picca_xcf.py, ...')

    parser.add_argument('--out',
                        type=str,
                        default=None,
                        required=True,
                        help='Output file name')

    parser.add_argument(
        '--dmat',
        type=str,
        default=None,
        required=False,
        help=(
            'Distortion matrix produced via picca_dmat.py, picca_xdmat.py... '
            '(if not provided will be identity)'))

    parser.add_argument(
        '--cov',
        type=str,
        default=None,
        required=False,
        help=('Covariance matrix (if not provided will be calculated by '
              'subsampling)'))

    parser.add_argument(
        '--cor',
        type=str,
        default=None,
        required=False,
        help=('Correlation matrix (if not provided will be calculated by '
              'subsampling)'))

    parser.add_argument(
        '--remove-shuffled-correlation',
        type=str,
        default=None,
        required=False,
        help='Remove a correlation from shuffling the distribution of los')

    parser.add_argument('--do-not-smooth-cov',
                        action='store_true',
                        default=False,
                        help='Do not smooth the covariance matrix')

    args = parser.parse_args(cmdargs)

    # Read the correlation: HDU 1 holds the binning, HDU 2 the sub-samples
    hdul = fitsio.FITS(args.data)

    r_par = np.array(hdul[1]['RP'][:])
    r_trans = np.array(hdul[1]['RT'][:])
    z = np.array(hdul[1]['Z'][:])
    num_pairs = np.array(hdul[1]['NB'][:])
    weights = np.array(hdul[2]['WE'][:])

    # The blinded column, when present, takes precedence over 'DA'
    if 'DA_BLIND' in hdul[2].get_colnames():
        data_name = 'DA_BLIND'
    else:
        data_name = 'DA'
    xi = np.array(hdul[2][data_name][:])

    head = hdul[1].read_header()
    num_bins_r_par = head['NP']
    num_bins_r_trans = head['NT']
    r_trans_max = head['RTMAX']
    r_par_min = head['RPMIN']
    r_par_max = head['RPMAX']

    if "BLINDING" in head:
        blinding = head["BLINDING"]
    # older runs are not from DESI main survey and should not be blinded
    else:
        blinding = "none"
    hdul.close()

    # Optionally subtract the correlation measured on shuffled lines of sight
    if args.remove_shuffled_correlation is not None:
        hdul = fitsio.FITS(args.remove_shuffled_correlation)
        xi_shuffled = hdul['COR'][data_name][:]
        weight_shuffled = hdul['COR']['WE'][:]
        xi_shuffled = (xi_shuffled * weight_shuffled).sum(axis=1)
        weight_shuffled = weight_shuffled.sum(axis=1)
        w = weight_shuffled > 0.
        xi_shuffled[w] /= weight_shuffled[w]
        hdul.close()
        xi -= xi_shuffled[:, None]

    # Covariance matrix: from file, from a correlation matrix, or sub-sampling
    if args.cov is not None:
        userprint(("INFO: The covariance-matrix will be read from file: "
                   "{}").format(args.cov))
        hdul = fitsio.FITS(args.cov)
        covariance = hdul[1]['CO'][:]
        hdul.close()
    elif args.cor is not None:
        userprint(("INFO: The correlation-matrix will be read from file: "
                   "{}").format(args.cor))
        hdul = fitsio.FITS(args.cor)
        correlation = hdul[1]['CO'][:]
        hdul.close()
        # min > 1 implies max > 1 and max < -1 implies min < -1, so testing
        # the two extremes is enough to detect out-of-range coefficients
        if ((correlation.min() < -1.) or (correlation.max() > 1.)
                or np.any(np.diag(correlation) != 1.)):
            userprint(("WARNING: The correlation-matrix has some incorrect "
                       "values"))
        # Normalise to a unit diagonal, then scale by the measured variance
        var = np.diagonal(correlation)
        correlation = correlation / np.sqrt(var * var[:, None])
        covariance = compute_cov(xi, weights)
        var = np.diagonal(covariance)
        covariance = correlation * np.sqrt(var * var[:, None])
    else:
        delta_r_par = (r_par_max - r_par_min) / num_bins_r_par
        delta_r_trans = (r_trans_max - 0.) / num_bins_r_trans
        if not args.do_not_smooth_cov:
            userprint("INFO: The covariance will be smoothed")
            covariance = smooth_cov(xi,
                                    weights,
                                    r_par,
                                    r_trans,
                                    delta_r_trans=delta_r_trans,
                                    delta_r_par=delta_r_par)
        else:
            userprint("INFO: The covariance will not be smoothed")
            covariance = compute_cov(xi, weights)

    # Weighted co-add of the sub-samples
    xi = (xi * weights).sum(axis=0)
    weights = weights.sum(axis=0)
    w = weights > 0
    xi[w] /= weights[w]

    # Cholesky factorisation only succeeds for positive definite matrices
    try:
        scipy.linalg.cholesky(covariance)
    except scipy.linalg.LinAlgError:
        userprint("WARNING: Matrix is not positive definite")

    if args.dmat is not None:
        hdul = fitsio.FITS(args.dmat)
        data_is_blinded = data_name == 'DA_BLIND'
        dmat_is_blinded = 'DM_BLIND' in hdul[1].get_colnames()
        if data_is_blinded != dmat_is_blinded:
            if data_is_blinded:
                userprint(
                    "Blinded correlations were given but distortion matrix "
                    "is unblinded. These files should not mix. Exiting...")
            else:
                userprint(
                    "Non-blinded correlations were given but distortion matrix "
                    "is blinded. These files should not mix. Exiting...")
            hdul.close()
            sys.exit(1)
        dmat_name = 'DM_BLIND' if dmat_is_blinded else 'DM'
        dmat = np.array(hdul[1][dmat_name][:])

        # Model coordinates are optional in the distortion matrix file
        try:
            r_par_dmat = hdul[2]['RP'][:]
            r_trans_dmat = hdul[2]['RT'][:]
            z_dmat = hdul[2]['Z'][:]
        except IOError:
            r_par_dmat = r_par.copy()
            r_trans_dmat = r_trans.copy()
            z_dmat = z.copy()
        # A square matrix with the data size shares the data coordinates
        if dmat.shape == (xi.size, xi.size):
            r_par_dmat = r_par.copy()
            r_trans_dmat = r_trans.copy()
            z_dmat = z.copy()
        hdul.close()
    else:
        dmat = np.eye(len(xi))
        # The identity matrix is written under the non-blinded column name
        dmat_name = 'DM'
        r_par_dmat = r_par.copy()
        r_trans_dmat = r_trans.copy()
        z_dmat = z.copy()

    results = fitsio.FITS(args.out, 'rw', clobber=True)
    header = [
        {
            'name': "BLINDING",
            'value': blinding,
            'comment': 'String specifying the blinding strategy'
        },
        {
            'name': 'RPMIN',
            'value': r_par_min,
            'comment': 'Minimum r-parallel'
        },
        {
            'name': 'RPMAX',
            'value': r_par_max,
            'comment': 'Maximum r-parallel'
        },
        {
            'name': 'RTMAX',
            'value': r_trans_max,
            'comment': 'Maximum r-transverse'
        },
        {
            'name': 'NP',
            'value': num_bins_r_par,
            'comment': 'Number of bins in r-parallel'
        },
        {
            'name': 'NT',
            'value': num_bins_r_trans,
            'comment': 'Number of bins in r-transverse'
        },
        {
            'name': 'OMEGAM',
            'value': head['OMEGAM'],
            'comment': 'Omega_matter(z=0) of fiducial LambdaCDM cosmology'
        },
        {
            'name': 'OMEGAR',
            'value': head['OMEGAR'],
            'comment': 'Omega_radiation(z=0) of fiducial LambdaCDM cosmology'
        },
        {
            'name': 'OMEGAK',
            'value': head['OMEGAK'],
            'comment': 'Omega_k(z=0) of fiducial LambdaCDM cosmology'
        },
        {
            'name':
            'WL',
            'value':
            head['WL'],
            'comment':
            'Equation of state of dark energy of fiducial LambdaCDM cosmology'
        },
    ]
    # Comments are listed in the same order as the columns written below
    comment = [
        'Correlation', 'R-parallel', 'R-transverse', 'Redshift',
        'Covariance matrix', 'Distortion matrix', 'Number of pairs'
    ]
    results.write([xi, r_par, r_trans, z, covariance, dmat, num_pairs],
                  names=[data_name, 'RP', 'RT', 'Z', 'CO', dmat_name, 'NB'],
                  comment=comment,
                  header=header,
                  extname='COR')
    comment = ['R-parallel model', 'R-transverse model', 'Redshift model']
    results.write([r_par_dmat, r_trans_dmat, z_dmat],
                  names=['DMRP', 'DMRT', 'DMZ'],
                  comment=comment,
                  extname='DMATTRI')
    results.close()
Esempio n. 3
0
                                        axis=0)
            data[key]['HEALPID'] = np.append(data[key]['HEALPID'], new_healpix)

    # Sort the data by the healpix values
    for key in sorted(list(data.keys())):
        sort = np.array(data[key]['HEALPID']).argsort()
        data[key]['DA'] = data[key]['DA'][sort]
        data[key]['WE'] = data[key]['WE'][sort]
        data[key]['HEALPID'] = data[key]['HEALPID'][sort]

    # Append the data
    xi = np.append(data[0]['DA'], data[1]['DA'], axis=1)
    weights = np.append(data[0]['WE'], data[1]['WE'], axis=1)

    # Compute the covariance
    covariance = compute_cov(xi, weights)

    # Get the cross-covariance
    num_bins = data[0]['DA'].shape[1]
    cross_covariance = covariance.copy()
    cross_covariance = cross_covariance[:, num_bins:]
    cross_covariance = cross_covariance[:num_bins, :]

    ### Get the cross-correlation
    var = np.diagonal(covariance)
    cor = covariance / np.sqrt(var * var[:, None])
    cross_correlation = cor.copy()
    cross_correlation = cross_correlation[:, num_bins:]
    cross_correlation = cross_correlation[:num_bins, :]

    ### Test if valid
Esempio n. 4
0
def _landy_szalay(data_data, random_random, cross_first, cross_second):
    """Return (DD + RR - cross_first - cross_second) / RR.

    Entries where RR is not strictly positive are left at zero. The two cross
    terms are subtracted in the order given.
    """
    xi = np.zeros(data_data.shape)
    w = random_random > 0.
    xi[w] = (data_data[w] + random_random[w] - cross_first[w] -
             cross_second[w]) / random_random[w]
    return xi


def main():
    # pylint: disable-msg=too-many-locals,too-many-branches,too-many-statements
    """Export auto and cross-correlation of catalog of objects for the fitter.

    Reads the data/random pair-count files of either an auto-correlation
    (``--DD-file``, ``--RR-file``, ``--DR-file``, ``--RD-file``) or a
    cross-correlation (``--xDD-file``, ``--xRR-file``, ``--xD1R2-file``,
    ``--xR1D2-file``), combines them as (DD + RR - DR - RD) / RR, obtains the
    covariance matrix (read from ``--cov``, from Poisson statistics, or from
    sub-sampling with optional smoothing) and writes the result to ``--out``.

    Exits with status 1 on inconsistent arguments or inconsistent HEALPix
    settings between the input files.
    """
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        description=('Export auto and cross-correlation of catalog of objects '
                     'for the fitter.'))

    parser.add_argument('--out',
                        type=str,
                        default=None,
                        required=True,
                        help='Output file name')

    parser.add_argument('--DD-file',
                        type=str,
                        default=None,
                        required=False,
                        help='File of the data x data auto-correlation')

    parser.add_argument('--RR-file',
                        type=str,
                        default=None,
                        required=False,
                        help='File of the random x random auto-correlation')

    parser.add_argument('--DR-file',
                        type=str,
                        default=None,
                        required=False,
                        help='File of the data x random auto-correlation')

    parser.add_argument('--RD-file',
                        type=str,
                        default=None,
                        required=False,
                        help='File of the random x data auto-correlation')

    parser.add_argument('--xDD-file',
                        type=str,
                        default=None,
                        required=False,
                        help='File of the data_1 x data_2 cross-correlation')

    parser.add_argument(
        '--xRR-file',
        type=str,
        default=None,
        required=False,
        help='File of the random_1 x random_2 cross-correlation')

    parser.add_argument('--xD1R2-file',
                        type=str,
                        default=None,
                        required=False,
                        help='File of the data_1 x random_2 cross-correlation')

    parser.add_argument('--xR1D2-file',
                        type=str,
                        default=None,
                        required=False,
                        help='File of the random_1 x data_2 cross-correlation')

    parser.add_argument(
        '--do-not-smooth-cov',
        action='store_true',
        default=False,
        help='Do not smooth the covariance matrix from sub-sampling')

    parser.add_argument('--get-cov-from-poisson',
                        action='store_true',
                        default=False,
                        help='Get covariance matrix from Poisson statistics')

    parser.add_argument(
        '--cov',
        type=str,
        default=None,
        required=False,
        help=('Path to a covariance matrix file (if not provided it will be '
              'calculated by subsampling or from Poisson statistics)'))

    args = parser.parse_args()

    ### Auto or cross correlation?
    # Reading the covariance from file and computing it from Poisson
    # statistics are mutually exclusive; --cov on its own is valid.
    if ((args.DD_file is None and args.xDD_file is None)
            or (args.DD_file is not None and args.xDD_file is not None)
            or (args.cov is not None and args.get_cov_from_poisson)):
        userprint(('ERROR: No data files, or both auto and cross data files, '
                   'or two different method for covariance'))
        sys.exit(1)

    if args.DD_file is not None:
        corr = 'AUTO'
        data_key = 'DD'
        correlation_files = {
            'DD': args.DD_file,
            'RR': args.RR_file,
            'DR': args.DR_file,
            'RD': args.RD_file
        }
    else:
        # TODO: Test if picca_co.py and export_co.py work for cross
        corr = 'CROSS'
        data_key = 'xDD'
        correlation_files = {
            'xDD': args.xDD_file,
            'xRR': args.xRR_file,
            'xD1R2': args.xD1R2_file,
            'xR1D2': args.xR1D2_file
        }

    # All four pair-count files are needed by the estimator
    missing_files = [
        type_corr for type_corr, filename in correlation_files.items()
        if filename is None
    ]
    if missing_files:
        userprint('ERROR: Missing file(s) for: {}'.format(
            ', '.join(missing_files)))
        sys.exit(1)

    # Read files
    data = {}
    for type_corr, filename in correlation_files.items():
        hdul = fitsio.FITS(filename)
        header = hdul[1].read_header()
        # The fiducial cosmology written to the output is the one of the
        # last file read
        fid_Om = header['OMEGAM']
        fid_Or = header['OMEGAR']
        fid_Ok = header['OMEGAK']
        fid_wl = header['WL']
        # Normalisation of the pair counts by the number of possible pairs
        num_objects = header['NOBJ']
        if type_corr in ['DD', 'RR']:
            coef = num_objects * (num_objects - 1)
        else:
            coef = num_objects * header['NOBJ2']

        # The binning is taken from the data x data file
        if type_corr == data_key:
            data['COEF'] = coef
            for item in ['NT', 'NP', 'RTMAX', 'RPMIN', 'RPMAX']:
                data[item] = header[item]
            for item in ['RP', 'RT', 'Z', 'NB']:
                data[item] = np.array(hdul[1][item][:])

        data[type_corr] = {}
        data[type_corr]['NSIDE'] = header['NSIDE']
        data[type_corr]['HLPXSCHM'] = hdul[2].read_header()['HLPXSCHM']
        # Keep only the sub-samples with a non-zero total weight
        w = np.array(hdul[2]['WE'][:]).sum(axis=1) > 0.
        if w.sum() != w.size:
            userprint("INFO: {} sub-samples were empty".format(w.size -
                                                               w.sum()))
        data[type_corr]['HEALPID'] = hdul[2]['HEALPID'][:][w]
        data[type_corr]['WE'] = hdul[2]['WE'][:][w] / coef
        hdul.close()

    # Compute correlation from the counts summed over all sub-samples
    if corr == 'AUTO':
        xi_data_data = data['DD']['WE'].sum(axis=0)
        xi_random_random = data['RR']['WE'].sum(axis=0)
        xi = _landy_szalay(xi_data_data, xi_random_random,
                           data['RD']['WE'].sum(axis=0),
                           data['DR']['WE'].sum(axis=0))
    else:
        xi_data_data = data['xDD']['WE'].sum(axis=0)
        xi_random_random = data['xRR']['WE'].sum(axis=0)
        xi = _landy_szalay(xi_data_data, xi_random_random,
                           data['xD1R2']['WE'].sum(axis=0),
                           data['xR1D2']['WE'].sum(axis=0))
    data['DA'] = xi
    data['corr_DD'] = xi_data_data
    data['corr_RR'] = xi_random_random

    # Compute covariance matrix
    if args.cov is not None:
        userprint('INFO: Read covariance from file')
        hdul = fitsio.FITS(args.cov)
        data['CO'] = hdul[1]['CO'][:]
        hdul.close()
    elif args.get_cov_from_poisson:
        userprint('INFO: Compute covariance from Poisson statistics')
        w = data['corr_RR'] > 0.
        covariance = np.zeros(data['corr_DD'].size)
        covariance[w] = ((data['COEF'] / 2. * data['corr_DD'][w])**2 /
                         (data['COEF'] / 2. * data['corr_RR'][w])**3)
        data['CO'] = np.diag(covariance)
    else:
        userprint('INFO: Compute covariance from sub-sampling')

        ### To have same number of HEALPix
        for type_corr1 in list(correlation_files):
            for type_corr2 in list(correlation_files):

                if data[type_corr1]['NSIDE'] != data[type_corr2]['NSIDE']:
                    userprint("ERROR: NSIDE are different: {} != "
                              "{}".format(data[type_corr1]['NSIDE'],
                                          data[type_corr2]['NSIDE']))
                    sys.exit(1)
                if data[type_corr1]['HLPXSCHM'] != data[type_corr2]['HLPXSCHM']:
                    userprint("ERROR: HLPXSCHM are different: {} != "
                              "{}".format(data[type_corr1]['HLPXSCHM'],
                                          data[type_corr2]['HLPXSCHM']))
                    sys.exit(1)

                # HEALPix pixels of type_corr1 that type_corr2 lacks are
                # added to type_corr2 with zero weight
                w = np.isin(data[type_corr1]['HEALPID'],
                            data[type_corr2]['HEALPID'],
                            invert=True)
                if w.sum() != 0:
                    userprint("WARNING: HEALPID are different by {} for {}:{} "
                              "and {}:{}".format(
                                  w.sum(), type_corr1,
                                  data[type_corr1]['HEALPID'].size, type_corr2,
                                  data[type_corr2]['HEALPID'].size))
                    new_healpix = data[type_corr1]['HEALPID'][w]
                    num_new_healpix = new_healpix.size
                    num_bins = data[type_corr2]['WE'].shape[1]
                    data[type_corr2]['HEALPID'] = np.append(
                        data[type_corr2]['HEALPID'], new_healpix)
                    data[type_corr2]['WE'] = np.append(data[type_corr2]['WE'],
                                                       np.zeros(
                                                           (num_new_healpix,
                                                            num_bins)),
                                                       axis=0)

        # Sort the data by the healpix values
        for type_corr1 in list(correlation_files):
            sort = np.array(data[type_corr1]['HEALPID']).argsort()
            data[type_corr1]['WE'] = data[type_corr1]['WE'][sort]
            data[type_corr1]['HEALPID'] = data[type_corr1]['HEALPID'][sort]

        # Correlation in each sub-sample, weighted by the data x data counts
        if corr == 'AUTO':
            xi = _landy_szalay(data['DD']['WE'], data['RR']['WE'],
                               data['DR']['WE'], data['RD']['WE'])
        else:
            xi = _landy_szalay(data['xDD']['WE'], data['xRR']['WE'],
                               data['xD1R2']['WE'], data['xR1D2']['WE'])
        weights = data[data_key]['WE']
        data['HLP_DA'] = xi
        data['HLP_WE'] = weights

        if args.do_not_smooth_cov:
            userprint('INFO: The covariance will not be smoothed')
            covariance = compute_cov(xi, weights)
        else:
            userprint('INFO: The covariance will be smoothed')
            delta_r_par = (data['RPMAX'] - data['RPMIN']) / data['NP']
            delta_r_trans = (data['RTMAX'] - 0.) / data['NT']
            covariance = smooth_cov(xi,
                                    weights,
                                    data['RP'],
                                    data['RT'],
                                    delta_r_par=delta_r_par,
                                    delta_r_trans=delta_r_trans)
        data['CO'] = covariance

    # Cholesky factorisation only succeeds for positive definite matrices
    try:
        scipy.linalg.cholesky(data['CO'])
    except scipy.linalg.LinAlgError:
        userprint('WARNING: Matrix is not positive definite')

    # Identity distortion matrix
    data['DM'] = np.eye(data['DA'].size)

    # Save results
    results = fitsio.FITS(args.out, 'rw', clobber=True)
    nside = data[data_key]['NSIDE']
    header = [{
        'name': 'RPMIN',
        'value': data['RPMIN'],
        'comment': 'Minimum r-parallel'
    }, {
        'name': 'RPMAX',
        'value': data['RPMAX'],
        'comment': 'Maximum r-parallel'
    }, {
        'name': 'RTMAX',
        'value': data['RTMAX'],
        'comment': 'Maximum r-transverse'
    }, {
        'name': 'NP',
        'value': data['NP'],
        'comment': 'Number of bins in r-parallel'
    }, {
        'name': 'NT',
        'value': data['NT'],
        'comment': 'Number of bins in r-transverse'
    }, {
        'name': 'NSIDE',
        'value': nside,
        'comment': 'Healpix nside'
    }, {
        'name': 'OMEGAM',
        'value': fid_Om,
        'comment': 'Omega_matter(z=0) of fiducial LambdaCDM cosmology'
    }, {
        'name': 'OMEGAR',
        'value': fid_Or,
        'comment': 'Omega_radiation(z=0) of fiducial LambdaCDM cosmology'
    }, {
        'name': 'OMEGAK',
        'value': fid_Ok,
        'comment': 'Omega_k(z=0) of fiducial LambdaCDM cosmology'
    }, {
        'name':
        'WL',
        'value':
        fid_wl,
        'comment':
        'Equation of state of dark energy of fiducial LambdaCDM cosmology'
    }]
    names = ['RP', 'RT', 'Z', 'DA', 'CO', 'DM', 'NB']
    comment = [
        'R-parallel', 'R-transverse', 'Redshift', 'Correlation',
        'Covariance matrix', 'Distortion matrix', 'Number of pairs'
    ]
    results.write([data[name] for name in names],
                  names=names,
                  header=header,
                  comment=comment,
                  extname='COR')

    # The per-HEALPix measurements only exist when sub-sampling was used
    if args.cov is None and not args.get_cov_from_poisson:
        healpix_scheme = data[data_key]['HLPXSCHM']
        healpix_list = data[data_key]['HEALPID']
        header2 = [{
            'name': 'HLPXSCHM',
            'value': healpix_scheme,
            'comment': 'healpix scheme'
        }]
        comment = ['Healpix index', 'Sum of weight', 'Correlation']
        results.write([healpix_list, data['HLP_WE'], data['HLP_DA']],
                      names=['HEALPID', 'WE', 'DA'],
                      header=header2,
                      comment=comment,
                      extname='SUB_COR')

    results.close()
Esempio n. 5
0
def main():
    """Export auto and cross-correlation for the fitter.

    Parses the command line, reads the sub-sampled correlation from
    ``--data``, optionally subtracts a shuffled correlation, obtains the
    covariance matrix (from ``--cov``, from ``--cor`` rescaled by the
    sub-sampling variance, or from sub-sampling with optional smoothing),
    co-adds the sub-samples, attaches the distortion matrix (identity when
    ``--dmat`` is not given) and writes two extensions, 'COR' and 'DMATTRI',
    to ``--out``.
    """
    parser = argparse.ArgumentParser(
        description='Export auto and cross-correlation for the fitter.',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)

    # Mandatory string options, registered in the order shown by --help
    mandatory_options = (
        ('--data', 'Correlation produced via picca_cf.py, picca_xcf.py, ...'),
        ('--out', 'Output file name'),
    )
    for flag, help_text in mandatory_options:
        parser.add_argument(flag,
                            type=str,
                            default=None,
                            required=True,
                            help=help_text)

    # Optional string options
    optional_options = (
        ('--dmat',
         'Distortion matrix produced via picca_dmat.py, picca_xdmat.py... '
         '(if not provided will be identity)'),
        ('--cov', 'Covariance matrix (if not provided will be calculated by '
         'subsampling)'),
        ('--cor', 'Correlation matrix (if not provided will be calculated by '
         'subsampling)'),
        ('--remove-shuffled-correlation',
         'Remove a correlation from shuffling the distribution of los'),
    )
    for flag, help_text in optional_options:
        parser.add_argument(flag,
                            type=str,
                            default=None,
                            required=False,
                            help=help_text)

    parser.add_argument('--do-not-smooth-cov',
                        default=False,
                        action='store_true',
                        help='Do not smooth the covariance matrix')

    args = parser.parse_args()

    # Input correlation: binning in HDU 1, sub-samples in HDU 2
    data_file = fitsio.FITS(args.data)
    binning_hdu = data_file[1]
    r_par = np.array(binning_hdu['RP'][:])
    r_trans = np.array(binning_hdu['RT'][:])
    z = np.array(binning_hdu['Z'][:])
    num_pairs = np.array(binning_hdu['NB'][:])
    xi = np.array(data_file[2]['DA'][:])
    weights = np.array(data_file[2]['WE'][:])

    head = data_file[1].read_header()
    num_bins_r_par = head['NP']
    num_bins_r_trans = head['NT']
    r_trans_max = head['RTMAX']
    r_par_min = head['RPMIN']
    r_par_max = head['RPMAX']
    data_file.close()

    # Optional subtraction of the shuffled correlation
    if args.remove_shuffled_correlation is not None:
        shuffled_file = fitsio.FITS(args.remove_shuffled_correlation)
        shuffled_xi = shuffled_file['COR']['DA'][:]
        shuffled_weights = shuffled_file['COR']['WE'][:]
        shuffled_xi = (shuffled_xi * shuffled_weights).sum(axis=1)
        shuffled_weights = shuffled_weights.sum(axis=1)
        positive = shuffled_weights > 0.
        shuffled_xi[positive] /= shuffled_weights[positive]
        shuffled_file.close()
        xi -= shuffled_xi[:, None]

    # Covariance matrix
    if args.cov is not None:
        userprint(
            "INFO: The covariance-matrix will be read from file: {}".format(
                args.cov))
        cov_file = fitsio.FITS(args.cov)
        covariance = cov_file[1]['CO'][:]
        cov_file.close()
    elif args.cor is not None:
        userprint(
            "INFO: The correlation-matrix will be read from file: {}".format(
                args.cor))
        cor_file = fitsio.FITS(args.cor)
        correlation = cor_file[1]['CO'][:]
        cor_file.close()

        lowest = correlation.min()
        highest = correlation.max()
        out_of_range = (lowest < -1. or lowest > 1. or highest < -1.
                        or highest > 1.)
        if out_of_range or np.any(np.diag(correlation) != 1.):
            userprint(
                "WARNING: The correlation-matrix has some incorrect values")

        # Normalise to a unit diagonal, then scale by the measured variance
        diagonal = np.diagonal(correlation)
        correlation = correlation / np.sqrt(diagonal * diagonal[:, None])
        covariance = compute_cov(xi, weights)
        diagonal = np.diagonal(covariance)
        covariance = correlation * np.sqrt(diagonal * diagonal[:, None])
    else:
        delta_r_par = (r_par_max - r_par_min) / num_bins_r_par
        delta_r_trans = (r_trans_max - 0.) / num_bins_r_trans
        if args.do_not_smooth_cov:
            userprint("INFO: The covariance will not be smoothed")
            covariance = compute_cov(xi, weights)
        else:
            userprint("INFO: The covariance will be smoothed")
            covariance = smooth_cov(xi,
                                    weights,
                                    r_par,
                                    r_trans,
                                    delta_r_trans=delta_r_trans,
                                    delta_r_par=delta_r_par)

    # Weighted co-add of the sub-samples
    xi = (xi * weights).sum(axis=0)
    weights = weights.sum(axis=0)
    positive = weights > 0
    xi[positive] /= weights[positive]

    # Cholesky factorisation only succeeds for positive definite matrices
    try:
        scipy.linalg.cholesky(covariance)
    except scipy.linalg.LinAlgError:
        userprint("WARNING: Matrix is not positive definite")

    # Distortion matrix and the coordinates of its model grid
    if args.dmat is None:
        dmat = np.eye(len(xi))
        model_coordinates = (r_par.copy(), r_trans.copy(), z.copy())
    else:
        dmat_file = fitsio.FITS(args.dmat)
        dmat = dmat_file[1]['DM'][:]
        try:
            model_coordinates = (dmat_file[2]['RP'][:],
                                 dmat_file[2]['RT'][:],
                                 dmat_file[2]['Z'][:])
        except IOError:
            model_coordinates = None
        # Fall back on the data coordinates when the model grid could not be
        # read or when the matrix is square with the size of the data
        if model_coordinates is None or dmat.shape == (xi.size, xi.size):
            model_coordinates = (r_par.copy(), r_trans.copy(), z.copy())
        dmat_file.close()
    r_par_dmat, r_trans_dmat, z_dmat = model_coordinates

    # Output file
    results = fitsio.FITS(args.out, 'rw', clobber=True)
    header_cards = (
        ('RPMIN', r_par_min, 'Minimum r-parallel'),
        ('RPMAX', r_par_max, 'Maximum r-parallel'),
        ('RTMAX', r_trans_max, 'Maximum r-transverse'),
        ('NP', num_bins_r_par, 'Number of bins in r-parallel'),
        ('NT', num_bins_r_trans, 'Number of bins in r-transverse'),
    )
    header = [{
        'name': card_name,
        'value': card_value,
        'comment': card_comment
    } for card_name, card_value, card_comment in header_cards]

    results.write([r_par, r_trans, z, xi, covariance, dmat, num_pairs],
                  names=['RP', 'RT', 'Z', 'DA', 'CO', 'DM', 'NB'],
                  comment=[
                      'R-parallel', 'R-transverse', 'Redshift', 'Correlation',
                      'Covariance matrix', 'Distortion matrix',
                      'Number of pairs'
                  ],
                  header=header,
                  extname='COR')
    results.write([r_par_dmat, r_trans_dmat, z_dmat],
                  names=['DMRP', 'DMRT', 'DMZ'],
                  comment=[
                      'R-parallel model', 'R-transverse model',
                      'Redshift model'
                  ],
                  extname='DMATTRI')
    results.close()
Esempio n. 6
0
def main(cmdargs):
    """Export auto and cross-correlation for the fitter.

    The correlation is read from ``--data`` (HDU 1: ``RP``, ``RT``, ``Z``,
    ``NB`` and the binning/cosmology header; HDU 2: ``WE`` and either
    ``DA_BLIND`` or ``DA``). The covariance is then taken from ``--cov``,
    rebuilt from the correlation matrix in ``--cor``, or estimated from the
    data (smoothed unless ``--do-not-smooth-cov`` is given). The correlation
    is reduced to its weighted average over axis 0, a distortion matrix is
    attached (identity when ``--dmat`` is not given), blinding is applied
    when required, and the result is written to ``--out`` as the ``COR`` and
    ``DMATTRI`` extensions.

    Args:
        cmdargs: list of command-line arguments handed to ``argparse``.

    Raises:
        ValueError: if the blinding strategy is not ``corr_yshift`` or if
            blinding is required and ``--blind-corr-type`` was not given.
        RuntimeError: if the blinding template file is missing or its shape
            does not match the correlation.
        SystemExit: if blinded and unblinded correlation / distortion-matrix
            files are mixed.
    """
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        description='Export auto and cross-correlation for the fitter.')

    parser.add_argument(
        '--data',
        type=str,
        default=None,
        required=True,
        help='Correlation produced via picca_cf.py, picca_xcf.py, ...')

    parser.add_argument('--out',
                        type=str,
                        default=None,
                        required=True,
                        help='Output file name')

    parser.add_argument(
        '--dmat',
        type=str,
        default=None,
        required=False,
        help=(
            'Distortion matrix produced via picca_dmat.py, picca_xdmat.py... '
            '(if not provided will be identity)'))

    parser.add_argument(
        '--cov',
        type=str,
        default=None,
        required=False,
        help=('Covariance matrix (if not provided will be calculated by '
              'subsampling)'))

    parser.add_argument(
        '--cor',
        type=str,
        default=None,
        required=False,
        help=('Correlation matrix (if not provided will be calculated by '
              'subsampling)'))

    parser.add_argument(
        '--remove-shuffled-correlation',
        type=str,
        default=None,
        required=False,
        help='Remove a correlation from shuffling the distribution of los')

    parser.add_argument('--do-not-smooth-cov',
                        action='store_true',
                        default=False,
                        help='Do not smooth the covariance matrix')

    parser.add_argument(
        '--blind-corr-type',
        default=None,
        choices=['lyaxlya', 'lyaxlyb', 'qsoxlya', 'qsoxlyb'],
        help='Type of correlation. Required to apply blinding in DESI')

    args = parser.parse_args(cmdargs)

    # Read the measured correlation. The context manager closes the file even
    # if a column or header keyword is missing.
    with fitsio.FITS(args.data) as hdul:
        r_par = np.array(hdul[1]['RP'][:])
        r_trans = np.array(hdul[1]['RT'][:])
        z = np.array(hdul[1]['Z'][:])
        num_pairs = np.array(hdul[1]['NB'][:])
        weights = np.array(hdul[2]['WE'][:])

        # A blinded column, when present, takes precedence over the clear one
        if 'DA_BLIND' in hdul[2].get_colnames():
            data_name = 'DA_BLIND'
        else:
            data_name = 'DA'
        xi = np.array(hdul[2][data_name][:])

        head = hdul[1].read_header()

    num_bins_r_par = head['NP']
    num_bins_r_trans = head['NT']
    r_trans_max = head['RTMAX']
    r_par_min = head['RPMIN']
    r_par_max = head['RPMAX']

    if "BLINDING" in head:
        blinding = head["BLINDING"]
        if blinding == 'minimal':
            blinding = 'corr_yshift'
            userprint("The minimal strategy is no longer supported."
                      "Automatically switch to corr_yshift.")
    else:
        # if BLINDING keyword not present (old file), ignore blinding
        blinding = "none"

    # Optionally subtract the weighted average of a shuffled correlation
    if args.remove_shuffled_correlation is not None:
        with fitsio.FITS(args.remove_shuffled_correlation) as hdul:
            xi_shuffled = hdul['COR'][data_name][:]
            weight_shuffled = hdul['COR']['WE'][:]
        xi_shuffled = (xi_shuffled * weight_shuffled).sum(axis=1)
        weight_shuffled = weight_shuffled.sum(axis=1)
        w = weight_shuffled > 0.
        xi_shuffled[w] /= weight_shuffled[w]
        xi -= xi_shuffled[:, None]

    # Covariance: read it, rebuild it from a correlation matrix, or estimate
    # it. This must happen before xi and weights are averaged below.
    if args.cov is not None:
        userprint(("INFO: The covariance-matrix will be read from file: "
                   "{}").format(args.cov))
        with fitsio.FITS(args.cov) as hdul:
            covariance = hdul[1]['CO'][:]
    elif args.cor is not None:
        userprint(("INFO: The correlation-matrix will be read from file: "
                   "{}").format(args.cor))
        with fitsio.FITS(args.cor) as hdul:
            correlation = hdul[1]['CO'][:]
        # min <= max, so checking min against -1 and max against +1 is
        # sufficient to detect any out-of-range element
        if ((correlation.min() < -1.) or (correlation.max() > 1.)
                or np.any(np.diag(correlation) != 1.)):
            userprint(("WARNING: The correlation-matrix has some incorrect "
                       "values"))
        # Renormalise so that the diagonal is exactly one, then scale by the
        # variance estimated from the data
        var = np.diagonal(correlation)
        correlation = correlation / np.sqrt(var * var[:, None])
        covariance = compute_cov(xi, weights)
        var = np.diagonal(covariance)
        covariance = correlation * np.sqrt(var * var[:, None])
    else:
        delta_r_par = (r_par_max - r_par_min) / num_bins_r_par
        delta_r_trans = (r_trans_max - 0.) / num_bins_r_trans
        if not args.do_not_smooth_cov:
            userprint("INFO: The covariance will be smoothed")
            covariance = smooth_cov(xi,
                                    weights,
                                    r_par,
                                    r_trans,
                                    delta_r_trans=delta_r_trans,
                                    delta_r_par=delta_r_par)
        else:
            userprint("INFO: The covariance will not be smoothed")
            covariance = compute_cov(xi, weights)

    # Weighted average over axis 0; bins with no weight are left as the raw
    # weighted sum to avoid dividing by zero
    xi = (xi * weights).sum(axis=0)
    weights = weights.sum(axis=0)
    w = weights > 0
    xi[w] /= weights[w]

    # Only warn: a non positive-definite covariance is still exported
    try:
        scipy.linalg.cholesky(covariance)
    except scipy.linalg.LinAlgError:
        userprint("WARNING: Matrix is not positive definite")

    if args.dmat is not None:
        # sys.exit raises SystemExit, so the file is still closed on the
        # error paths below
        with fitsio.FITS(args.dmat) as hdul:
            if (data_name == "DA_BLIND"
                    and 'DM_BLIND' in hdul[1].get_colnames()):
                dmat = np.array(hdul[1]['DM_BLIND'][:])
                dmat_name = 'DM_BLIND'
            elif data_name == "DA_BLIND":
                userprint(
                    "Blinded correlations were given but distortion matrix "
                    "is unblinded. These files should not mix. Exiting...")
                sys.exit(1)
            elif 'DM_BLIND' in hdul[1].get_colnames():
                userprint(
                    "Non-blinded correlations were given but distortion matrix "
                    "is blinded. These files should not mix. Exiting...")
                sys.exit(1)
            else:
                dmat = hdul[1]['DM'][:]
                dmat_name = 'DM'

            # Fall back to the data coordinates if the model coordinates
            # cannot be read from the distortion-matrix file
            try:
                r_par_dmat = hdul[2]['RP'][:]
                r_trans_dmat = hdul[2]['RT'][:]
                z_dmat = hdul[2]['Z'][:]
            except IOError:
                r_par_dmat = r_par.copy()
                r_trans_dmat = r_trans.copy()
                z_dmat = z.copy()
            # A square matrix of the data size is given the data coordinates
            if dmat.shape == (xi.size, xi.size):
                r_par_dmat = r_par.copy()
                r_trans_dmat = r_trans.copy()
                z_dmat = z.copy()
    else:
        dmat = np.eye(len(xi))
        r_par_dmat = r_par.copy()
        r_trans_dmat = r_trans.copy()
        z_dmat = z.copy()
        dmat_name = 'DM_EMPTY'

    header = [
        {
            'name': "BLINDING",
            'value': blinding,
            'comment': 'String specifying the blinding strategy'
        },
        {
            'name': 'RPMIN',
            'value': r_par_min,
            'comment': 'Minimum r-parallel'
        },
        {
            'name': 'RPMAX',
            'value': r_par_max,
            'comment': 'Maximum r-parallel'
        },
        {
            'name': 'RTMAX',
            'value': r_trans_max,
            'comment': 'Maximum r-transverse'
        },
        {
            'name': 'NP',
            'value': num_bins_r_par,
            'comment': 'Number of bins in r-parallel'
        },
        {
            'name': 'NT',
            'value': num_bins_r_trans,
            'comment': 'Number of bins in r-transverse'
        },
        {
            'name': 'OMEGAM',
            'value': head['OMEGAM'],
            'comment': 'Omega_matter(z=0) of fiducial LambdaCDM cosmology'
        },
        {
            'name': 'OMEGAR',
            'value': head['OMEGAR'],
            'comment': 'Omega_radiation(z=0) of fiducial LambdaCDM cosmology'
        },
        {
            'name': 'OMEGAK',
            'value': head['OMEGAK'],
            'comment': 'Omega_k(z=0) of fiducial LambdaCDM cosmology'
        },
        {
            'name':
            'WL',
            'value':
            head['WL'],
            'comment':
            'Equation of state of dark energy of fiducial LambdaCDM cosmology'
        },
    ]
    comment = [
        'R-parallel', 'R-transverse', 'Redshift', 'Correlation',
        'Covariance matrix', 'Distortion matrix', 'Number of pairs'
    ]

    # Check if we need blinding and apply it
    if 'BLIND' in data_name or blinding != 'none':
        if blinding == 'corr_yshift':
            userprint("Blinding using strategy corr_yshift.")
        else:
            raise ValueError(
                "Expected blinding to be 'corr_yshift' or 'minimal'."
                " Found {}.".format(blinding))

        if args.blind_corr_type is None:
            raise ValueError("Blinding strategy 'corr_yshift' requires"
                             " argument --blind-corr-type.")

        # Check type of correlation and get size and regular binning
        if args.blind_corr_type in ['lyaxlya', 'lyaxlyb']:
            corr_size = 2500
            rp_interp_grid = np.arange(2., 202., 4)
            rt_interp_grid = np.arange(2., 202., 4)
        elif args.blind_corr_type in ['qsoxlya', 'qsoxlyb']:
            corr_size = 5000
            rp_interp_grid = np.arange(-197.99, 202.01, 4)
            rt_interp_grid = np.arange(2., 202, 4)
        else:
            raise ValueError("Unknown correlation type: {}".format(
                args.blind_corr_type))

        if corr_size == len(xi):
            # Read the blinding file and get the right template
            blinding_filename = (
                '/global/cfs/projectdirs/desi/science/lya/y1-kp6/'
                'blinding/y1_blinding_v1.2_standard_29_03_2022.h5')
        else:
            # Read the regular grid blinding file and get the right template
            blinding_filename = (
                '/global/cfs/projectdirs/desi/science/lya/y1-kp6/'
                'blinding/y1_blinding_v1.2_regular_grid_29_03_2022.h5')

        if not os.path.isfile(blinding_filename):
            raise RuntimeError(
                "Missing blinding file. Make sure you are running at"
                " NERSC or contact picca developers")
        # np.array copies the dataset into memory, so the file can be closed
        # as soon as the template has been read
        with h5py.File(blinding_filename, 'r') as blinding_file:
            hex_diff = np.array(
                blinding_file['blinding'][args.blind_corr_type]).astype(str)
        # The template is stored as hexadecimal float strings
        diff_grid = np.array([float.fromhex(x) for x in hex_diff])

        if corr_size == len(xi):
            diff = diff_grid
        else:
            # Interpolate the blinding template on the regular grid
            interp = scipy.interpolate.RectBivariateSpline(
                rp_interp_grid,
                rt_interp_grid,
                diff_grid.reshape(len(rp_interp_grid), len(rt_interp_grid)),
                kx=3,
                ky=3)
            diff = interp.ev(r_par, r_trans)

        # Check that the shapes match
        if np.shape(xi) != np.shape(diff):
            raise RuntimeError(
                "Unknown binning or wrong correlation type. Cannot blind."
                " Please raise an issue or contact picca developers.")

        # Add blinding
        xi = xi + diff

    # The output is only opened (and any previous file clobbered) once every
    # validation step above has succeeded
    with fitsio.FITS(args.out, 'rw', clobber=True) as results:
        results.write(
            [xi, r_par, r_trans, z, covariance, dmat, num_pairs],
            names=[data_name, 'RP', 'RT', 'Z', 'CO', dmat_name, 'NB'],
            comment=comment,
            header=header,
            extname='COR')
        comment = ['R-parallel model', 'R-transverse model', 'Redshift model']
        results.write([r_par_dmat, r_trans_dmat, z_dmat],
                      names=['DMRP', 'DMRT', 'DMZ'],
                      comment=comment,
                      extname='DMATTRI')
def main(cmdargs):
    """Compute the cross-covariance matrix between two correlations.

    Both input files are read (header keyword ``NSIDE`` from HDU 1; header
    keyword ``HLPXSCHM`` and columns ``WE``, ``HEALPID`` and ``DA_BLIND`` or
    ``DA`` from HDU 2). Healpix present in only one file are added to the
    other with zero data and zero weight, both sets are sorted by healpix id
    and concatenated along the bin axis, and the covariance of the joint
    vector is computed. The off-diagonal block (bins of correlation 1 versus
    bins of correlation 2) of the covariance and of the corresponding
    correlation matrix is written to ``--out`` in the ``COVAR`` extension.

    Args:
        cmdargs: list of command-line arguments handed to ``argparse``.

    Raises:
        SystemExit: with status 1 if the two files have different ``NSIDE``
            or ``HLPXSCHM`` values.
    """
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        description=('Compute the cross-covariance matrix between two '
                     'correlations'))

    parser.add_argument(
        '--data1',
        type=str,
        default=None,
        required=True,
        help='Correlation 1 produced via picca_cf.py, picca_xcf.py, ...')

    parser.add_argument(
        '--data2',
        type=str,
        default=None,
        required=True,
        help='Correlation 2 produced via picca_cf.py, picca_xcf.py, ...')

    parser.add_argument('--out',
                        type=str,
                        default=None,
                        required=True,
                        help='Output file name')

    args = parser.parse_args(cmdargs)

    data = {}

    # Read data; the context manager closes each file even if a keyword or
    # column is missing
    for index, filename in enumerate([args.data1, args.data2]):
        with fitsio.FITS(filename) as hdul:
            nside = hdul[1].read_header()['NSIDE']
            healpix_scheme = hdul[2].read_header()['HLPXSCHM']
            weights = np.array(hdul[2]['WE'][:])
            healpix_list = np.array(hdul[2]['HEALPID'][:])

            # A blinded column, when present, takes precedence
            if 'DA_BLIND' in hdul[2].get_colnames():
                xi = np.array(hdul[2]['DA_BLIND'][:])
            else:
                xi = np.array(hdul[2]['DA'][:])

        data[index] = {
            'DA': xi,
            'WE': weights,
            'HEALPID': healpix_list,
            'NSIDE': nside,
            'HLPXSCHM': healpix_scheme
        }

    # exit with a failure status if NSIDE1 != NSIDE2
    if data[0]['NSIDE'] != data[1]['NSIDE']:
        userprint(("ERROR: NSIDE are different: {} != "
                   "{}").format(data[0]['NSIDE'], data[1]['NSIDE']))
        sys.exit(1)
    # exit with a failure status if HLPXSCHM1 != HLPXSCHM2
    if data[0]['HLPXSCHM'] != data[1]['HLPXSCHM']:
        userprint(("ERROR: HLPXSCHM are different: {} != "
                   "{}").format(data[0]['HLPXSCHM'], data[1]['HLPXSCHM']))
        sys.exit(1)

    # Add unshared healpix as empty data
    for key in sorted(data):
        other = (key + 1) % 2
        missing = np.logical_not(
            np.isin(data[other]['HEALPID'], data[key]['HEALPID']))
        if missing.sum() > 0:
            new_healpix = data[other]['HEALPID'][missing]
            num_new_healpix = new_healpix.size
            num_bins = data[key]['DA'].shape[1]
            userprint(("Some healpix are unshared in data {}: "
                       "{}").format(key, new_healpix))
            # Zero weight means the padded rows do not contribute
            padding = np.zeros((num_new_healpix, num_bins))
            data[key]['DA'] = np.append(data[key]['DA'], padding, axis=0)
            data[key]['WE'] = np.append(data[key]['WE'], padding, axis=0)
            data[key]['HEALPID'] = np.append(data[key]['HEALPID'], new_healpix)

    # Sort the data by the healpix values so that rows of both data sets
    # refer to the same healpix
    for key in sorted(data):
        sort = np.array(data[key]['HEALPID']).argsort()
        data[key]['DA'] = data[key]['DA'][sort]
        data[key]['WE'] = data[key]['WE'][sort]
        data[key]['HEALPID'] = data[key]['HEALPID'][sort]

    # Append the data along the bin axis
    xi = np.append(data[0]['DA'], data[1]['DA'], axis=1)
    weights = np.append(data[0]['WE'], data[1]['WE'], axis=1)

    # Compute the covariance of the joint data vector
    covariance = compute_cov(xi, weights)

    # Get the cross-covariance: rows of correlation 1, columns of
    # correlation 2
    num_bins = data[0]['DA'].shape[1]
    cross_covariance = covariance[:num_bins, num_bins:].copy()

    # Get the cross-correlation from the same block of the normalised matrix
    var = np.diagonal(covariance)
    cor = covariance / np.sqrt(var * var[:, None])
    cross_correlation = cor[:num_bins, num_bins:].copy()

    # Test if valid; only warn, the result is still written
    try:
        scipy.linalg.cholesky(covariance)
    except scipy.linalg.LinAlgError:
        userprint('WARNING: Matrix is not positive definite')

    # Save
    with fitsio.FITS(args.out, 'rw', clobber=True) as results:
        results.write([cross_covariance, cross_correlation],
                      names=['CO', 'COR'],
                      comment=['Covariance matrix', 'Correlation matrix'],
                      extname='COVAR')