Code Example #1
File: web_utils.py Project: vcalderon2009/cosmoutils
def url_checker(url_str):
    """
    Checks if the URL is valid or not.

    Parameters
    -----------
    url_str : `str`
        URL of the website to evaluate.

    Raises
    ----------
    LSSUtils_Error : `Exception`
        Program exception raised if input parameters are not accepted.
    """
    file_msg = fd.Program_Msg(__file__)
    ## Checking input parameters
    if not (isinstance(url_str, str)):
        msg = '{0} `url_str` ({1}) is not a STRING!'.format(
            file_msg, type(url_str))
        raise LSSUtils_Error(msg)
    ##
    ## Checking Website
    request_url = requests.get(url_str)
    if (request_url.status_code != 200):
        msg = '{0} `url_str` ({1}) does not exist!'.format(file_msg, url_str)
        raise LSSUtils_Error(msg)
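A minimal standalone sketch of the same check using only `requests`; the helper name `url_is_valid` is hypothetical, and the `try/except` guard is an addition for URLs that fail to connect at all:

import requests

def url_is_valid(url_str):
    # Returns True only when the URL responds with HTTP 200 (hypothetical helper)
    try:
        return requests.get(url_str).status_code == 200
    except requests.RequestException:
        return False

print(url_is_valid('https://drivendata.github.io/cookiecutter-data-science/'))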
Code Example #2
def read_hdf5_file_to_pandas_DF(hdf5_file, key=None):
    """
    Reads content of HDF5 file and converts it to a Pandas DataFrame

    Parameters
    ----------
    hdf5_file : str
        Path to the HDF5 file. This is the file that will be converted 
        to a pandas DataFrame.

    key : str or NoneType, optional
        Key or path in `hdf5_file` for the pandas DataFrame and the normal 
        HDF5 file.

    Returns
    ----------
    df : `pandas.DataFrame`
        DataFrame from `hdf5_file` under the `key` directory.
    """
    file_msg = fd.Program_Msg(__file__)
    fd.File_Exists(hdf5_file)
    # Reading in Pandas DataFrame
    try:
        df = pd.read_hdf(hdf5_file, key=key)
    except Exception:
        msg = '{0} Could not read `hdf5_file` ({1})! Please check if it exists'
        msg = msg.format(file_msg, hdf5_file)
        raise LSSUtils_Error(msg)

    return df
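A standalone round-trip sketch of the call above; the filename and key are illustrative, and the `tables` package must be installed since it backs `pandas.read_hdf` / `DataFrame.to_hdf`:

import pandas as pd

df_in = pd.DataFrame({'x': [1, 2, 3], 'y': [0.1, 0.2, 0.3]})
df_in.to_hdf('example.h5', key='/Main', mode='w')   # write the pandas-HDF5 file
df = pd.read_hdf('example.h5', key='/Main')         # read it back
print(df.equals(df_in))  # True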
Code Example #3
def pandas_file_to_hdf5_file(df_file, hdf5_file, key=None, mode='w'):
    """
    Converts an HDF5 file in `pandas` format to a normal HDF5 file.

    Parameters
    ----------
    df_file : str
        Path to the `df_file` containing the pandas DataFrame to be converted

    hdf5_file : str
        Path to the output HDF5 file containing arrays as keys

    key : str or NoneType, optional
        Key or path in HDF5 file for the `df_file` and `hdf5_file`
    """
    file_msg = fd.Program_Msg(__file__)
    fd.File_Exists(df_file)
    # Reading in DataFrame
    if not key:
        data, key = read_pandas_hdf5(df_file, key=None, ret=True)
    else:
        data = read_pandas_hdf5(df_file, key=key)
    # Rearranging data
    arr_names = data.dtypes.index.values
    dtype_arr = data.dtypes.values
    dtypes_arr = np.array([x.str for x in dtype_arr])
    data_dtypes = np.dtype(list(zip(arr_names, dtypes_arr)))
    dataset = np.recarray((len(data), ), dtype=data_dtypes)
    for name in dataset.dtype.names:
        dataset[name] = data[name]
    # Saving file to HDF5 format
    hdf5_obj = h5py.File(hdf5_file, mode=mode)
    hdf5_obj.create_dataset(key, data=dataset)
    hdf5_obj.close()
    msg = '{0} HDF5 file created: {1}'.format(file_msg, hdf5_file)
    print(msg)
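A hedged read-back sketch for a file written by the function above; the filename and key are illustrative. Since `create_dataset` stored a structured array, columns come back by field name:

import h5py

with h5py.File('output.h5', 'r') as hdf5_obj:
    dataset = hdf5_obj['/Main'][()]     # structured array stored above
    print(dataset.dtype.names)          # original DataFrame column names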
Code Example #4
def pandas_df_to_hdf5_file(df, hdf5_file, key=None, mode='w', complevel=8):
    """
    Saves a `pandas.DataFrame` into a `pandas` HDF5 FILE.

    Parameters
    ----------
    df : `pandas.DataFrame`
        DataFrame to be converted and saved into a HDF5 file.

    hdf5_file : str
        Path to the output HDF5 file

    key : str or NoneType, optional
        Key or path, under which `df` will be saved in the `hdf5_file`.

    mode : {'w','a'}, optional
        Mode to handle `hdf5_file`. This value is set to `w` by default,
        which stands for `write`.

    complevel : int, optional
        Level of compression for `hdf5_file`.
        The valid range of `complevel` is 0-9.
        This is set to a default of 8.
    """
    file_msg = fd.Program_Msg(__file__)
    # Saving DataFrame to `hdf5_file`
    try:
        df.to_hdf(hdf5_file, key, mode=mode, complevel=complevel)
        msg = '{0} HDF5 file created: {1}'.format(file_msg, hdf5_file)
        print(msg)
    except:
        msg = '{0} Could not create HDF5 file'.format(file_msg)
        raise LSSUtils_Error(msg)
Code Example #5
def Bins_array_create(arr, base=10):
    """
    Generates an evenly-spaced array between the minimum and maximum value
    of a given array.

    Parameters
    ----------
    arr : array_like
        Array of numbers or floats

    base : int or float, optional
        Interval used to create the evenly-spaced array of elements

    Returns
    ----------
    bins_arr : `numpy.ndarray`
        Array of elements separated in intervals of `base`
    """
    file_msg = fd.Program_Msg(__file__)
    # Transforming input data
    base = float(base)
    arr = np.asarray(arr)
    # Checking array dimensions
    if arr.ndim != 1:
        msg = '{0} The input array is not of dimension 1, but of `{1}`'.format(
            file_msg, arr.ndim)
        raise LSSUtils_Error(msg)
    # Creating evenly-spaced array
    arr_min = myfloor(arr.min(), base=base)
    arr_max = myceil(arr.max(), base=base)
    bins_arr = np.arange(arr_min, arr_max + 0.5 * base, base)

    return bins_arr
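`myfloor` and `myceil` are helpers defined elsewhere in the package; a plausible sketch, assuming they round down/up to the nearest multiple of `base`:

import math

def myfloor(x, base=10):
    # Round `x` down to the nearest multiple of `base` (assumed behavior)
    return base * math.floor(float(x) / base)

def myceil(x, base=10):
    # Round `x` up to the nearest multiple of `base` (assumed behavior)
    return base * math.ceil(float(x) / base)

print(myfloor(12.3, base=5), myceil(12.3, base=5))  # 10 15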
Code Example #6
def IDL_read_file(idl_file):
    """
    Reads an IDL file and converts it to a Python dictionary

    Parameters
    ----------
    idl_file : string
        Path to the filename being used

    Returns
    ----------
    idl_dict : python dictionary
        Dictionary with the data from `idl_file`
    """
    # Checking that file exists
    fd.File_Exists(idl_file)
    # Converting to dictionary
    try:
        idl_dict = readsav(idl_file, python_dict=True)
    except Exception:
        msg = '{0} `idl_file` ({1}) is not an IDL file'.format(
            fd.Program_Msg(__file__), idl_file)
        raise LSSUtils_Error(msg)

    return idl_dict
Code Example #7
def cookiecutter_paths(path='./'):
    """
    Paths to main folders in the `Data Science` cookiecutter template.
    This structure was taken from:
    - https://drivendata.github.io/cookiecutter-data-science/

    Parameters
    ----------
    path : str, optional
        Path to the file within the `.git` repository

    Returns
    ----------
    param_dict : python dictionary
        Dictionary with info of the project that uses the Data Science 
        cookiecutter template.

    Raises
    ----------
    LSSUtils_Error : exception
        If `path` is not within a .git directory, it raises an error.
    """
    # Base Path
    base_dir = git_root_dir(path) + '/'
    # Checking that directory exists
    if os.path.exists(base_dir):
        # Plot Directory
        plot_dir = os.path.join(base_dir, 'reports', 'figures/')
        # Source directory
        src_dir = os.path.join(base_dir, 'src', 'data')
        # Data path
        data_dir = os.path.join(base_dir, 'data/')
        # Creating files
        for dir_ii in [plot_dir, src_dir, data_dir]:
            fd.Path_Folder(dir_ii)
        # Saving to dictionary
        param_dict = {}
        param_dict['base_dir'] = base_dir
        param_dict['plot_dir'] = plot_dir
        param_dict['src_dir'] = src_dir
        param_dict['data_dir'] = data_dir
    else:
        msg = '{0} `base_dir` ({1}) is not a Git directory! Exiting'.format(
            fd.Program_Msg(__file__), base_dir)
        raise LSSUtils_Error(msg)

    return param_dict
Code Example #8
def luminosity_to_absolute_mag(lum,
                               filter_opt,
                               system='SDSS_Blanton_2003_z0.1'):
    """
    Calculates the absolute magnitude of an object through the `filter_opt`
    filter.

    Parameters
    -----------
    lum : float, int, array_like
        Luminosity of 1 or more objects. In units of `solar luminosities`.

    filter_opt : {'U', 'B', 'V', 'R', 'I', 'J', 'H', 'K'} str
        Magnitude filter to use.

    system : {'Binney_and_Merrifield_1998', 'SDSS_Blanton_2003_z0.1'} str
        Kind of filter to use.

        Options:
            - 'Binney_and_Merrifield_1998' : See Binney and Merrifield, 1998
            - 'SDSS_Blanton_2003_z0.1' : See Blanton et al. (2003) Eqn. 14.

    Returns
    -----------
    abs_mag : float, int, or array_like
        Absolute magnitude of one or multiple objects. Same type as `lum`

    Raises
    ----------
    LSSUtils_Error : Exception
        Program exception raised if input parameters are not accepted.
    """
    file_msg = fd.Program_Msg(__file__)
    ## Checking input parameters
    valid_types = (float, int, list, np.ndarray)
    if not (isinstance(lum, valid_types)):
        msg = '{0} `lum` ({1}) is not a valid type!'.format(
            file_msg, type(lum))
        raise LSSUtils_Error(msg)
    ## Obtaining Sun's absolute magnitude
    abs_mag_sun = get_sun_mag(filter_opt, system=system)
    ## Absolute magnitude calculation
    lum_sun = 1.0  # In units of solar luminosities
    # Absolute magnitude of objects
    abs_mag = abs_mag_sun - 2.5 * np.log10(lum / lum_sun)

    return abs_mag
Code Example #9
def absolute_magnitude_to_luminosity(abs_mag,
                                     filter_opt,
                                     system='SDSS_Blanton_2003_z0.1'):
    """
    Calculates the luminosity of the object through `filter_opt` filter.

    Parameters
    -----------
    abs_mag : float, int, or array_like
        Absolute magnitude of one or multiple objects.

    filter_opt : {'U', 'B', 'V', 'R', 'I', 'J', 'H', 'K'} str
        Magnitude filter to use.

    system : {'Binney_and_Merrifield_1998', 'SDSS_Blanton_2003_z0.1'} str
        Kind of filter to use.

        Options:
            - 'Binney_and_Merrifield_1998' : See Binney and Merrifield, 1998
            - 'SDSS_Blanton_2003_z0.1' : See Blanton et al. (2003) Eqn. 14.

    Returns
    -----------
    log_L : float or array_like
        Logarithmic value of the luminosity in the `filter_opt` band.

    Raises
    ----------
    LSSUtils_Error : Exception
        Program exception raised if input parameters are not accepted.
    """
    file_msg = fd.Program_Msg(__file__)
    ## Checking input parameters
    valid_types = (float, int, list, np.ndarray)
    if not (isinstance(abs_mag, valid_types)):
        msg = '{0} `abs_mag` ({1}) is not a valid type!'.format(
            file_msg, abs_mag)
        raise LSSUtils_Error(msg)
    ## Obtaining Sun's absolute magnitude
    abs_mag_sun = get_sun_mag(filter_opt, system=system)
    ## Luminosity calculations
    log_L = (abs_mag_sun - abs_mag) * 0.4

    return log_L
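A quick numeric round trip through the two conversions above, using the r-band solar magnitude (4.76) from the 'SDSS_Blanton_2003_z0.1' system listed in `get_sun_mag` below:

import numpy as np

abs_mag_sun = 4.76
lum = 1.0e10                                   # luminosity in solar units
abs_mag = abs_mag_sun - 2.5 * np.log10(lum)    # luminosity_to_absolute_mag step
log_L = 0.4 * (abs_mag_sun - abs_mag)          # absolute_magnitude_to_luminosity step
print(abs_mag, 10.0**log_L)                    # -20.24 10000000000.0 (input luminosity recovered)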
Code Example #10
def extract_catls(catl_kind='data',
                  catl_type='mr',
                  sample_s='19',
                  datatype='.hdf5',
                  catl_info='members',
                  halotype='fof',
                  clf_method=3,
                  hod_n=0,
                  clf_seed=1235,
                  perf_opt=False,
                  return_len=False,
                  print_filedir=True):
    """
    Extracts a list of synthetic catalogues given input parameters

    Parameters
    ------------
    catl_kind : {'data', 'mocks'} str, optional
        Type of catalogue to use. This variable is set to `data` by default.

        Options:
            - `data` : catalogues come from SDSS `real` catalogue
            - `mocks` : catalogues come from SDSS `mock` catalogues

    catl_type : {'mr', 'mstar'} str, optional
        Type of catalogue to use. It shows which abundance matching method
        was used for the CLF when assigning halo masses. This variable is 
        set to 'mr' by default.

        Options:
            - `mr` : Uses r-band absolute magnitude
            - `mstar` : Uses stellar masses

    sample_s : {'19', '20', '21'} str, optional
        Volume-limited sample to use. This variable is set to '19' by default.

        Options:
            - '19' : Uses the Mr19 volume-limited sample, i.e. 'Consuelo'
            - '20' : Uses the Mr20 volume-limited sample, i.e. 'Esmeralda'
            - '21' : Uses the Mr21 volume-limited sample, i.e. 'Carmen'
    
    datatype : {'.hdf5'} str, optional
        Data type of the files to be indexed in the folder. This variable 
        is set to '.hdf5' by default.

    catl_info : {'members', 'groups'} str, optional
        Option for which kind of catalogues to use.

        Options:
            - `members` : Member galaxies of group catalogues
            - `groups` : Catalogues with `group` information.

    halotype : {'fof', 'so'} str, optional
        Type of the dark matter halo of the simulation used to create the 
        synthetic catalogues. This variable is set to `fof` by default.

        Options:
            - 'fof': Friends-of-Friends halos.
            - 'so' : Spherical overdensity halos.

    clf_method : {1, 2, 3} int, optional
        Method for assigning galaxy properties to mock galaxies.
        This variable is set to `3` by default.

        Options:
            - `1` : Independent assignment of (g-r) color, sersic, and log(ssfr)
            - `2` : (g-r) decides active/passive designation and draws values
                    independently.
            - `3` : (g-r) decides active/passive designations, and 
                    assigns other galaxy properties for that given galaxy.

    hod_n : {0, 1} int, optional
        HOD model to use. Only relevant when `catl_kind == mocks`.

    clf_seed : int, optional
        Seed used for the `CLF` random seed. This variable is set to `1235` 
        by default.

    perf_opt : boolean, optional
        If True, it chooses to analyze the `perfect` set of synthetic
        catalogues. This variable is set to `False` by default.
    
    return_len : boolean, optional
        If True, the function returns the total number of elements in 
        the folder that match the criteria.

    print_filedir : boolean, optional
        If True, the output directory is printed onto the screen.

    Returns
    ------------
    catl_arr : `numpy.ndarray`
        Array of elements/files matching the `datatype` type in the directory.

    Raises
    ------------
    LSSUtils_Error : Exception from `LSSUtils_Error`
        Program exception raised if input parameters are not accepted.
    """
    file_msg = fd.Program_Msg(__file__)
    ## Checking input parameters
    catl_kind_valid = ['data', 'mocks']
    catl_type_valid = ['mr', 'mstar']
    sample_s_valid = ['19', '20', '21']
    catl_info_valid = ['members', 'groups']
    halotype_valid = ['fof', 'so']
    clf_method_valid = [1, 2, 3]
    hod_n_valid = [0, 1]
    # `catl_kind`
    if not (catl_kind in catl_kind_valid):
        msg = '{0} `catl_kind` ({1}) is not a valid input!'.format(
            file_msg, catl_kind)
        raise LSSUtils_Error(msg)
    # `catl_type`
    if not (catl_type in catl_type_valid):
        msg = '{0} `catl_type` ({1}) is not a valid input!'.format(
            file_msg, catl_type)
        raise LSSUtils_Error(msg)
    # `sample_s`
    if not (sample_s in sample_s_valid):
        msg = '{0} `sample_s` ({1}) is not a valid input!'.format(
            file_msg, sample_s)
        raise LSSUtils_Error(msg)
    # `catl_info`
    if not (catl_info in catl_info_valid):
        msg = '{0} `catl_info` ({1}) is not a valid input!'.format(
            file_msg, catl_info)
        raise LSSUtils_Error(msg)
    # `halotype`
    if not (halotype in halotype_valid):
        msg = '{0} `halotype` ({1}) is not a valid input!'.format(
            file_msg, halotype)
        raise LSSUtils_Error(msg)
    # `clf_method`
    if not (clf_method in clf_method_valid):
        msg = '{0} `clf_method` ({1}) is not a valid input!'.format(
            file_msg, clf_method)
        raise LSSUtils_Error(msg)
    # `hod_n`
    if not (hod_n in hod_n_valid):
        msg = '{0} `hod_n` ({1}) is not a valid input!'.format(file_msg, hod_n)
        raise LSSUtils_Error(msg)
    # `perf_opt`
    if not (isinstance(perf_opt, bool)):
        msg = '{0} `perf_opt` ({1}) is not a valid type!'.format(
            file_msg, type(perf_opt))
        raise LSSUtils_Error(msg)
    # `print_filedir`
    if not (isinstance(print_filedir, bool)):
        msg = '{0} `print_filedir` ({1}) is not a valid type!'.format(
            file_msg, type(print_filedir))
        raise LSSUtils_Error(msg)
    # `return_len`
    if not (isinstance(return_len, bool)):
        msg = '{0} `return_len` ({1}) is not a valid type!'.format(
            file_msg, type(return_len))
        raise LSSUtils_Error(msg)
    # `datatype`
    if not (isinstance(datatype, str)):
        msg = '{0} `datatype` ({1}) is not a valid type!'.format(
            file_msg, type(datatype))
        raise LSSUtils_Error(msg)
    ##
    ## Extracting the path of the catalogues
    filedir = catl_sdss_dir(catl_kind=catl_kind,
                            catl_type=catl_type,
                            sample_s=sample_s,
                            catl_info=catl_info,
                            halotype=halotype,
                            clf_method=clf_method,
                            hod_n=hod_n,
                            clf_seed=clf_seed,
                            perf_opt=perf_opt,
                            print_filedir=print_filedir)
    ##
    ## Converting to array
    catl_arr = np.sort(fd.Index(filedir, datatype))
    # Checking number of elements
    if len(catl_arr) == 0:
        msg = '{0} `catl_arr` contains 0 entries!'.format(file_msg)
        raise LSSUtils_Error(msg)
    ##
    ## Returning elements
    if return_len:
        return catl_arr, len(catl_arr)
    else:
        return catl_arr
Code Example #11
def sdss_catl_clean(catl_pd, catl_kind, catl_info='members', reindex=True):
    """
    Cleans the catalogue by removing `failed` values.

    Parameters
    -----------
    catl_pd : `pandas.DataFrame`
        Dataset with the catalogue information.

    catl_kind : {'data', 'mocks'} str
        Type of catalogue to use.

        Options:
            - `data` : catalogues come from SDSS `real` catalogue
            - `mocks` : catalogues come from SDSS `mock` catalogues

    catl_info : {'members', 'groups'} str, optional
        Option for which kind of catalogues to use.

        Options:
            - `members` : Member galaxies of group catalogues
            - `groups` : Catalogues with `group` information.

    reindex : boolean, optional
        If True, the output catalogue is re-indexed.

    Returns
    -----------
    catl_pd_clean : `pandas.DataFrame`
        Cleaned version of `catl_pd`, after having removed `failed` values.

    Raises
    ------------
    LSSUtils_Error : Exception from `LSSUtils_Error`
        Program exception raised if input parameters are not accepted.
    """
    file_msg = fd.Program_Msg(__file__)
    ## Checking input parameters
    catl_kind_valid = ['data', 'mocks']
    catl_info_valid = ['members', 'groups']
    # `catl_pd`
    if not (isinstance(catl_pd, pd.DataFrame)):
        msg = '{0} `catl_pd` ({1}) is not a valid type!'.format(
            file_msg, catl_pd)
        raise LSSUtils_Error(msg)
    # `catl_kind`
    if not (catl_kind in catl_kind_valid):
        msg = '{0} `catl_kind` ({1}) is not a valid input!'.format(
            file_msg, catl_kind)
        raise LSSUtils_Error(msg)
    # `catl_info`
    if not (catl_info in catl_info_valid):
        msg = '{0} `catl_info` ({1}) is not a valid input!'.format(
            file_msg, catl_info)
        raise LSSUtils_Error(msg)
    # `reindex`
    if not (isinstance(reindex, bool)):
        msg = '{0} `reindex` ({1}) is not a valid type!'.format(
            file_msg, type(reindex))
        raise LSSUtils_Error(msg)
    ##
    ## Defining `failed` values
    ssfr_fail_arr = [0, -99, -999, np.nan]
    mstar_fail_arr = [-1, 0, np.nan]
    ##
    ## Getting keys for catalogues
    (logssfr_key, logmstar_key) = catl_keys_prop(catl_kind=catl_kind,
                                                 catl_info=catl_info,
                                                 return_type='list')
    ##
    ## Cleaning catalogue entries
    # Data
    if catl_kind == 'data':
        # Clean version
        catl_pd_clean = catl_pd[~catl_pd[logssfr_key].isin(ssfr_fail_arr) &\
                                ~catl_pd[logmstar_key].isin(mstar_fail_arr)]
    # Mocks
    if catl_kind == 'mocks':
        # Clean version
        catl_pd_clean = catl_pd[~catl_pd[logssfr_key].isin(ssfr_fail_arr)]
    ##
    ## Reindexing
    if reindex:
        catl_pd_clean.reset_index(inplace=True, drop=True)

    return catl_pd_clean
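A standalone sketch of the cleaning idiom used above: rows whose `logssfr` or `logMstar` columns hold sentinel `failed` values are dropped with `isin`; the toy data are illustrative:

import numpy as np
import pandas as pd

catl_pd = pd.DataFrame({'logssfr': [-11.2, 0, -99, -10.5],
                        'logMstar': [10.1, 9.8, 10.3, -1]})
ssfr_fail_arr = [0, -99, -999, np.nan]
mstar_fail_arr = [-1, 0, np.nan]
catl_pd_clean = catl_pd[~catl_pd['logssfr'].isin(ssfr_fail_arr) &
                        ~catl_pd['logMstar'].isin(mstar_fail_arr)]
print(catl_pd_clean.reset_index(drop=True))  # only rows without sentinel values survive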
Code Example #12
def Behroozi_relation(log_mstar, z=0., return_mhalo_h0=False, mstar_h0=False):
    """
    Returns the halo mass of a central galaxy as a function of its stellar 
    mass.

    Parameters
    -----------
    log_mstar : `float` ,`np.ndarray`, or array-like
        Value or array of values of base-10 logarithm of stellar mass 
        in h=1 solar mass units.

    z : int, float, `np.ndarray` or array-like
        Redshift of the halo hosting the galaxy. If passing an array,
        it must be of the same length as the input `log_mstar`.
    
    return_mhalo_h0 : `bool`, optional
        If True, the function returns the halo masses in ``h=1`` units.
        This variable is set to False by default.

    mstar_h0 : `bool`, optional
        If True, the stellar mass in `log_mstar` is converted from ``h=1``
        units to ``h=0.7`` units. This variable is set to False by default.

    Returns
    -----------
    log_halo_mass : float or `np.ndarray`
        Array or float containing the base-10 logarithm of halo mass in ``h=1``
        solar mass units.

    Note
    ----------
    The parameter values in Behroozi+10 were fit to data assuming ``h=0.7``,
    but all halotools inputs are in ``h=1`` units. Thus we will transform
    our input stellar mass to ``h=0.7`` units, evaluate using the 
    Behroozi parameters, and then transform back to ``h=1`` units before 
    returning the result.
    """
    file_msg = fd.Program_Msg(__file__)
    little_h = 0.7
    ## Checking input parameters
    # `log_mstar`
    mstar_valid_types = (int, float, np.ndarray, list)
    if not (isinstance(log_mstar, mstar_valid_types)):
        msg = '{0} `log_mstar` ({1}) is not a valid type!'.format(
            file_msg, type(log_mstar))
        raise LSSUtils_Error(msg)
    # `z`
    z_valid_types = (int, float, np.ndarray, list)
    if not (isinstance(z, z_valid_types)):
        msg = '{0} `z` ({1}) is not a valid type!'.format(
            file_msg, type(z))
        raise LSSUtils_Error(msg)
    # `return_mhalo_h0`
    return_mhalo_h0_valid_types = (bool)
    if not (isinstance(return_mhalo_h0, return_mhalo_h0_valid_types)):
        msg = '{0} `return_mhalo_h0` ({1}) is not a valid type!'.format(
            file_msg, type(return_mhalo_h0))
        raise LSSUtils_Error(msg)
    # `mstar_h0`
    mstar_h0_valid_types = (bool)
    if not (isinstance(mstar_h0, mstar_h0_valid_types)):
        msg = '{0} `mstar_h0` ({1}) is not a valid type!'.format(
            file_msg, type(mstar_h0))
        raise LSSUtils_Error(msg)
    ##
    ## Behroozi dictionary
    param_dict = _retrieve_Behroozi_default_dict()
    ## Converting from different `h` units
    if mstar_h0:
        mstar = (10**log_mstar)/(little_h**2)
    else:
        mstar = 10.**(log_mstar)
    # Scale factor
    a = 1./(1. + z)
    ##
    ## Behroozi function
    logm0 = param_dict['smhm_m0_0'] + param_dict['smhm_m0_a']*(a - 1)
    m0    = 10.**logm0
    logm1 = param_dict['smhm_m1_0'] + param_dict['smhm_m1_a']*(a - 1)
    beta  = param_dict['smhm_beta_0'] + param_dict['smhm_beta_a']*(a - 1)
    delta = param_dict['smhm_delta_0'] + param_dict['smhm_delta_a']*(a - 1)
    gamma = param_dict['smhm_gamma_0'] + param_dict['smhm_gamma_a']*(a - 1)
    #
    stellar_mass_by_m0 = mstar/m0
    term3_numerator    = (stellar_mass_by_m0)**delta
    term3_denominator  = 1. + (stellar_mass_by_m0)**(-gamma)

    log_halo_mass = logm1 + beta*np.log10(stellar_mass_by_m0)
    log_halo_mass += (term3_numerator/term3_denominator) - 0.5

    # convert back from h=0.7 to h=1 and return the result
    if return_mhalo_h0:
        return np.log10((10.**log_halo_mass)*little_h)
    else:
        return log_halo_mass
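For reference, the relation coded above can be written out; this is a restatement of the Behroozi et al. (2010) stellar-mass-to-halo-mass form, with every parameter interpolated linearly in the scale factor (e.g. \log_{10} M_1(a) = \texttt{smhm\_m1\_0} + \texttt{smhm\_m1\_a}\,(a-1)):

\log_{10} M_h \;=\; \log_{10} M_1
    \;+\; \beta \, \log_{10}\!\left(\frac{M_*}{M_0}\right)
    \;+\; \frac{\left(M_*/M_0\right)^{\delta}}{1 + \left(M_*/M_0\right)^{-\gamma}}
    \;-\; \frac{1}{2}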
Code Example #13
def sdss_catl_clean_nmin(catl_pd,
                         catl_kind,
                         catl_info='members',
                         nmin=1,
                         perf_opt=False):
    """
    Cleans the catalogue removing `failed` values, and only includes 
    galaxies that are in groups/halos above a `nmin` threshold.

    Parameters
    -----------
    catl_pd : `pandas.DataFrame`
        Dataset with the catalogue information.

    catl_kind : {'data', 'mocks'} str
        Type of catalogue to use.

        Options:
            - `data` : catalogues come from SDSS `real` catalogue
            - `mocks` : catalogues come from SDSS `mock` catalogues

    catl_info : {'members', 'groups'} str, optional
        Option for which kind of catalogues to use.

        Options:
            - `members` : Member galaxies of group catalogues
            - `groups` : Catalogues with `group` information.

    nmin : int, optional
        Minimum group richness to have in the (galaxy) group catalogue.
        This variable is set to `1` by default.

    perf_opt : boolean, optional
        Option for using a `perfect` mock catalogue.

    Returns
    -----------
    catl_pd_clean : `pandas.DataFrame`
        Cleaned version of `catl_pd` after having removed `failed` values,
        and having chosen only galaxies within groups above a group richness
        threshold of `nmin`.

    Raises
    ------------
    LSSUtils_Error : Exception from `LSSUtils_Error`
        Program exception raised if input parameters are not accepted.
    """
    file_msg = fd.Program_Msg(__file__)
    ## Checking input parameters
    catl_kind_valid = ['data', 'mocks']
    catl_info_valid = ['members', 'groups']
    # `catl_pd`
    if not (isinstance(catl_pd, pd.DataFrame)):
        msg = '{0} `catl_pd` ({1}) is not a valid type!'.format(
            file_msg, catl_pd)
        raise LSSUtils_Error(msg)
    # `catl_kind`
    if not (catl_kind in catl_kind_valid):
        msg = '{0} `catl_kind` ({1}) is not a valid input!'.format(
            file_msg, catl_kind)
        raise LSSUtils_Error(msg)
    # `catl_info`
    if not (catl_info in catl_info_valid):
        msg = '{0} `catl_info` ({1}) is not a valid input!'.format(
            file_msg, catl_info)
        raise LSSUtils_Error(msg)
    # `nmin`
    if not ((nmin > 0) and (isinstance(nmin, int))):
        msg = '{0} `nmin` must be an integer and have a value above `0`'
        msg = msg.format(file_msg)
        raise LSSUtils_Error(msg)
    # `perf_opt`
    if not (isinstance(perf_opt, bool)):
        msg = '{0} `perf_opt` ({1}) is not a valid type!'.format(
            file_msg, type(perf_opt))
        raise LSSUtils_Error(msg)
    ##
    ## Types of galaxies
    cens = int(1)
    nmin = int(nmin)
    ##
    ## Getting keys for catalogue
    (gm_key, id_key, galtype_key) = catl_keys(catl_kind,
                                              return_type='list',
                                              perf_opt=perf_opt)
    ##
    ## Cleaning catalogue entries
    catl_pd_clean_all = sdss_catl_clean(catl_pd,
                                        catl_kind=catl_kind,
                                        catl_info=catl_info,
                                        reindex=True)
    ## Choosing only galaxies in groups of richness >= `nmin`
    # Member galaxies
    if catl_info == 'members':
        # Centrals
        catl_pd_cens = catl_pd_clean_all.loc[(
            catl_pd_clean_all[galtype_key] == cens), id_key]
        catl_pd_cl = catl_pd_clean_all[(
            catl_pd_clean_all[id_key].isin(catl_pd_cens))]
        # Group counts
        group_counts = Counter(catl_pd_cl[id_key])
        group_ngals = np.array(
            [xx for xx in group_counts.keys() if group_counts[xx] >= nmin])
        # Cleaned version
        catl_pd_clean = catl_pd_cl[catl_pd_cl[id_key].isin(group_ngals)]
        catl_pd_clean.reset_index(inplace=True, drop=True)
    # Group catalogue
    if catl_info == 'groups':
        if ('ngals' in catl_pd_clean_all.columns.tolist()):
            catl_pd_clean = catl_pd_clean_all.loc[
                catl_pd_clean_all['ngals'] >= nmin]
            catl_pd_clean.reset_index(inplace=True, drop=True)
        else:
            msg = '{0} Key `ngals` not found in DataFrame ... Exiting!'
            msg = msg.format(file_msg)
            raise LSSUtils_Error(msg)

    return catl_pd_clean
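A standalone sketch of the `nmin` richness cut applied above, using `pandas.Series.value_counts` in place of `collections.Counter`; the toy group IDs are illustrative:

import pandas as pd

nmin = 2
catl_pd = pd.DataFrame({'groupid': [1, 1, 2, 3, 3, 3]})
counts = catl_pd['groupid'].value_counts()       # group richness per ID
rich_ids = counts[counts >= nmin].index          # IDs meeting the threshold
catl_pd_clean = (catl_pd[catl_pd['groupid'].isin(rich_ids)]
                 .reset_index(drop=True))
print(catl_pd_clean['groupid'].tolist())         # [1, 1, 3, 3, 3]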
Code Example #14
def Stats_one_arr(x,
                  y,
                  base=1.,
                  arr_len=0,
                  arr_digit='n',
                  weights=None,
                  statfunc=np.nanmean,
                  bin_statval='average',
                  return_perc=False,
                  failval=np.nan):
    """
    Calculates statistics for two arrays.

    Parameters
    ----------
    x, y : array_like, shape(N,)
        Sets of elements for the 1st and 2nd observable

    base : float, optional
        Bin width in units of `x`. This variable is set to 1. by default.

    arr_len : int, optional
        Minimum number of elements in each bin of `x`

    arr_digit : {'n', 'y', 'o'} str, optional
        Option for which elements to return.

        Options:
            - 'n' : Returns `x_stat`, `y_stat`, `y_std`, `y_std_err`
            - 'y' : Returns `x_stat`, `y_stat`, `y_std`, `y_std_err`, `x_bins_data`, `y_bins_data` 
            - 'o' : Returns `x_bins_data`, `y_bins_data` 

    weights : array_like or NoneType, optional
        Array of weights for values in `y`. This is set to None by default.

    statfunc : {`numpy.nanmean`, `numpy.nanmedian`} statistical function, optional
        Numerical function used to calculate on bins of data.
        By default, this variable is set to `numpy.nanmean`

    bin_statval : {'average', 'left', 'right'} str, optional
        Option for where to put the bin values of `x` and `y`.
        By default, this variable is set to `average`, which means 
        that the values are those of the averages of the bins in `x` and 
        `y`.

    return_perc : `bool`, optional
        If true, it also returns the `percentiles` of the data.
        Last item in the return list.
        This variable is set to False by default.

    failval : int, float, NoneType, or NaN, optional
        This is the value used when no data is available for the bin.
        This is set to `numpy.nan` by default

    Returns
    ----------
    x_stat, y_stat : array_like
        Binned arrays of elements from `x` and `y`, respectively

    y_std : array_like
        Standard deviation of `y` in each bin of `x`

    y_std_err : array_like
        Error in the `statfunc` of `y`

    x_bins_data : array_like, optional
        Elements of `x` in each bin with spacing of `base`.
        Only returned if `arr_digit` == 'y' or 'o'

    y_bins_data : array_like, optional
        Elements of `y` in each bin with spacing of `base`.
        Only returned if `arr_digit` == 'y' or 'o'

    perc_lims : array_like, shape(N,3)
        Percentiles in each bin of `x_stat`.
        Only returned if `return_perc` is True.
    """
    file_msg = fd.Program_Msg(__file__)
    ## Verifying input values
    # `arr_digit`
    if not ((arr_digit == 'y') or (arr_digit == 'n') or (arr_digit == 'o')):
        msg = '{0} `arr_digit` ({1}) is not a valid input. Exiting'.format(
            file_msg, arr_digit)
        raise LSSUtils_Error(msg)
    # Array dimensions
    if not ((len(x) > 0) and (len(y) > 0)):
        msg = '{0} The arrays `x` and `y` must have at least one value'
        msg = msg.format(file_msg)
        raise LSSUtils_Error(msg)
    if not ((np.asarray(x).ndim == 1) and (np.asarray(y).ndim == 1)):
        msg = '{0} The arrays `x` and `y` must have dimension of `1`'
        msg = msg.format(file_msg)
        raise LSSUtils_Error(msg)
    # `arr_len`
    if not (arr_len >= 0):
        msg = '{0} `arr_len` ({1}) must be greater or equal than zero!'.format(
            file_msg, arr_len)
        raise LSSUtils_Error(msg)
    # `bin_statval`
    if not (bin_statval in ['average', 'left', 'right']):
        msg = '{0} `bin_statval` ({1}) is not a valid input! Exiting'.format(
            file_msg, bin_statval)
        raise LSSUtils_Error(msg)
    ##
    ## Converting arrays to numpy arrays
    x = np.asarray(x)
    y = np.asarray(y)
    nelem = len(x)
    arr_len = int(arr_len - 1.) if arr_len != 0 else int(arr_len)
    ##
    ## Statistics calculations
    x_bins = Bins_array_create(x, base=base)
    x_digits = np.digitize(x, x_bins)
    ##
    ## Determining which bins to use
    ## These are the bins that meet the criteria of `arr_len`
    x_digits_bins = np.array([
        int(ii) for ii in range(1, len(x_bins))
        if len(x_digits[x_digits == ii]) > arr_len
    ])
    ## Elements in each bin
    # X-values
    x_bins_data = np.array([x[x_digits == ii] for ii in x_digits_bins])
    # Y-values
    y_bins_data = np.array([y[x_digits == ii] for ii in x_digits_bins])
    ##
    ## Selecting data in bins
    # Centered around the average
    if (bin_statval == 'average'):
        x_stat = np.array([
            statfunc(ii) if len(ii) > arr_len else failval
            for ii in x_bins_data
        ])
    # Left-hand side of the bin
    if (bin_statval == 'left'):
        x_stat = np.array([
            x_bins[:-1][ii] if len(x_bins_data[ii]) > arr_len else failval
            for ii in range(len(x_bins_data))
        ])
    # Right-hand side of the bin
    if (bin_statval == 'right'):
        x_stat = np.array([
            x_bins[1:][ii] if len(x_bins_data[ii]) > arr_len else failval
            for ii in range(len(x_bins_data))
        ])
    ##
    ## Determining the values in `y`
    # `stat_function`
    y_stat = np.array(
        [statfunc(ii) if len(ii) > arr_len else failval for ii in y_bins_data])
    # Standard Deviation
    y_std = np.array([
        np.nanstd(ii) if len(ii) > arr_len else failval for ii in y_bins_data
    ])
    # Error in the mean/median
    y_std_err = np.array([
        np.nanstd(ii) / math.sqrt(len(ii)) if len(ii) > arr_len else failval
        for ii in y_bins_data
    ])
    ##
    ## Correcting the error if `statfunc` == `numpy.nanmedian`
    if statfunc == np.nanmedian:
        y_std_err *= 1.253
    ##
    ## Returning percentiles
    if return_perc:
        perc_arr_lims = sigma_calcs(y_stat)
    ##
    ## Returning values
    if return_perc:
        if arr_digit == 'n':
            return_val = [x_stat, y_stat, y_std, y_std_err, perc_arr_lims]
        if arr_digit == 'y':
            return_val = [
                x_stat, y_stat, y_std, y_std_err, x_bins_data, y_bins_data,
                perc_arr_lims
            ]
        if arr_digit == 'o':
            return_val = [x_bins_data, y_bins_data, perc_arr_lims]
    else:
        if arr_digit == 'n':
            return_val = [x_stat, y_stat, y_std, y_std_err]
        if arr_digit == 'y':
            return_val = [
                x_stat, y_stat, y_std, y_std_err, x_bins_data, y_bins_data
            ]
        if arr_digit == 'o':
            return_val = [x_bins_data, y_bins_data]

    return return_val
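A standalone sketch of the binning scheme at the heart of `Stats_one_arr`: digitize `x` into bins of width `base` and take the `nanmean` of `y` inside every occupied bin; the synthetic data are illustrative:

import numpy as np

rng = np.random.default_rng(0)
x = rng.uniform(0., 5., 200)
y = 2.0 * x + rng.normal(0., 0.5, 200)

base = 1.0
x_bins = np.arange(np.floor(x.min()), np.ceil(x.max()) + 0.5 * base, base)
x_digits = np.digitize(x, x_bins)
occupied = [ii for ii in range(1, len(x_bins)) if (x_digits == ii).any()]
x_stat = np.array([np.nanmean(x[x_digits == ii]) for ii in occupied])
y_stat = np.array([np.nanmean(y[x_digits == ii]) for ii in occupied])
print(y_stat / x_stat)  # each ratio is close to the input slope of 2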
Code Example #15
def get_sun_mag(filter_opt, system='SDSS_Blanton_2003_z0.1'):
    """
    Get the solar absolute magnitude for a filter in a system.
    Taken from Duncan Campbell, and later modified.

    Parameters
    ----------
    filter_opt : {'U', 'B', 'V', 'R', 'I', 'J', 'H', 'K'} str
        Magnitude filter to use.

    system : {'Binney_and_Merrifield_1998', 'SDSS_Blanton_2003_z0.1'} str
        Kind of filter to use.

        Options:
            - 'Binney_and_Merrifield_1998' : See Binney and Merrifield, 1998
            - 'SDSS_Blanton_2003_z0.1' : See Blanton et al. (2003) Eqn. 14.

    Returns
    ----------
    abs_mag_sun : float
        Solar absolute magnitude in `filter_opt` using `system` parameters.

    Raises
    ----------
    LSSUtils_Error : Exception
        Program exception raised if input parameters are not accepted.

    Examples
    ----------
    >>> get_sun_mag('R', 'Binney_and_Merrifield_1998')
    4.42

    >>> get_sun_mag('V', 'Binney_and_Merrifield_1998')
    4.83

    >>> get_sun_mag('g', 'SDSS_Blanton_2003_z0.1')
    5.45
    """
    file_msg = fd.Program_Msg(__file__)
    ## Checking input parameters
    filter_arr = [
        'U', 'B', 'V', 'R', 'I', 'J', 'H', 'K', 'u', 'g', 'r', 'i', 'z'
    ]
    system_arr = ['Binney_and_Merrifield_1998', 'SDSS_Blanton_2003_z0.1']
    # Checks
    # Input filter
    if not (filter_opt in filter_arr):
        msg = '{0} `filter_opt` ({1}) is not a valid option!'.format(
            file_msg, filter_opt)
        raise LSSUtils_Error(msg)
    # Input system
    if not (system in system_arr):
        msg = '{0} `system` ({1}) is not a valid option!'.format(
            file_msg, system)
        raise LSSUtils_Error(msg)
    ##
    ## Input parameters
    abs_mag_sun_dict = {
        'Binney_and_Merrifield_1998': {
            'U': 5.61,
            'B': 5.48,
            'V': 4.83,
            'R': 4.42,
            'I': 4.08,
            'J': 3.64,
            'H': 3.32,
            'K': 3.28
        },
        'SDSS_Blanton_2003_z0.1': {
            'u': 6.80,
            'g': 5.45,
            'r': 4.76,
            'i': 4.58,
            'z': 4.51
        }
    }
    ## Checking if key exists in dictionary
    ## and assigning magnitude
    if (filter_opt in abs_mag_sun_dict[system].keys()):
        abs_mag_sun = abs_mag_sun_dict[system][filter_opt]
    else:
        msg = '{0} `filter_opt` ({1}) is not a proper key of `system` ({2})'
        msg = msg.format(file_msg, filter_opt, system)
        raise LSSUtils_Error(msg)

    return abs_mag_sun
Code Example #16
def concatenate_pd_df(directory, filetype='hdf5', foutput=None, outonly=True):
    """
    Concatenates pandas DataFrames into a single DataFrame

    Parameters
    ----------
    directory : str
        Path to the folder containing multiple pandas-HDF5 files

    filetype : str, optional
        File format of the file in `directory` to be read
        This is set to `hdf5` by default.

    foutput : str or NoneType, optional
        If not `None`, it is the basename of the output file in HDF5 format.

    outonly : boolean, optional
        If True, it returns the pandas DataFrame.
        If False, it only saves the concatenated `pandas.DataFrame`.

    Returns
    ----------
    df_conc : `pandas.DataFrame`
        DataFrame containing the combined datasets from the files in
        `directory`.

    Raises
    ----------
    LSSUtils_Error : Exception
        If no files are found in `directory`, it raises an error 
        warning about this.
    """
    file_msg = fd.Program_Msg(__file__)
    # Checking that `directory` exists
    if not os.path.exists(directory):
        msg = '{0} `directory` {1} is not a valid path! Exiting!'.format(
            file_msg, directory)
        raise LSSUtils_Error(msg)
    # Concatenating files
    files_arr = np.sort(fd.Index(directory, '.' + filetype))
    print('{0} Found `{1}` files'.format(file_msg, files_arr.size))
    if len(files_arr) > 0:
        # Initializing array that contains info
        df_arr = [[] for x in range(len(files_arr))]
        # Looping over HDF5 (pandas) files
        for ii, file_ii in enumerate(files_arr):
            df_arr[ii] = read_pandas_hdf5(file_ii)
        # Concatenating arrays
        df_conc = pd.concat(df_arr, ignore_index=True)
        # Deciding name of resulting output file
        if (foutput is not None) and (type(foutput) == str):
            foutput_file = os.path.join(directory,
                                        '{0}.{1}'.format(foutput, filetype))
            # Saving resulting DataFrame
            pandas_df_to_hdf5_file(df_conc, foutput_file, key='/Main')
            # Checking file exists
            fd.File_Exists(foutput_file)
            print('{0} Output file saved in: {1}'.format(
                file_msg, foutput_file))
        # If only outputting concatenated DataFrame
        if outonly:
            return df_conc
    else:
        msg = '{0} No files in `{1}` with extension `{2}`'.format(
            file_msg, directory, filetype)
        raise LSSUtils_Error(msg)
Code Example #17
def scoring_methods(feat_arr, truth_arr, model=None, pred_arr=None,
    score_method='perc', threshold=0.1, perc=0.9):
    """
    Determines the overall score for given arrays, i.e. the `predicted`
    array and the `truth` array

    Parameters
    -----------
    feat_arr : `np.ndarray` or array-like, shape (n_samples, n_features)
        Array consisting of the `features` used to make predictions. The
        dimensions of `feat_arr` are `n_samples` by `n_features`, where
        `n_samples` is the number of observations, and `n_features` the
        number of features used.

    truth_arr : `np.ndarray` or array-like, shape (n_samples, n_outcomes)
        Array consisting of the `true` values for the `n_samples` 
        observations. The dimensions of `truth_arr` are 
        `n_samples` by `n_outcomes`, where `n_samples` is the 
        number of observations, and `n_outcomes` the number of predicted 
        outcomes.

    model : scikit-learn model object or `NoneType`
        Model used to estimate the score if ``score_method == 'model_score'``
        This variable is set to `None` by default.

    pred_arr : `np.ndarray`, array-like, or `NoneType`, shape (n_samples, n_outcomes)
        Array of predicted values from `feat_arr`. If ``model == None``,
        this variable must be an array-like object. If ``model != None``,
        this variable will not be used, and will be calculated using 
        the `model` object. This variable is set to `None` by default.

    score_method : {'perc', 'threshold', 'model_score', 'r2'} `str`, optional
        Type of scoring to use when determining how well an algorithm 
        is performing.

        Options:
            - 'perc' : Use percentage and rank-ordering of the values
            - 'threshold' : Score based on differences of `threshold` or less from the true value
            - 'model_score' : Out-of-the-box method from `sklearn` to determine success.
            - 'r2': R-squared statistic for error calculation.

    threshold : float, optional
        Value to use when calculating the error within `threshold` value
        from the truth. This variable is set to `0.1` by default.

    perc : float, optional
        Percentile value in the range [0,1] used when computing the score
        with the 'perc' method. This variable is set to `0.9` by default.

    Returns
    -----------
    method_score : float
        Overall score from `pred_arr` to predict `truth_arr`.

    Notes
    -----------
    For more information on how to pre-process your data, see 
    `http://scikit-learn.org/stable/modules/model_evaluation.html`_.
    """
    file_msg = fd.Program_Msg(__file__)
    ## Checking input parameters
    # `feat_arr`
    feat_arr_type_valid = (list, np.ndarray)
    if not (isinstance(feat_arr, feat_arr_type_valid)):
        msg = '{0} `feat_arr` ({1}) is not a valid input type'.format(
            file_msg, type(feat_arr))
        raise LSSUtils_Error(msg)
    # `truth_arr`
    truth_arr_type_valid = (list, np.ndarray)
    if not (isinstance(truth_arr, truth_arr_type_valid)):
        msg = '{0} `truth_arr` ({1}) is not a valid input type'.format(
            file_msg, type(truth_arr))
        raise LSSUtils_Error(msg)
    # `score_method` - Type
    score_method_type_valid = (str)
    if not (isinstance(score_method, score_method_type_valid)):
        msg = '{0} `score_method` ({1}) is not a valid input type'.format(
            file_msg, type(score_method))
        raise LSSUtils_Error(msg)
    # `score_method` - Value
    score_method_valid = ['perc', 'threshold', 'model_score', 'r2']
    if not (score_method in score_method_valid):
        msg = '{0} `score_method` ({1}) is not a valid input!'.format(
            file_msg, score_method)
        raise LSSUtils_Error(msg)
    # `threshold` - Type
    threshold_valid = (float, int)
    if not (isinstance(threshold, threshold_valid)):
        msg = '{0} `threshold` ({1}) is not a valid input type'.format(
            file_msg, type(threshold))
        raise LSSUtils_Error(msg)
    # `threshold` - Value
    if not (threshold >= 0.):
        msg = '{0} `threshold` ({1}) must be larger than 0!'.format(
            file_msg, threshold)
        raise LSSUtils_Error(msg)
    ##
    ## Checking for `model` and `pred_arr`
    # If both are none
    if ((model is None) and (pred_arr is None)):
        msg  = '{0} `model` and `pred_arr` cannot be both `None`. '
        msg += 'Only one can be `None`'
        msg  = msg.format(file_msg)
        raise LSSUtils_Error(msg)
    # `pred_arr` - Type
    pred_arr_valid = ((list, np.ndarray))
    if (model is None):
        if not (isinstance(pred_arr, pred_arr_valid)):
            msg = '{0} `pred_arr` ({1}) is not a valid input type!'.format(
                file_msg, type(pred_arr))
            raise LSSUtils_Error(msg)
    ##
    ## Choosing scoring method
    # Percentile method
    if (score_method == 'perc'):
        # Checking for `pred_arr`
        if (pred_arr is None):
            pred_arr = model.predict(feat_arr)
        # Checking for `model`
        if (model is None):
            pred_arr = np.asarray(pred_arr)
        # Error calculation
        pred_err     = np.abs(pred_arr - truth_arr)
        method_score = scipy.stats.scoreatpercentile(pred_err, 100. * perc)
    # Threshold method
    if (score_method == 'threshold'):
        # Checking for `pred_arr`
        if (pred_arr is None):
            pred_arr = model.predict(feat_arr)
        # Checking for `model`
        if (model is None):
            pred_arr = np.asarray(pred_arr)
        # Error calculation
        pred_err     = np.abs(pred_arr - truth_arr)
        pred_thresh  = len(pred_err[pred_err <= threshold])
        method_score = pred_thresh / len(pred_arr)
    # R-squared method
    if (score_method == 'r2'):
        # Checking for `pred_arr`
        if (pred_arr is None):
            pred_arr = model.predict(feat_arr)
        # Checking for `model`
        if (model is None):
            pred_arr = np.asarray(pred_arr)
        # Error calculation
        method_score = skmetrics.r2_score(truth_arr, pred_arr)
    # Model method
    if (score_method == 'model_score'):
        method_score = model.score(feat_arr, truth_arr)

    return method_score
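A standalone numeric check of the 'threshold' scoring option above, which counts the fraction of predictions landing within `threshold` of the truth; the arrays are illustrative:

import numpy as np

truth_arr = np.array([1.0, 2.0, 3.0, 4.0])
pred_arr = np.array([1.05, 2.5, 2.95, 4.2])
threshold = 0.1
pred_err = np.abs(pred_arr - truth_arr)
method_score = len(pred_err[pred_err <= threshold]) / len(pred_arr)
print(method_score)  # 0.5 (two of the four predictions are within 0.1)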
Code Example #18
def train_test_dataset(pred_arr, feat_arr, pre_opt='min_max',
    shuffle_opt=True, random_state=0, test_size=0.25):
    """
    Function to create the training and testing datasets for a given set 
    of features array and predicted array.

    Parameters
    -----------
    pred_arr : `np.ndarray` or array-like, shape (n_samples, n_outcomes)
        Array consisting of the `predicted values`. The dimensions of 
        `pred_arr` are `n_samples` by `n_outcomes`, where `n_samples` is the 
        number of observations, and `n_outcomes` the number of predicted 
        outcomes.

    feat_arr : `np.ndarray` or array-like, shape (n_samples, n_features)
        Array consisting of the `features` used to make predictions. The
        dimensions of `feat_arr` are `n_samples` by `n_features`, where
        `n_samples` is the number of observations, and `n_features` the
        number of features used.

    pre_opt : {'min_max', 'standard', 'normalize', 'no'} `str`, optional
        Type of preprocessing to do on `feat_arr`.

        Options:
            - 'min_max' : Turns `feat_arr` to values between (0,1)
            - 'standard' : Uses the `sklearn.preprocessing.StandardScaler` method
            - 'normalize' : Uses the `sklearn.preprocessing.Normalizer` method
            - 'no' : No preprocessing on `feat_arr`

    shuffle_opt : `bool`, optional
        If True, the data is shuffled before splitting into testing and 
        training datasets. This variable is set to True by default.

    random_state : int, optional
        Random state number used for when splitting into training and 
        testing datasets. If set, it will always have the same seed 
        `random_state`. This variable is set to `0` by default.

    test_size : float, optional
        Percentage of the catalogue that represents the `test` size of 
        the testing dataset. This variable must be between (0,1).
        This variable is set to `0.25` by default.

    Returns
    -----------
    train_dict : `dict`
        Dictionary containing the `training` data from the catalogue.

    test_dict : `dict`
        Dictionary containing the `testing` data from the catalogue.

    See also
    -----------
    data_preprocessing : Function to preprocess a dataset.
    """
    file_msg = fd.Program_Msg(__file__)
    ## Checking input parameters
    # `pred_arr`
    pred_arr_type_valid = (list, np.ndarray)
    if not (isinstance(pred_arr, pred_arr_type_valid)):
        msg = '{0} `pred_arr` ({1}) is not a valid input type'.format(
            file_msg, type(pred_arr))
        raise LSSUtils_Error(msg)
    # `feat_arr`
    feat_arr_type_valid = (list, np.ndarray)
    if not (isinstance(feat_arr, feat_arr_type_valid)):
        msg = '{0} `feat_arr` ({1}) is not a valid input type'.format(
            file_msg, type(feat_arr))
        raise LSSUtils_Error(msg)
    # `pre_opt`
    pre_opt_valid = ['min_max', 'standard', 'normalize', 'no']
    if not (pre_opt in pre_opt_valid):
        msg = '{0} `pre_opt` ({1}) is not a valid input'.format(
            file_msg, pre_opt)
        raise LSSUtils_Error(msg)
    # `shuffle_opt`
    shuffle_opt_type_valid = (bool)
    if not (isinstance(shuffle_opt, shuffle_opt_type_valid)):
        msg = '{0} `shuffle_opt` ({1}) is not a valid input'.format(
            file_msg, shuffle_opt)
        raise LSSUtils_Error(msg)
    # `random_state`
    random_state_type_valid = (int)
    if not (isinstance(random_state, random_state_type_valid)):
        msg = '{0} `random_state` ({1}) is not a valid input'.format(
            file_msg, random_state)
        raise LSSUtils_Error(msg)
    # `test_size`
    if not ((test_size > 0) and (test_size < 1.)):
        msg = '{0} `test_size` ({1}) must be in range (0,1)'.format(
            file_msg, test_size)
        raise LSSUtils_Error(msg)
    ##
    ## Checking dimensions of `pred_arr` and `feat_arr`
    pred_arr = np.asarray(pred_arr)
    feat_arr = np.asarray(feat_arr)
    # Dimensions
    if (pred_arr.ndim) == 1:
        pred_arr = pred_arr.reshape(len(pred_arr), 1)
    if (feat_arr.ndim) == 1:
        feat_arr = feat_arr.reshape(len(feat_arr), 1)
    # Shape
    if (len(pred_arr) != len(feat_arr)):
        msg  = '{0} The shape of `pred_arr` ({1}) and `feat_arr` ({2}) must '
        msg += 'have the same length'
        msg  = msg.format(file_msg, len(pred_arr), len(feat_arr))
        raise LSSUtils_Error(msg)
    ##
    ## Rescaling Dataset
    feat_arr_scaled = data_preprocessing( feat_arr, pre_opt=pre_opt )
    ##
    ## Splitting into `Training` and `Testing` datasets.
    # Scaled
    (   X_train, X_test,
        Y_train, Y_test) = skms.train_test_split(   feat_arr_scaled,
                                                    pred_arr,
                                                    test_size=test_size,
                                                    shuffle=shuffle_opt,
                                                    random_state=random_state)
    # Not-scaled
    (   X_train_ns, X_test_ns,
        Y_train_ns, Y_test_ns) = skms.train_test_split( feat_arr,
                                                        pred_arr,
                                                        test_size=test_size,
                                                        shuffle=shuffle_opt,
                                                        random_state=random_state)
    ##
    ## Assigning `training` and `testing` datasets to dictionaries
    train_dict = {  'X_train': X_train, 'Y_train': Y_train,
                    'X_train_ns':X_train_ns, 'Y_train_ns':Y_train_ns}
    test_dict  = {'X_test' : X_test , 'Y_test' : Y_test,
                    'X_test_ns':X_test_ns, 'Y_test_ns':Y_test_ns}

    return train_dict, test_dict
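A standalone sketch of the split performed above, with `sklearn.preprocessing.MinMaxScaler` standing in for the package's `data_preprocessing` under the 'min_max' option; the synthetic data are illustrative:

import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

rng = np.random.default_rng(0)
feat_arr = rng.normal(size=(100, 3))
pred_arr = rng.normal(size=100)

feat_arr_scaled = MinMaxScaler().fit_transform(feat_arr)   # values mapped into (0,1)
X_train, X_test, Y_train, Y_test = train_test_split(
    feat_arr_scaled, pred_arr, test_size=0.25, shuffle=True, random_state=0)
print(X_train.shape, X_test.shape)  # (75, 3) (25, 3)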
Code Example #19
def catl_keys_prop(catl_kind, catl_info='members', return_type='list'):
    """
    Dictionary keys for the different galaxy and group properties of 
    catalogues.

    Parameters
    ------------
    catl_kind : {'data', 'mocks'} str
        Type of catalogue to use.

        Options:
            - `data` : catalogues come from SDSS `real` catalogue
            - `mocks` : catalogues come from SDSS `mock` catalogues

    catl_info : {'members', 'groups'} str, optional
        Option for which kind of catalogues to use.

        Options:
            - `members` : Member galaxies of group catalogues
            - `groups` : Catalogues with `group` information.

    return_type : {'list', 'dict'} str, optional
        Type of output to be returned. This variable is set to `list`
        by default.

        Options:
            - 'list' : Returns the values as part of a list
            - 'dict' : Returns the values as part of a python dictionary

    Returns
    ------------
    catl_objs : python dictionary or array_like
        Dictionary/array with the proper keys for the catalogue(s).

        Order : 1) `ssfr_key`, 2) `mstar_key`

    Raises
    ------------
    LSSUtils_Error : Exception from `LSSUtils_Error`
        Program exception raised if input parameters are not accepted.
    
    Examples
    ------------
    >>> catl_keys_prop('data')
    ['logssfr', 'logMstar_JHU']

    >>> catl_keys_prop('mocks', catl_info='groups', return_type='list')
    ['logssfr', 'logMstar']
    """
    file_msg = fd.Program_Msg(__file__)
    ## Checking input parameters
    catl_kind_valid = ['data', 'mocks']
    catl_info_valid = ['members', 'groups']
    return_type_valid = ['list', 'dict']
    # `catl_kind`
    if not (catl_kind in catl_kind_valid):
        msg = '{0} `catl_kind` ({1}) is not a valid input!'.format(
            file_msg, catl_kind)
        raise LSSUtils_Error(msg)
    # `catl_info`
    if not (catl_info in catl_info_valid):
        msg = '{0} `catl_info` ({1}) is not a valid input!'.format(
            file_msg, catl_info)
        raise LSSUtils_Error(msg)
    # `return_type`
    if not (return_type in return_type_valid):
        msg = '{0} `return_type` ({1}) is not a valid input!'.format(
            file_msg, return_type)
        raise LSSUtils_Error(msg)
    ##
    ## Property keys
    ##
    ## Data
    if (catl_kind == 'data'):
        ## Members
        if catl_info == 'members':
            # SSFR and Stellar mass
            logssfr_key, logmstar_key = ['logssfr', 'logMstar_JHU']
        ## Groups
        if catl_info == 'groups':
            # SSFR and Stellar mass
            logssfr_key, logmstar_key = ['logssfr_tot', 'logMstar_tot']
    ##
    ## Mocks
    if (catl_kind == 'mocks'):
        ## Members
        if catl_info == 'members':
            # SSFR and Stellar mass
            logssfr_key, logmstar_key = ['logssfr', 'logMstar']
        ## Groups
        if catl_info == 'groups':
            # SSFR and Stellar mass
            logssfr_key, logmstar_key = ['logssfr', 'logMstar']
    ##
    ## Saving values
    if return_type == 'dict':
        catl_objs = {'logssfr_key': logssfr_key, 'logmstar_key': logmstar_key}
    elif return_type == 'list':
        catl_objs = [logssfr_key, logmstar_key]

    return catl_objs
Code Example #20
def absolute_to_apparent_magnitude(abs_mag, lum_dist, unit='mpc'):
    """
    Calculates the apparent magnitude using the luminosity and absolute
    magnitude.

    Parameters
    -----------
    abs_mag : float, int, or array_like
        Array of absolute magnitude(s)

    lum_dist : array_like
        Array of luminosity distance(s) to the object. In units of `Mpc`.

    unit : {'pc', 'kpc', 'mpc'} str, optional
        Unit to use for `lum_dist`. This variable is set to `mpc` by
        default. When `pc`, the units are in parsecs, while `mpc` is for 
        distances in mega-parsecs, etc.

    Returns
    -----------
    app_mag : array_like, or float
        Array of apparent magnitude(s). `app_mag` is a float if 
        `abs_mag` is a float or int.
    """
    file_msg = fd.Program_Msg(__file__)
    ## Checking input parameters
    valid_types = (float, np.ndarray, list, int)
    # Type for `abs_mag`
    if not (isinstance(abs_mag, valid_types)):
        msg = '{0} `abs_mag` ({1}) is not a valid type!'.format(
            file_msg, type(abs_mag))
        raise LSSUtils_Error(msg)
    # Type for `unit`
    unit_valid_arr = ['pc', 'kpc', 'mpc']
    if not (unit in unit_valid_arr):
        msg = '{0} `unit` ({1}) is not a valid input!'.format(file_msg, unit)
        raise LSSUtils_Error(msg)
    ## Converting to array-type
    # `abs_mag` object
    if (isinstance(abs_mag, float) or isinstance(abs_mag, int)):
        abs_mag = float(abs_mag)
    if (isinstance(abs_mag, list) or isinstance(abs_mag, np.ndarray)):
        abs_mag = np.asarray(abs_mag)
    # `lum_dist` object
    if (isinstance(lum_dist, float) or isinstance(lum_dist, int)):
        lum_dist = float(lum_dist)
    if (isinstance(lum_dist, list) or isinstance(lum_dist, np.ndarray)):
        lum_dist = np.asarray(lum_dist)
    # Units - Choosing the distance-modulus offset for the given unit
    # This follows the formula:
    #   app_mag - abs_mag = 5 * (np.log10(lum_dist) + a - 1)
    #       Where a = 0 when [d] = parsecs
    #       Where a = 3 when [d] = kiloparsecs
    #       Where a = 6 when [d] = megaparsecs
    if unit == 'pc':
        a = 0
    elif unit == 'kpc':
        a = 3
    elif unit == 'mpc':
        a = 6
    ##
    ## Calculations
    app_mag = abs_mag + 5. * (np.log10(lum_dist) - 1 + a)

    return app_mag
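
A quick sanity check of the distance-modulus relation encoded above (a sketch,
assuming the function is importable from this module): at a luminosity distance
of exactly 10 pc, apparent and absolute magnitudes coincide.

# m - M = 5 * (log10(d) - 1 + a); at d = 10 pc (a = 0) the modulus vanishes
app_mag_pc = absolute_to_apparent_magnitude(-20.0, 10.0, unit='pc')   # -> -20.0
# The same object placed at 1 Mpc (a = 6) is 25 magnitudes fainter
app_mag_mpc = absolute_to_apparent_magnitude(-20.0, 1.0, unit='mpc')  # -> 5.0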
Code Example #21
def absolute_magnitude_lim(z, mag_lim, cosmo=None, H0=100., verbose=True):
    """
    Calculates the absolute magnitude limit as function of redshift `z` for 
    a flux-limited survey.

    Parameters
    -----------
    z : float, int, or array_like
        Maximum redshift for a given flux-limited survey.

    mag_lim : float
        Apparent magnitude limit of the flux-limited survey.

    cosmo : `astropy.cosmology` object
        Cosmology object from Astropy.

    H0 : float, optional
        Hubble parameter value used to estimate distances.
        This variable is set to 100 km/s/Mpc by default.

    verbose : boolean, optional
        If True, a message will appear when the default cosmology is used.

    Returns
    -----------
    abs_mag : float, int, or array_like
        Absolute magnitude limit in units of `abs_mag` + 5*log10(h),
        where `h` is the little Hubble parameter.

    Raises
    ----------
    LSSUtils_Error : Exception
        Program exception if input parameters are not accepted.
    """
    file_msg = fd.Program_Msg(__file__)
    ## Checking input parameters
    # Redshift
    z_valid_types = (float, int, list, np.ndarray)
    if not (isinstance(z, z_valid_types)):
        msg = '{0} `z` ({1}) is not a valid type!'.format(file_msg, type(z))
        raise LSSUtils_Error(msg)
    # Magnitude limit
    mag_lim_valid_types = (float, int)
    if not (isinstance(mag_lim, mag_lim_valid_types)):
        msg = '{0} `mag_lim` ({1}) is not a valid type!'.format(
            file_msg, type(mag_lim))
        raise LSSUtils_Error(msg)
    # Hubble parameter value
    H0_valid_types = (float, int)
    if not (isinstance(H0, H0_valid_types)):
        msg = '{0} `H0` ({1}) is not a valid type!'.format(file_msg, type(H0))
        raise LSSUtils_Error(msg)
    ##
    ## Calculations
    if not cosmo:
        from astropy.cosmology import FlatLambdaCDM
        cosmo = FlatLambdaCDM(H0=H0, Om0=0.316)
        if verbose:
            print(">> Warning: No cosmology was specified. Using default:",
                  cosmo)
    ## Luminosity distance
    lum_dist = cosmo.luminosity_distance(z).value
    ## Absolute magnitude
    abs_mag = apparent_to_absolute_magnitude(mag_lim, lum_dist)

    return abs_mag
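
A hedged usage sketch; the apparent-magnitude limit of 17.77 below is only an
illustrative value, not one taken from this module:

from astropy.cosmology import FlatLambdaCDM

cosmo = FlatLambdaCDM(H0=100., Om0=0.316)
# Absolute-magnitude limit at z = 0.05 for a survey with m_lim = 17.77
abs_mag_lim = absolute_magnitude_lim(0.05, 17.77, cosmo=cosmo)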
Code Example #22
def catl_keys(catl_kind, perf_opt=False, return_type='list'):
    """
    Dictionary keys for the different types of catalogues

    Parameters
    ----------
    catl_kind : {'data', 'mocks'} str
        Type of catalogue to use.

        Options:
            - `data` : catalogues come from SDSS `real` catalogue
            - `mocks` : catalogues come from SDSS `mock` catalogues

    perf_opt : boolean, optional
        Option for using a `perfect` mock catalogue.

    return_type : {'list', 'dict'} str, optional
        Type of output to the be returned. This variable is set to `list`
        by default.

        Options:
            - 'list' : Returns the values as part of a list
            - 'dict' : Returns the values as part of a python dictionary

    Returns
    ----------
    catl_keys : python dictionary or array_like
        Dictionary/array with the proper keys for the catalogue(s).

        Order : 1) `gm_key`, 2) `id_key`, 3) `galtype_key`

    Raises
    ------------
    LSSUtils_Error : Exception from `LSSUtils_Error`
        Program exception if input parameters are not accepted.

    Examples
    ----------
    >>> catl_keys('data', perf_opt=False, return_type='list')
    ['M_h', 'groupid', 'galtype']

    >>> catl_keys('mocks', perf_opt=True, return_type='list')
    ['M_h', 'haloid', 'galtype']
    """
    file_msg = fd.Program_Msg(__file__)
    ## Checking input parameters
    # `catl_kind`
    if not (catl_kind in ['data', 'mocks']):
        msg = '{0} `catl_kind` ({1}) is not a valid input parameter!'.format(
            file_msg, catl_kind)
        raise LSSUtils_Error(msg)
    # `return_type`
    if not (return_type in ['list', 'dict']):
        msg = '{0} `return_type` ({1}) is not a valid input parameter'.format(
            file_msg, return_type)
        raise LSSUtils_Error(msg)
    # `perf_opt`
    if not (isinstance(perf_opt, bool)):
        msg = '{0} `perf_opt` ({1}) must be a boolean object!'.format(
            file_msg, type(perf_opt))
        raise LSSUtils_Error(msg)
    ##
    ## Perfect Catalogue
    if catl_kind == 'data':
        perf_opt = False
    ##
    ## Property keys
    if catl_kind == 'data':
        gm_key, id_key, galtype_key = ['M_h', 'groupid', 'galtype']
    elif catl_kind == 'mocks':
        if perf_opt:
            gm_key, id_key, galtype_key = ['M_h', 'haloid', 'galtype']
        else:
            gm_key, id_key, galtype_key = ['M_group', 'groupid', 'g_galtype']
    ##
    ## Saving values
    if return_type == 'dict':
        catl_objs = {
            'gm_key': gm_key,
            'id_key': id_key,
            'galtype_key': galtype_key
        }
    elif return_type == 'list':
        catl_objs = [gm_key, id_key, galtype_key]

    return catl_objs
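
As with `catl_keys_prop`, the 'dict' return type wraps the same keys in a
dictionary; a short sketch for the non-perfect mock case:

keys = catl_keys('mocks', perf_opt=False, return_type='dict')
# keys == {'gm_key': 'M_group', 'id_key': 'groupid', 'galtype_key': 'g_galtype'}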
Code Example #23
def data_preprocessing(feat_arr, pre_opt='min_max'):
    """
    Preprocesses the data in order to clean it and make it more suitable
    for machine-learning algorithms.

    Parameters
    -----------
    feat_arr : `numpy.ndarray`
        Array of feature values. This array is used for training a 
        ML algorithm.

    pre_opt : {'min_max', 'standard', 'normalize', 'no'} `str`, optional
        Type of preprocessing to do on `feat_arr`.

        Options:
            - 'min_max' : Turns `feat_arr` to values between (0,1)
            - 'standard' : Uses the `~sklearn.preprocessing.StandardScaler` method
            - 'normalize' : Uses the `~sklearn.preprocessing.Normalizer` method
            - 'no' : No preprocessing on `feat_arr`

    Returns
    -----------
    feat_arr_scaled : `numpy.ndarray`
        Rescaled version of `feat_arr` based on the choice of `pre_opt`.

    Notes
    -----------
    For more information on how to pre-process your data, see
    http://scikit-learn.org/stable/modules/preprocessing.html
    """
    file_msg = fd.Program_Msg(__file__)
    ## Checking input parameters
    # `feat_arr`
    feat_arr_type_valid = (list, np.ndarray)
    if not (isinstance(feat_arr, feat_arr_type_valid)):
        msg = '{0} `feat_arr` ({1}) is not a valid input type'.format(
            file_msg, type(feat_arr))
        raise LSSUtils_Error(msg)
    # `pre_opt`
    pre_opt_valid = ['min_max', 'standard', 'normalize', 'no']
    if not (pre_opt in pre_opt_valid):
        msg = '{0} `pre_opt` ({1}) is not a valid input'.format(
            file_msg, pre_opt)
        raise LSSUtils_Error(msg)
    ##
    ## Scaling `feat_arr`
    if pre_opt == 'min_max':
        # Scaler
        scaler = skpre.MinMaxScaler(feature_range=(0, 1))
        # Rescaling
        feat_arr_scaled = scaler.fit_transform(feat_arr)
    ## Standardize Data
    elif pre_opt == 'standard':
        # Scaler
        scaler = skpre.StandardScaler().fit(feat_arr)
        # Rescaling
        feat_arr_scaled = scaler.transform(feat_arr)
    ## Normalize Data
    elif pre_opt == 'normalize':
        # Scaler
        scaler = skpre.Normalizer().fit(feat_arr)
        # Rescaling
        feat_arr_scaled = scaler.transform(feat_arr)
    ## No Preprocessing
    elif pre_opt == 'no':
        feat_arr_scaled = feat_arr

    return feat_arr_scaled
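
A minimal sketch of the 'min_max' option on a toy feature array (assuming
`numpy` is imported; `skpre` is this module's alias for
`sklearn.preprocessing`):

import numpy as np

feat_arr = np.array([[1., 10.], [2., 20.], [3., 30.]])
feat_scaled = data_preprocessing(feat_arr, pre_opt='min_max')
# Each column is independently rescaled to the (0, 1) range:
# [[0. , 0. ],
#  [0.5, 0.5],
#  [1. , 1. ]]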
Code Example #24
def catl_sdss_dir(catl_kind='data',
                  catl_type='mr',
                  sample_s='19',
                  catl_info='members',
                  halotype='fof',
                  clf_method=3,
                  hod_n=0,
                  clf_seed=1235,
                  perf_opt=False,
                  print_filedir=True):
    """
    Extracts the path to the synthetic catalogues.

    Parameters
    -----------
    catl_kind : {'data', 'mocks'} str, optional
        Type of catalogue to use. This variable is set to `data` by default.

        Options:
            - `data` : catalogues come from SDSS `real` catalogue
            - `mocks` : catalogues come from SDSS `mock` catalogues

    catl_type : {'mr', 'mstar'} str, optional
        Type of catalogue to use. It shows which abundance matching method
        was used for the CLF when assigning halo masses. This variable is 
        set to 'mr' by default.

        Options:
            - `mr` : Uses r-band absolute magnitude
            - `mstar` : Uses stellar masses

    sample_s : {'19', '20', '21'} str, optional
        Volume-limited sample to use. This variable is set to '19' by default.

        Options:
            - '19' : Uses the Mr19 volume-limited sample, i.e. 'Consuelo'
            - '20' : Uses the Mr20 volume-limited sample, i.e. 'Esmeralda'
            - '21' : Uses the Mr21 volume-limited sample, i.e. 'Carmen'

    catl_info : {'members', 'groups'} str, optional
        Option for which kind of catalogues to use.

        Options:
            - `members` : Member galaxies of group catalogues
            - `groups` : Catalogues with `group` information.

    halotype : {'fof', 'so'} str, optional
        Type of the dark matter halo of the simulation used to create the 
        synthetic catalogues. This variable is set to `fof` by default.

        Options:
            - 'fof': Friends-of-Friends halos.
            - 'so' : Spherical overdensity halos.

    clf_method : {1, 2, 3} int, optional
        Method for assigning galaxy properties to mock galaxies.
        This variable is set to `3` by default.

        Options:
            - `1` : Independent assignment of (g-r) color, sersic, and log(ssfr)
            - `2` : (g-r) decides active/passive designation and draws values 
                    independently.
            - `3` : (g-r) decides active/passive designations, and 
                    assigns other galaxy properties for that given galaxy.

    hod_n : {0, 1} int, optional
        HOD model to use. Only relevant when `catl_kind == mocks`.

    clf_seed : int, optional
        Seed used for the `CLF` random seed. This variable is set to `1235` 
        by default.

    perf_opt : boolean, optional
        If True, it chooses to analyze the `perfect` set of synthetic
        catalogues. This variable is set to `False` by default.

    print_filedir : boolean, optional
        If True, the output directory is printed onto the screen.
    
    Returns
    -----------
    catls_path : str
        Path to the desired set of synthetic catalogues.

    Raises
    ------------
    LSSUtils_Error : Exception from `LSSUtils_Error`
        Program exception if input parameters are not accepted.
    """
    file_msg = fd.Program_Msg(__file__)
    ## Checking input parameters
    catl_kind_valid = ['data', 'mocks']
    catl_type_valid = ['mr', 'mstar']
    sample_s_valid = ['19', '20', '21']
    catl_info_valid = ['members', 'groups']
    halotype_valid = ['fof', 'so']
    clf_method_valid = [1, 2, 3]
    hod_n_valid = [0, 1]
    # `catl_kind`
    if not (catl_kind in catl_kind_valid):
        msg = '{0} `catl_kind` ({1}) is not a valid input!'.format(
            file_msg, catl_kind)
        raise LSSUtils_Error(msg)
    # `catl_type`
    if not (catl_type in catl_type_valid):
        msg = '{0} `catl_type` ({1}) is not a valid input!'.format(
            file_msg, catl_type)
        raise LSSUtils_Error(msg)
    # `sample_s`
    if not (sample_s in sample_s_valid):
        msg = '{0} `sample_s` ({1}) is not a valid input!'.format(
            file_msg, sample_s)
        raise LSSUtils_Error(msg)
    # `catl_info`
    if not (catl_info in catl_info_valid):
        msg = '{0} `catl_info` ({1}) is not a valid input!'.format(
            file_msg, catl_info)
        raise LSSUtils_Error(msg)
    # `halotype`
    if not (halotype in halotype_valid):
        msg = '{0} `halotype` ({1}) is not a valid input!'.format(
            file_msg, halotype)
        raise LSSUtils_Error(msg)
    # `clf_method`
    if not (clf_method in clf_method_valid):
        msg = '{0} `clf_method` ({1}) is not a valid input!'.format(
            file_msg, clf_method)
        raise LSSUtils_Error(msg)
    # `hod_n`
    if not (hod_n in hod_n_valid):
        msg = '{0} `hod_n` ({1}) is not a valid input!'.format(file_msg, hod_n)
        raise LSSUtils_Error(msg)
    # `perf_opt`
    if not (isinstance(perf_opt, bool)):
        msg = '{0} `perf_opt` ({1}) is not a valid type!'.format(
            file_msg, type(perf_opt))
        raise LSSUtils_Error(msg)
    # `print_filedir`
    if not (isinstance(print_filedir, bool)):
        msg = '{0} `print_filedir` ({1}) is not a valid type!'.format(
            file_msg, type(print_filedir))
        raise LSSUtils_Error(msg)
    ##
    ## Type of catalogue
    if catl_info == 'members':
        catl_info_str = 'member_galaxy_catalogues'
    elif catl_info == 'groups':
        catl_info_str = 'group_galaxy_catalogues'
    ##
    ## Perfect catalogue
    if perf_opt:
        # Data
        if catl_kind == 'data':
            msg = '{0} Invalid `catl_kind` ({1}) for when `perf_opt == True`!'
            msg = msg.format(file_msg, catl_kind)
            raise LSSUtils_Error(msg)
        # Mocks
        catl_info_perf_str = 'perfect_{0}'.format(catl_info_str)
    else:
        # Mocks
        catl_info_perf_str = catl_info_str
    ##
    ## Extracting path of the files
    # Data
    if catl_kind == 'data':
        # Joining paths
        filedir = os.path.join(wp.get_output_path(), 'SDSS', catl_kind,
                               catl_type, 'Mr' + sample_s, catl_info_perf_str)
    # Mocks
    if catl_kind == 'mocks':
        # Joining paths
        filedir = os.path.join(wp.get_output_path(), 'SDSS', catl_kind,
                               'halos_{0}'.format(halotype),
                               'hod_model_{0}'.format(hod_n),
                               'clf_seed_{0}'.format(clf_seed),
                               'clf_method_{0}'.format(clf_method), catl_type,
                               'Mr' + sample_s, catl_info_perf_str)
    ##
    ## Making sure `filedir` exists
    if not (os.path.exists(filedir)):
        msg = '{0} `filedir` ({1}) does NOT exist! Check input variables'
        msg = msg.format(file_msg, filedir)
        raise LSSUtils_Error(msg)
    ##
    ## Printing out paths
    if print_filedir:
        print('{0} `filedir`: {1}'.format(file_msg, filedir))

    return filedir
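
To illustrate the directory layout the function assembles (the root prefix
comes from `wp.get_output_path()` and is environment-specific, so the path
below is hypothetical):

# catl_sdss_dir(catl_kind='mocks', catl_type='mr', sample_s='19',
#               halotype='fof', clf_method=3, hod_n=0, clf_seed=1235,
#               catl_info='members')
# -> <output_path>/SDSS/mocks/halos_fof/hod_model_0/clf_seed_1235/
#        clf_method_3/mr/Mr19/member_galaxy_catalogues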
Code Example #25
def sigma_calcs(data_arr,
                type_sigma='std',
                perc_arr=[68., 95., 99.7],
                return_mean_std=False):
    """
    Calculates the 1-, 2-, and 3-sigma ranges for `data_arr`.

    Parameters
    -----------
    data_arr : `numpy.ndarray`, shape (`nrpbins`, `itern_tot`)
        Array of values from which to calculate percentiles or
        standard deviations.

    type_sigma : {'perc', 'std'} string, optional (default = 'std')
        Option for calculating either `percentiles` or `standard deviations`
            - 'perc': calculates percentiles
            - 'std' : uses standard deviations as 1-, 2-, and 3-sigmas

    perc_arr : array_like, optional (default = [68., 95., 99.7])
        Array of percentiles to calculate

    return_mean_std : boolean, optional (default = False)
        Option for returning mean and St. Dev. along with `sigma_dict`

    Returns
    ----------
    sigma_dict : python dictionary
        Dictionary containing the 1-, 2-, and 3-sigma upper and lower
        ranges for `data_arr`.

    mark_mean : array_like
        Array of the mean values of `data_arr`.
        Only returned if `return_mean_std == True`.

    mark_std : array_like
        Array of the standard deviations of `data_arr`.
        Only returned if `return_mean_std == True`.
    """
    file_msg = fd.Program_Msg(__file__)
    ## Checking input variables
    # `data_arr`
    data_arr_valid_types = (np.ndarray, list)
    if not (isinstance(data_arr, data_arr_valid_types)):
        msg = '{0} `data_arr` ({1}) is not a valid type!'.format(
            file_msg, type(data_arr))
        raise LSSUtils_Error(msg)
    else:
        data_arr = np.asarray(data_arr)
    # `type_sigma`
    type_sigma_valid = ['perc', 'std']
    if not (isinstance(type_sigma, str)):
        msg = '{0} `type_sigma` ({1}) is not a valid type!'.format(
            file_msg, type(type_sigma))
        raise LSSUtils_Error(msg)
    if not (type_sigma in type_sigma_valid):
        msg = '{0} `type_sigma` ({1}) is not a valid input choice!'.format(
            file_msg, type_sigma)
        raise LSSUtils_Error(msg)
    ## Determining shape of `data_arr`
    if data_arr.ndim == 1:
        axis = 0
    else:
        axis = 1
    ## Creating dictionary for saving `sigma`s
    sigma_dict = {}
    for ii in range(len(perc_arr)):
        sigma_dict[ii] = []
    ## Using Percentiles to estimate errors
    if type_sigma == 'perc':
        for ii, perc_ii in enumerate(perc_arr):
            mark_lower = np.nanpercentile(data_arr,
                                          50. - (perc_ii / 2.),
                                          axis=axis)
            mark_upper = np.nanpercentile(data_arr,
                                          50. + (perc_ii / 2.),
                                          axis=axis)
            # Saving to dictionary
            sigma_dict[ii] = np.column_stack((mark_lower, mark_upper)).T
    ## Using standard deviations to estimate errors
    if type_sigma == 'std':
        mean_val = np.nanmean(data_arr, axis=axis)
        std_val = np.nanstd(data_arr, axis=axis)
        for ii in range(len(perc_arr)):
            mark_lower = mean_val - ((ii + 1) * std_val)
            mark_upper = mean_val + ((ii + 1) * std_val)
            # Saving to dictionary
            sigma_dict[ii] = np.column_stack((mark_lower, mark_upper)).T
    ##
    ## Estimating mean and St. Dev. of `data_arr`
    mark_mean = np.nanmean(data_arr, axis=axis)
    mark_std = np.nanstd(data_arr, axis=axis)
    ## Fixing values for when `axis == 0`
    if data_arr.ndim == 1:
        for ii in range(len(sigma_dict.keys())):
            sigma_dict[ii] = sigma_dict[ii].flatten()

    if return_mean_std:
        return sigma_dict, mark_mean, mark_std
    else:
        return sigma_dict
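
A short sketch on a 1-D Gaussian sample, showing the structure of
`sigma_dict` (assuming `numpy` is imported):

import numpy as np

data_arr = np.random.normal(loc=0., scale=1., size=1000)
sigma_dict, mark_mean, mark_std = sigma_calcs(data_arr,
                                              type_sigma='std',
                                              return_mean_std=True)
# For a 1-D input each entry is a flattened [lower, upper] pair:
# sigma_dict[0] -> mean +/- 1 * std
# sigma_dict[1] -> mean +/- 2 * std
# sigma_dict[2] -> mean +/- 3 * std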
Code Example #26
def spherematch(ra1, dec1, ra2, dec2, tol=None, nnearest=1, nthreads=1):
    """
    Determines the matches between two catalogues of sources with 
    <ra, dec> coordinates.

    Parameters
    ----------
    ra1, dec1 : array_like
        Right ascension and declination of the 1st catalogue.
        Units are in `degrees`.

    ra2, dec2 : array_like
        Right ascension and declination of the 2nd catalogue.
        Units are in `degrees`.

    tol : float or None, optional
        How close (in degrees) a match has to be to count as a match.
        If None, all nearest neighbors for the 1st catalogue will be returned.

    nnearest : int, optional
        The nth neighbor to find. E.g. 1 for the nearest neighbor, 2 for the
        second-nearest neighbor, etc. Particularly useful if you want to get
        the nearest *non-self* neighbor of a catalogue.
        To do this use::

            spherematch(ra, dec, ra, dec, nnearest=2)

        If `nnearest == 0`, all matches are returned.

    nthreads : int, optional
        Number of threads to use for the calculation. This variable is set
        to 1 by default, and must be at least 1.

    Returns
    ----------
    idx1 : int `numpy.ndarray`
        Indices of the 1st catalogue of the matches. Will never be larger 
        than `ra1`/`dec1`.

    idx2 : int `numpy.ndarray`
        Indices of the 2nd catalogue of the matches. Will never be larger
        than `ra1`/`dec1`.

    ds : float `numpy.ndarray`
        Distance (in degrees) between the matches.
    """
    file_msg = fd.Program_Msg(__file__)
    ## Checking input arguments
    valid_types = (list, np.ndarray)
    # `ra1`
    if not (isinstance(ra1, valid_types)):
        msg = '{0} `ra1` ({1}) is not a valid type!'.format(file_msg, type(ra1))
        raise LSSUtils_Error(msg)
    # `dec1`
    if not (isinstance(dec1, valid_types)):
        msg = '{0} `dec1` ({1}) is not a valid type!'.format(file_msg, type(dec1))
        raise LSSUtils_Error(msg)
    # `ra2`
    if not (isinstance(ra2, valid_types)):
        msg = '{0} `ra2` ({1}) is not a valid type!'.format(file_msg, type(ra2))
        raise LSSUtils_Error(msg)
    # `dec2`
    if not (isinstance(dec2, valid_types)):
        msg = '{0} `dec2` ({1}) is not a valid type!'.format(file_msg, type(dec2))
        raise LSSUtils_Error(msg)
    # `nnearest`
    if nnearest < 0:
        msg = '{0} `nnearest` ({1}) must be `0` or larger!'.format(file_msg,
            nnearest)
        raise LSSUtils_Error(msg)
    # `threads`
    if nthreads < 1:
        msg = '{0} `nthreads` ({1}) must be at least `1`!'.format(file_msg,
            nthreads)
        raise LSSUtils_Error(msg)
    ##
    ## Converting arguments into arrays for ease of use
    ra1  = np.array(ra1 , copy=False)
    dec1 = np.array(dec1, copy=False)
    ra2  = np.array(ra2 , copy=False)
    dec2 = np.array(dec2, copy=False)
    ## Checking shape
    # 1st catalogue
    if ra1.shape != dec1.shape:
        msg = '{0} The shape of `ra1` ({1}) does not match that of `dec1` ({2}).'
        msg = msg.format(file_msg, ra1.shape, dec1.shape)
        raise LSSUtils_Error(msg)
    # 2nd catalogue
    if ra2.shape != dec2.shape:
        msg = '{0} The shape of `ra2` ({1}) does not match that of `dec2` ({2}).'
        msg = msg.format(file_msg, ra2.shape, dec2.shape)
        raise LSSUtils_Error(msg)
    ##
    ## Converting spherical coordinates into cartesian coordinates
    # 1st catalogue
    x1, y1, z1 = _spherical_to_cartesian_fast(  ra1.ravel(),
                                                dec1.ravel(),
                                                nthreads)
    coords1 = np.empty((x1.size,3))
    coords1[:, 0] = x1
    coords1[:, 1] = y1
    coords1[:, 2] = z1
    # 2nd catalogue
    x2, y2, z2 = _spherical_to_cartesian_fast(  ra2.ravel(),
                                                dec2.ravel(),
                                                nthreads)
    coords2 = np.empty((x2.size,3))
    coords2[:, 0] = x2
    coords2[:, 1] = y2
    coords2[:, 2] = z2
    ##
    ## Finding nearest neighbors
    kdt = KDT(coords2)
    # Finding neighbors
    if nnearest == 1:
        idx_s2 = kdt.query(coords1)[1]
    elif (nnearest == 0) and (tol is not None): # if you want ALL matches!
        p1_x, p1_y, p1_z = _spherical_to_cartesian_fast(90., 0  , nthreads)
        p2_x, p2_y, p2_z = _spherical_to_cartesian_fast(90., tol, nthreads)
        # Converting to floats
        p1_x   = float(p1_x)
        p1_y   = float(p1_y)
        p1_z   = float(p1_z)
        p2_x   = float(p2_x)
        p2_y   = float(p2_y)
        p2_z   = float(p2_z)
        r      = np.sqrt((p2_x - p1_x)**2 + (p2_y - p1_y)**2 + (p2_z - p1_z)**2)
        idx_s2 = kdt.query_ball_point(coords1, r)[0]
    elif nnearest > 1:
        idx_s2 = kdt.query(coords1, nnearest)[1][:, -1]
    else:
        msg = '{0} Invalid `nnearest` ({1})!'.format(file_msg, nnearest)
        raise LSSUtils_Error(msg)
    ##
    ## Calculating distance between matches
    ds = _great_circle_distance_fast(   ra1         ,
                                        dec1        ,
                                        ra2[idx_s2] ,
                                        dec2[idx_s2],
                                        nthreads    )
    ##
    ## If `tol` is None, then all objects will have a match.
    idx_s1 = np.arange(ra1.size)
    ##
    ## Remove matches that are `beyond` the tolerance separation
    if (tol is not None) and (nnearest != 0):
        mask   = ds < tol
        idx_s1 = idx_s1[mask]
        idx_s2 = idx_s2[mask]
        ds     = ds    [mask]

    return idx_s1, idx_s2, ds
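
A hedged usage sketch, matching a small catalogue against itself to find each
object's nearest *non-self* neighbor (the coordinates and the 1-degree
tolerance are illustrative):

import numpy as np

ra  = np.array([10.0, 10.5, 200.0])
dec = np.array([-5.0, -5.1,  40.0])
# nnearest=2 skips the trivial self-match; `tol` removes distant pairs
idx1, idx2, ds = spherematch(ra, dec, ra, dec, tol=1.0, nnearest=2)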
Code Example #27
def catl_sdss_merge(catl_pd_ii,
                    catl_kind='data',
                    catl_type='mr',
                    sample_s='19',
                    halotype='fof',
                    clf_method=3,
                    hod_n=0,
                    clf_seed=1235,
                    perf_opt=False,
                    return_memb_group=False,
                    print_filedir=False):
    """
    Merges the member and group catalogues for a given set of input parameters,
    and returns a modified version of the galaxy group catalogues with added
    info about the galaxy groups.

    Parameters
    ------------
    catl_pd_ii : int
        Index of the catalogue to match, 
        from :func:`~cosmoutils.mock_catalogues.catls_utils.extract_catls`
        function.

    catl_kind : {'data', 'mocks'} str, optional
        Type of catalogue to use. This variable is set to `data` by default.

        Options:
            - `data` : catalogues come from SDSS `real` catalogue
            - `mocks` : catalogues come from SDSS `mock` catalogues

    catl_type : {'mr', 'mstar'} str, optional
        Type of catalogue to use. It shows which abundance matching method
        was used for the CLF when assigning halo masses. This variable is 
        set to 'mr' by default.

        Options:
            - `mr` : Uses r-band absolute magnitude
            - `mstar` : Uses stellar masses

    sample_s : {'19', '20', '21'} str, optional
        Volume-limited sample to use. This variable is set to '19' by default.

        Options:
            - '19' : Uses the Mr19 volume-limited sample, i.e. 'Consuelo'
            - '20' : Uses the Mr20 volume-limited sample, i.e. 'Esmeralda'
            - '21' : Uses the Mr21 volume-limited sample, i.e. 'Carmen'

    halotype : {'fof', 'so'} str, optional
        Type of the dark matter halo of the simulation used to create the 
        synthetic catalogues. This variable is set to `fof` by default.

        Options:
            - 'fof': Friends-of-Friends halos.
            - 'so' : Spherical overdensity halos.

    clf_method : {1, 2, 3} int, optional
        Method for assigning galaxy properties to mock galaxies.
        This variable is set to `3` by default.

        Options:
            - `1` : Independent assignment of (g-r) color, sersic, and log(ssfr)
            - `2` : (g-r) decides active/passive designation and draws values 
                    independently.
            - `3` : (g-r) decides active/passive designations, and 
                    assigns other galaxy properties for that given galaxy.

    hod_n : {0, 1} int, optional
        HOD model to use. Only relevant when `catl_kind == mocks`.

    clf_seed : int, optional
        Seed used for the `CLF` random seed. This variable is set to `1235` 
        by default.

    perf_opt : boolean, optional
        If True, it chooses to analyze the `perfect` set of synthetic
        catalogues. This variable is set to `False` by default.

    return_memb_group :  `bool`, optional
        If True, the function returns the member and group catalogues,
        along with the merged catalogue.
        It returns ``<memb_group_pd, memb_pd, group_pd>``

    print_filedir : boolean, optional
        If True, the output directory is printed onto the screen.

    Returns
    ------------
    memb_group_pd : `pandas.DataFrame`
        Combined version of the i-th member and group catalogues.
        It contains both galaxy and group information.

    memb_pd : `pandas.DataFrame`
        Catalogue of the member galaxies of the i-th catalogue.
        This catalogue contains information of the `member galaxies`.

    group_pd : `pandas.DataFrame`
        Catalogue of the groups of the i-th catalogue.
        This catalogue contains information of the `galaxy groups`.
    
    Raises
    ------------
    LSSUtils_Error : Exception from `LSSUtils_Error`
        Program exception if input parameters are not accepted.
    """
    file_msg = fd.Program_Msg(__file__)
    ## Checking input parameters
    catl_pd_ii_valid = (float, int)
    catl_kind_valid = ['data', 'mocks']
    catl_type_valid = ['mr', 'mstar']
    sample_s_valid = ['19', '20', '21']
    catl_info_valid = ['members', 'groups']
    halotype_valid = ['fof', 'so']
    clf_method_valid = [1, 2, 3]
    hod_n_valid = [0, 1]
    # `catl_pd_ii`
    if (isinstance(catl_pd_ii, catl_pd_ii_valid)):
        catl_pd_ii = int(catl_pd_ii)
    else:
        msg = '{0} `catl_pd_ii` ({1}) is not a valid input type!'.format(
            file_msg, type(catl_pd_ii))
        raise LSSUtils_Error(msg)
    # `catl_kind`
    if not (catl_kind in catl_kind_valid):
        msg = '{0} `catl_kind` ({1}) is not a valid input!'.format(
            file_msg, catl_kind)
        raise LSSUtils_Error(msg)
    # `catl_type`
    if not (catl_type in catl_type_valid):
        msg = '{0} `catl_type` ({1}) is not a valid input!'.format(
            file_msg, catl_type)
        raise LSSUtils_Error(msg)
    # `sample_s`
    if not (sample_s in sample_s_valid):
        msg = '{0} `sample_s` ({1}) is not a valid input!'.format(
            file_msg, sample_s)
        raise LSSUtils_Error(msg)
    # `halotype`
    if not (halotype in halotype_valid):
        msg = '{0} `halotype` ({1}) is not a valid input!'.format(
            file_msg, halotype)
        raise LSSUtils_Error(msg)
    # `clf_method`
    if not (clf_method in clf_method_valid):
        msg = '{0} `clf_method` ({1}) is not a valid input!'.format(
            file_msg, clf_method)
        raise LSSUtils_Error(msg)
    # `hod_n`
    if not (hod_n in hod_n_valid):
        msg = '{0} `hod_n` ({1}) is not a valid input!'.format(file_msg, hod_n)
        raise LSSUtils_Error(msg)
    # `perf_opt`
    if not (isinstance(perf_opt, bool)):
        msg = '{0} `perf_opt` ({1}) is not a valid type!'.format(
            file_msg, type(perf_opt))
        raise LSSUtils_Error(msg)
    # `return_memb_group`
    if not (isinstance(return_memb_group, bool)):
        msg = '{0} `return_memb_group` ({1}) is not a valid type!'.format(
            file_msg, type(return_memb_group))
        raise LSSUtils_Error(msg)
    # `print_filedir`
    if not (isinstance(print_filedir, bool)):
        msg = '{0} `print_filedir` ({1}) is not a valid type!'.format(
            file_msg, type(print_filedir))
        raise LSSUtils_Error(msg)
    ##
    ## Extracting catalogues given input parameters
    (memb_arr, memb_len) = extract_catls(catl_kind=catl_kind,
                                         catl_type=catl_type,
                                         sample_s=sample_s,
                                         halotype=halotype,
                                         clf_method=clf_method,
                                         hod_n=hod_n,
                                         clf_seed=clf_seed,
                                         perf_opt=perf_opt,
                                         catl_info='members',
                                         return_len=True,
                                         print_filedir=print_filedir)
    # Checking number of catalogues
    if catl_pd_ii > (memb_len - 1):
        msg = '{0} `catl_pd_ii` ({1}) is OUT of range ({2})!'.format(
            file_msg, catl_pd_ii, memb_len)
        raise LSSUtils_Error(msg)
    ##
    ## Extracting group catalogue
    # i-th Galaxy catalogue
    memb_path = memb_arr[catl_pd_ii]
    # i-th Galaxy Group catalogue
    group_path = catl_sdss_dir(catl_kind=catl_kind,
                               catl_type=catl_type,
                               sample_s=sample_s,
                               halotype=halotype,
                               clf_method=clf_method,
                               hod_n=hod_n,
                               clf_seed=clf_seed,
                               perf_opt=perf_opt,
                               catl_info='groups',
                               print_filedir=print_filedir)
    ##
    ## Paths to catalogue
    # Mocks
    if catl_kind == 'mocks':
        group_path = os.path.join(
            group_path, os.path.basename(memb_path).replace('memb', 'group'))
    # Data
    if catl_kind == 'data':
        group_path = os.path.join(
            group_path, os.path.basename(memb_path).replace('Gals', 'Group'))
    # Checking that file exists
    fd.File_Exists(group_path)
    ##
    ## Reading in Catalogues
    memb_pd = fr.read_hdf5_file_to_pandas_DF(memb_path)
    group_pd = fr.read_hdf5_file_to_pandas_DF(group_path)
    ## Keys for the catalogues
    (gm_key, id_key, galtype_key) = catl_keys(catl_kind,
                                              perf_opt=perf_opt,
                                              return_type='list')
    ## Matching keys from Group catalogue
    if len(np.unique(memb_pd[id_key])) == len(np.unique(group_pd[id_key])):
        # Group column names
        group_colnames = np.sort(group_pd.columns.values)
        group_groupid = np.sort(np.unique(group_pd[id_key]))
        n_groups = len(group_groupid)
        n_memb = len(memb_pd)
        ## Sorting `memb_pd` by `id_key`
        # Member catalogue
        memb_pd.sort_values(by=id_key, inplace=True)
        memb_pd.reset_index(inplace=True, drop=True)
        # Group catalogue
        group_pd.sort_values(by=id_key, inplace=True)
        group_pd.reset_index(inplace=True, drop=True)
        ## Renaming columns
        g_colnames_dict = {ii: 'GG' + ii for ii in group_colnames}
        group_pd.rename(columns=g_colnames_dict, inplace=True)
        group_pd.rename(columns={'GG' + id_key: id_key}, inplace=True)
        ##
        ## Merging the 2 DataFrames
        memb_group_pd = pd.merge(left=memb_pd,
                                 right=group_pd,
                                 how='left',
                                 left_on=id_key,
                                 right_on=id_key)
    else:
        msg = '{0} The unique group IDs of the 2 DataFrames '
        msg += '(`memb_pd`, `group_pd`) do not match!'
        msg = msg.format(file_msg)
        raise LSSUtils_Error(msg)
    ##
    ## Returning DataFrames
    if return_memb_group:
        return_obj = (memb_group_pd, memb_pd, group_pd)
    else:
        return_obj = memb_group_pd

    return return_obj
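
A closing usage sketch (this assumes the member and group catalogues already
exist on disk under `wp.get_output_path()`):

# Merge the 0th mock member catalogue with its corresponding group catalogue
memb_group_pd, memb_pd, group_pd = catl_sdss_merge(0,
                                                   catl_kind='mocks',
                                                   sample_s='19',
                                                   return_memb_group=True)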