Beispiel #1
0
def assign_noz(f, nofz, **kwargs):
    """Read catalogue 'f', overwrite one column with n(z) and save the result.

    n(z) is obtained by calling 'nofz' on the redshift column of the same
    catalogue.

    Parameters
    ----------
    f: file object or string
        file containing the catalogue
    nofz: function
        returns n(z) for all the z in the input file
    output
    ------
    none

    accepted kwargs that affects the function
    +z_column: column of the catalogue containing the redshift (required)
    +nz_column: column of the catalogue where n(z) is written (required)
    +verbose: verbose mode [True|False]
    +replace: replace string *replace[0]* with *replace[1]* in f.name
    +insert: insert string *insert[0]* before *insert[1]* in f.name
    +skip: existing file names skipped [True|False]
    +overwrite: existing file names overwritten [True|False]
    +fmt: format of the output file
    """
    ofile = mf.create_ofile_name(f, **kwargs)  # create the output file name
    # an output name of None is treated as "skip this file", as done by the
    # other functions of this module that check the returned name
    if ofile is None:
        return None

    # ndmin=2 keeps a single-row catalogue two dimensional, so that the
    # column indexing below does not fail
    cat = np.loadtxt(f, ndmin=2)  # read the input catalogue

    # evaluate n(z) on the redshift column and store it in the n(z) column
    cat[:, kwargs['nz_column']] = nofz(cat[:, kwargs['z_column']])

    np.savetxt(ofile, cat, fmt=kwargs['fmt'], delimiter='\t')
    return None
def subtract_fromfile(f, absmin, **kwargs):
    """Read catalogue 'f', subtract absmin+offset from the first three columns
    and save the result.

    Parameters
    ----------
    f: file object or string
        file containing the catalogue
    absmin: list or array of three floats
        values to subtract from the first 3 columns
    output
    ------
    none

    accepted kwargs that affects the function
    +verbose: verbose mode [True|False]
    +replace: replace string *replace[0]* with *replace[1]* in f.name
    +insert: insert string *insert[0]* before *insert[1]* in f.name
    +skip: existing file names skipped [True|False]
    +overwrite: existing file names overwritten [True|False]
    +offset: extra offset to add to absmin (required)
    +fmt: format of the output file
    """
    ofile = mf.create_ofile_name(f, **kwargs)  # create the output file name
    # an output name of None is treated as "skip this file", as done by the
    # other functions of this module that check the returned name
    if ofile is None:
        return None

    # convert to an array first: with a plain list 'absmin + offset' would
    # raise a TypeError (scalar offset) or concatenate (list offset)
    shift = np.asarray(absmin, dtype=float) + kwargs['offset']

    # ndmin=2 keeps a single-row catalogue two dimensional
    cat = np.loadtxt(f, ndmin=2)  # read the input catalogue
    cat[:, :3] -= shift  # subtract the minimum

    np.savetxt(ofile, cat, fmt=kwargs['fmt'], delimiter='\t')
    return None
def rebin(fname, mbins, **kwargs):
    """
    Rebin the power spectrum in file 'fname' and save it into the output file.

    Groups of 'mbins' consecutive bins are merged: k is the plain average,
    P(k) and P(k)+shot noise are averaged using the number of modes as
    weights, and the number of modes is summed. Trailing bins that do not
    fill a complete group are dropped.

    Parameters
    ----------
    fname: string
        file name
    mbins: int
        number of input bins to merge into each output bin
    output
    ------
    none

    accepted kwargs that affects the function:
    +from_bin: skip the first 'from_bin' bins
    +replace: replace string *replace[0]* with *replace[1]* in f.name
    +insert: insert string *insert[0]* before *insert[1]* in f.name
    +skip: existing file names skipped [True|False]
    +overwrite: existing file names overwritten [True|False]
    +fmt: format of the output file
    """
    ofile = mf.create_ofile_name(fname,
                                 **kwargs)  # create the output file name
    if ofile is None:  # no output file name: nothing to do
        return None

    header_lines, ps = read_file(fname)  # read the file

    # if required, skip the first bins
    fbin = kwargs.get('from_bin')
    if fbin is not None:
        ps = ps[fbin:, :]

    # the number of remaining bins must be a multiple of mbins: cut the
    # last bins if it is not
    leftover = ps.shape[0] % mbins
    if leftover != 0:
        ps = ps[:-leftover, :]

    # separate the power spectrum into the various components
    # (the input is expected to have exactly these four columns)
    k, pk, pksn, nmodes = ps.T

    # reshape to one row per output bin to simplify the rebinning
    finalbins = ps.shape[0] // mbins
    new_shape = [finalbins, mbins]
    kr = k.reshape(new_shape)
    pkr = pk.reshape(new_shape)
    pksnr = pksn.reshape(new_shape)
    nmodesr = nmodes.reshape(new_shape)

    # get the values for the rebinned power spectrum
    kr = np.average(kr, axis=1)
    pkr = np.average(pkr, axis=1, weights=nmodesr)
    pksnr = np.average(pksnr, axis=1, weights=nmodesr)
    nmodesr = nmodesr.sum(axis=1)

    # save the rebinned power spectrum, preserving the original header
    with open(ofile, 'w') as of:
        of.writelines(header_lines)
        np.savetxt(of,
                   np.vstack([kr, pkr, pksnr, nmodesr]).T,
                   delimiter='\t',
                   fmt=kwargs['fmt'])
def move_columns(f, from_columns, to_columns, **kwargs):
    """
    Read file 'f', substitute the content of columns 'to_columns' with the one
    of 'from_columns' and save the result.

    Parameters
    ----------
    f: file object or string
        file containing the catalogue
    from_columns: sequence of ints
        list of columns to copy
    to_columns: sequence of ints
        list of columns where to copy
    output
    ------
    none

    accepted kwargs that affects the function
    +verbose: verbose mode [True|False]
    +replace: replace string *replace[0]* with *replace[1]* in f.name
    +insert: insert string *insert[0]* before *insert[1]* in f.name
    +skip: existing file names skipped [True|False]
    +overwrite: existing file names overwritten [True|False]
    +swap: swap the columns instead of just moving
    +pandas: use pandas for the input
    +chunks: chunksize in pandas.read_table
    +substitute: value to substitute in 'from_columns' that are not in 'to_columns'
    +fmt: format of the output file
    """
    ofile = mf.create_ofile_name(f, **kwargs)  # create the output file name
    # an output name of None is treated as "skip this file", as done by the
    # other functions of this module that check the returned name
    if ofile is None:
        return None
    if kwargs['verbose']:
        print("Processing file '{}'".format(f))

    # get the columns to move or swap. list() makes private copies and also
    # accepts tuples/arrays, which do not support the 'extend' used below
    substitute = None
    temp_from_cols, temp_to_cols = list(from_columns), list(to_columns)
    if kwargs['swap']:
        # swapping is moving in both directions at once
        temp_from_cols.extend(to_columns)
        temp_to_cols.extend(from_columns)
    elif kwargs['substitute'] is not None:
        # source columns that are not overwritten get the substitute value;
        # stays None if there are no such columns
        substitute = [i for i in temp_from_cols
                      if i not in temp_to_cols] or None

    if kwargs['pandas']:
        use_pandas(f, ofile, temp_from_cols, temp_to_cols, substitute, **kwargs)
    else:
        # ndmin=2 keeps a single-row catalogue two dimensional
        cat = np.loadtxt(f, ndmin=2)
        # the right hand side is a copy (fancy indexing), so the swap is safe
        cat[:, temp_to_cols] = cat[:, temp_from_cols]
        if substitute is not None:
            cat[:, substitute] = kwargs['substitute']
        np.savetxt(ofile, cat, fmt=kwargs['fmt'], delimiter='\t')
Beispiel #5
0
def cselect(f, constraint, **kwargs):
    """Read catalogue 'f', keep only the rows selected by 'constraint' and
    save them in a file.

    Parameters
    ----------
    f: file object or string
        file containing the catalogue
    constraint: string
        selection criterion. It is a python expression evaluated with 'eval'
        inside this function, where the catalogue is available under the name
        'cat'; its result is used to index the rows of 'cat'
    output
    ------
    none

    accepted kwargs that affects the function
    +verbose: verbose mode [True|False]
    +replace: replace string *replace[0]* with *replace[1]* in f.name
    +insert: insert string *insert[0]* before *insert[1]* in f.name
    +skip: existing file names skipped [True|False]
    +overwrite: existing file names overwritten [True|False]
    +pandas: use pandas for the input
    +chunks: chunksize in pandas.read_table
    +fmt: format of the output file
    """
    ofile = mf.create_ofile_name(f, **kwargs)  # create the output file name
    if ofile is None:  # skip existing files if "skip" option used
        return None
    if kwargs['verbose']:
        print("Processing file '{}'".format(f))

    # SECURITY NOTE: 'constraint' is executed through eval(); it must come
    # from a trusted source (e.g. the user's own command line), never from
    # external data. The local name 'cat' must not be renamed, as the
    # expression refers to it.
    if kwargs['pandas']:
        # options shared by the one-shot and the chunked read.
        # raw string: '\s' in a normal string is an invalid escape sequence
        read_opts = dict(header=None,
                         skiprows=mf.n_lines_comments(f),
                         sep=r'\s')
        if kwargs['chunks'] is None:
            cat = pd.read_table(f, **read_opts)
            cat = cat[eval(constraint)]
            np.savetxt(ofile, cat, fmt=kwargs['fmt'], delimiter='\t')
        else:
            chunks = pd.read_table(f, chunksize=kwargs['chunks'], **read_opts)
            # append the selection from every chunk to the same output file
            with open(ofile, 'w') as of:
                for cat in chunks:
                    cat = cat[eval(constraint)]
                    np.savetxt(of, cat, fmt=kwargs['fmt'], delimiter='\t')
    else:
        cat = np.loadtxt(f)
        cat = cat[eval(constraint), :]
        np.savetxt(ofile, cat, fmt=kwargs['fmt'], delimiter='\t')
Beispiel #6
0
def hist_save_return(f, readcols, hdata, **kwargs):
    """
    Read the required columns from file 'f', compute the histogram,
    save it to file and return it.

    The saved table has one row per bin with, in order: bin centre, lower
    bin edge, upper bin edge and histogram value.
    {common_params}
    output
    ------
    {return_values}
    
    {common_kwargs}
    {kwargs_file}
    """
    # the output name is built before the histogram is computed
    out_name = mf.create_ofile_name(f, **kwargs)
    counts, centers, edges = hist_return(f, readcols, hdata, **kwargs)

    # assemble the table: one row per quantity, then transpose to columns
    lower_edges = edges[:-1]
    upper_edges = edges[1:]
    table = np.vstack((centers, lower_edges, upper_edges, counts)).transpose()

    np.savetxt(out_name, table, fmt=kwargs['fmt'], delimiter='\t')
    return counts, centers, edges
Beispiel #7
0
def convert_save(f, distance, **kwargs ):
    """
    Read file *f*, convert ra, dec, z into cartesian coordinates, using
    *distance* to get the comoving distance at redshift z, and save the
    result to a new file. The actual work is delegated to the pandas or
    the numpy based implementation, according to kwargs['pandas'].

    Parameters
    ----------
    f: file object or string
        file containing ra, dec and z
    distance: function
        function that evaluates the comoving distance at given redshift(s)
    kwargs: keyword arguments
    output
    ------
    max, min: lists
        maximum and minimum values of x, y and z
        If kwargs['skip'] == True and the output file name already exists, a *None*
        is returned

    accepted kwargs that affects the function
    +verbose: verbose mode [True|False]
    +replace: replace string *replace[0]* with *replace[1]* in f.name
    +insert: insert string *insert[0]* before *insert[1]* in f.name
    +skip: existing file names skipped [True|False]
    +overwrite: existing file names overwritten [True|False]
    +pandas: use pandas for the input
    +chunks: chunksize in pandas.read_table
    +usecols: columns to read from the input files. the first three must be ra,
        dec and redshift
    +negative_z: check or not for negative redshifts and perform action [None, 'skip', 'tozero']
    +fmt: format of the output file
    """
    verbose = kwargs['verbose']
    if verbose:
        print("Processing file '{}'".format(f))

    # output file name, handed over to the backend together with the input
    out_name = mf.create_ofile_name(f, **kwargs)

    # both backends take the same arguments: pick one and call it
    if kwargs['pandas']:
        backend = use_pandas
    else:
        backend = use_numpy
    return backend(f, out_name, distance, **kwargs)
Beispiel #8
0
def hist_ndensity_save_return(f, readcols, hdata, volume_z, **kwargs):
    """
    Read the required columns from file 'f', compute the histogram of the
    number density, save it to file and return it.

    The saved table has one row per bin with, in order: bin centre, lower
    bin edge, upper bin edge and histogram value.
    {common_params}
    volume_z: list
        effective volume in each bin of the histogram. 
        Warning: kwargs['range'] cannot be *None* and 
        kwargs['nbins'] == len(volume_z). No check performed
    output
    ------
    {return_values}

    {common_kwargs}
    {kwargs_file}
    """
    # the output name is built before the histogram is computed
    out_name = mf.create_ofile_name(f, **kwargs)
    density, centers, edges = hist_ndensity_return(f, readcols, hdata,
                                                   volume_z, **kwargs)

    # assemble the table: one row per quantity, then transpose to columns
    lower_edges = edges[:-1]
    upper_edges = edges[1:]
    table = np.vstack((centers, lower_edges, upper_edges, density)).transpose()

    np.savetxt(out_name, table, fmt=kwargs['fmt'], delimiter='\t')
    return density, centers, edges
def columns_operations(f, operations, to_column, **kwargs):
    """
    Read file, perform the desired operations between columns, store the
    result in a column and write the catalogue to a file.

    Signature:
        columns_operations("file.dat", 'c3+c6-1', 4)
        #read file, add content of column 3 and column 6, subtract 1 and save
        the result in column 4.

    Parameters
    ----------
    f: file object or string
        file containing the catalogue
    operations: string
        columns with operations to perform. Column number N is written as
        'cN'; the resulting expression is executed with 'eval'
    to_column: int
        save there the result of operations
    output
    ------
    none

    accepted kwargs that affects the function
    +substitute: substitute the content of the columns involved with operations
        with this value, if not None
    +verbose: verbose mode [True|False]
    +replace: replace string *replace[0]* with *replace[1]* in f.name
    +insert: insert string *insert[0]* before *insert[1]* in f.name
    +skip: existing file names skipped [True|False]
    +overwrite: existing file names overwritten [True|False]
    +pandas: use pandas for the input
    +chunks: chunksize in pandas.read_table
    +fmt: format of the output file
    """
    ofile = mf.create_ofile_name(f, **kwargs)  # create the output file name
    # an output name of None is treated as "skip this file", as done by the
    # other functions of this module that check the returned name
    if ofile is None:
        return None
    if kwargs['verbose']:
        print("Processing file '{}'".format(f))

    # re pattern with the columns name. The quantifier must be greedy: the
    # lazy '\d+?' captures a single digit, turning 'c12' into 'cat[:,1]2'
    pattern = re.compile(r"c(\d+)")
    if kwargs['pandas']:  # expression to evaluate to execute the operation
        to_evaluate = pattern.sub("cat[\\1]", operations)
    else:
        to_evaluate = pattern.sub("cat[:,\\1]", operations)
    # columns used in the operation
    columns_read = [int(s) for s in pattern.findall(operations)]

    # SECURITY NOTE: 'operations' is executed through eval(); it must come
    # from a trusted source, never from external data. The local name 'cat'
    # must not be renamed, as the evaluated expression refers to it.
    if kwargs['pandas']:
        # options shared by the one-shot and the chunked read.
        # raw string: '\s' in a normal string is an invalid escape sequence
        read_opts = dict(header=None,
                         skiprows=mf.n_lines_comments(f),
                         sep=r'\s')
        if kwargs['chunks'] is None:
            cat = pd.read_table(f, **read_opts)
            new_column = eval(to_evaluate)  # do the operation
            # substitute before storing the result, so that 'to_column' is
            # preserved also when it is one of the columns read
            if kwargs['substitute'] is not None:
                cat[columns_read] = kwargs['substitute']
            cat[to_column] = new_column  # copy the result of the operation
            np.savetxt(ofile, cat, fmt=kwargs['fmt'], delimiter='\t')
        else:
            chunks = pd.read_table(f, chunksize=kwargs['chunks'], **read_opts)
            with open(ofile, 'w') as of:
                for cat in chunks:
                    new_column = eval(to_evaluate)  # do the operation
                    if kwargs['substitute'] is not None:
                        cat[columns_read] = kwargs['substitute']
                    cat[to_column] = new_column  # copy the result
                    np.savetxt(of, cat, fmt=kwargs['fmt'], delimiter='\t')
    else:
        # ndmin=2 keeps a single-row catalogue two dimensional
        cat = np.loadtxt(f, ndmin=2)
        new_column = eval(to_evaluate)  # do the operation
        if kwargs['substitute'] is not None:
            cat[:, columns_read] = kwargs['substitute']
        cat[:, to_column] = new_column  # copy the result of the operation
        np.savetxt(ofile, cat, fmt=kwargs['fmt'], delimiter='\t')
def downsample_noz(f, interpf, zcol, **kwargs):
    """
    Read the input file, randomly downsample it and save it.

    Each row is kept when a uniform random number in [0, 1) is smaller than
    or equal to 'interpf' evaluated on the value of column 'zcol' of that row.

    Parameters
    ----------
    f: file object or string
        file containing the catalogue
    interpf: callable
        function that returns the fraction used for the downsample
    zcol: int
        column in the input file to use for the downsampling
    output
    ------
    none

    accepted kwargs that affects the function
    +verbose: verbose mode [True|False]
    +replace: replace string *replace[0]* with *replace[1]* in f.name
    +insert: insert string *insert[0]* before *insert[1]* in f.name
    +skip: existing file names skipped [True|False]
    +overwrite: existing file names overwritten [True|False]
    +pandas: use pandas for the input
    +chunks: chunksize in pandas.read_table
    +fmt: format of the output file
    """
    ofile = mf.create_ofile_name(f, **kwargs)  # create the output file name
    # an output name of None is treated as "skip this file", as done by the
    # other functions of this module that check the returned name
    if ofile is None:
        return None
    if kwargs['verbose']:
        print("Processing file '{}'".format(f))

    # read the input catalogue
    if kwargs['pandas']:
        # options shared by the one-shot and the chunked read.
        # raw string: '\s' in a normal string is an invalid escape sequence
        read_opts = dict(header=None,
                         skiprows=mf.n_lines_comments(f),
                         sep=r'\s')
        if kwargs['chunks'] is None:
            cat = pd.read_table(f, **read_opts)
            zvalues = cat[zcol]  # the column to use for the selection
            # random select objects to keep
            to_keep = np.random.rand(zvalues.size) <= interpf(zvalues)
            np.savetxt(ofile, cat[to_keep], fmt=kwargs['fmt'], delimiter='\t')
        else:
            chunks = pd.read_table(f, chunksize=kwargs['chunks'], **read_opts)
            # append the rows kept from every chunk to the same output file
            with open(ofile, 'w') as of:
                for cat in chunks:
                    zvalues = cat[zcol]
                    to_keep = np.random.rand(zvalues.size) <= interpf(zvalues)
                    np.savetxt(of,
                               cat[to_keep],
                               fmt=kwargs['fmt'],
                               delimiter='\t')
    else:
        # ndmin=2 keeps a single-row catalogue two dimensional
        cat = np.loadtxt(f, ndmin=2)
        zvalues = cat[:, zcol]  # the column to use for the selection
        # random select objects to keep
        to_keep = np.random.rand(zvalues.size) <= interpf(zvalues)
        np.savetxt(ofile, cat[to_keep, :], fmt=kwargs['fmt'], delimiter='\t')
Beispiel #11
0
def substitute_from_file(fname, col, replacementa, **kwargs):
    """
    Read file 'fname', replace the values of column 'col' according to
    'replacementa' and save the result.

    Parameters
    ----------
    fname: file object or string
        file containing the catalogue
    col: integer
        column to use
    replacementa: 1D or 2D array
        replacement table. If 1D, the values of the column are cast to
        integer and used as indices into it. If Nx2, the values matching
        replacementa[:,0] are replaced by the corresponding replacementa[:,1]

    output
    ------
    none

    accepted kwargs that affects the function
    +verbose: verbose mode [True|False]
    +replace: replace string *replace[0]* with *replace[1]* in f.name
    +insert: insert string *insert[0]* before *insert[1]* in f.name
    +skip: existing file names skipped [True|False]
    +overwrite: existing file names overwritten [True|False]
    +pandas: use pandas for the input
    +chunks: chunksize in pandas.read_table
    +fmt: format of the output file
    """
    def replace(x, repl_array):
        """
        if 'repl_array' is a 1D array, return repl_array[x]
        if 'repl_array is a Nx2 array, returns repl_array[:,1] where x matches repl_array[:,0]
        """
        if repl_array.ndim == 1:
            # NOTE(review): the warning is about 'x' but the check is on the
            # dtype of 'repl_array'; kept as in the original -- confirm intent
            if repl_array.dtype != int:
                print("Warning: 'x' will be casted to integer")
            # builtin int: the 'np.int' alias was removed in NumPy 1.24
            return repl_array[x.astype(int)]
        else:
            # promote to float when the replacement values are floats, so
            # that they are not truncated. Builtin float: 'np.float' was
            # removed in NumPy 1.24
            if x.dtype == int and repl_array[:, 1].dtype == float:
                _x = x.astype(float)
            else:
                _x = x.copy()
            # match against the original 'x', so that a replaced value is
            # never replaced again by a later row of the table
            for (r0, r1) in repl_array:
                _x[x == r0] = r1
            return _x

    if kwargs['verbose']:
        print("Processing file '{}'".format(fname))
    ofile = mf.create_ofile_name(fname,
                                 **kwargs)  # create the output file name
    # an output name of None is treated as "skip this file", as done by the
    # other functions of this module that check the returned name
    if ofile is None:
        return None

    if kwargs['pandas']:
        # options shared by the one-shot and the chunked read.
        # raw string: '\s' in a normal string is an invalid escape sequence
        read_opts = dict(header=None,
                         skiprows=mf.n_lines_comments(fname),
                         sep=r'\s')
        if kwargs['chunks'] is None:
            cat = pd.read_table(fname, **read_opts)
            cat[col] = replace(cat[col], replacementa)
            np.savetxt(ofile, cat, fmt=kwargs['fmt'], delimiter='\t')
        else:
            chunks = pd.read_table(fname,
                                   chunksize=kwargs['chunks'],
                                   **read_opts)
            with open(ofile, 'w') as fo:  # open the output file
                for cat in chunks:  # loop over the chunks
                    cat[col] = replace(cat[col], replacementa)
                    np.savetxt(fo, cat, fmt=kwargs['fmt'], delimiter='\t')

    else:
        # ndmin=2 keeps a single-row catalogue two dimensional
        cat = np.loadtxt(fname, ndmin=2)
        cat[:, col] = replace(cat[:, col], replacementa)
        np.savetxt(ofile, cat, fmt=kwargs['fmt'], delimiter='\t')
def fits2ascii(fname, selected_columns, operations=None, **kwargs):
    """
    Read some columns from the fits file 'fname' and save the output into an
    ascii table. Operations between columns permitted. The output file has
    the order given in 'selected_columns'.

    Signature:
        fits2ascii( "file.fits", 0 )  #read and save the given column
        fits2ascii( "file.fits", [0,4,5,3] )  #read and save the given columns (in that order)
        fits2ascii( "file.fits", ['a', 'b', 'n', 'c'] ) #as before but with column name
        fits2ascii( "file.fits", ['a', 'b+n-1', 'c'] ) #save a 3 columns ascii table with 'a' in the first,
        the result of the fitsio.read_column('b')+fitsio.read_column('n')-1 in the second and 'c' in the third

    Parameters
    ----------
    fname: string
        file containing the catalogue
    selected_columns: integer, string, list of integers or strings.
        columns to read. If strings, operation permitted: in this case
        the *name* of the column, *not* the number, must be given
    operations: regular expression pattern
        compiled pattern containing the list of accepted operators. If None
        no operation done

    output
    ------
    none

    accepted kwargs that affects the function
    +verbose: verbose mode [True|False]
    +replace: replace string *replace[0]* with *replace[1]* in f.name
    +insert: insert string *insert[0]* before *insert[1]* in f.name
    +skip: existing file names skipped [True|False]
    +overwrite: existing file names overwritten [True|False]
    +fmt: format of the output file
    """
    if kwargs['verbose']:
        print("extracting columns '{0}' from file '{1}'".format(
            selected_columns, fname))
    ofile = mf.create_ofile_name(fname,
                                 **kwargs)  # create the output file name
    # an output name of None is treated as "skip this file", as done by the
    # other functions of this module that check the returned name
    if ofile is None:
        return None

    # check if there are operations to execute. A plain boolean is used:
    # the original 'sum(False)' raised a TypeError when operations was None
    has_operations = False
    if operations is not None:
        # a single column (number or name) is checked as a whole instead of
        # being iterated over
        if isinstance(selected_columns, (str, int, np.integer)):
            candidates = [selected_columns]
        else:
            candidates = selected_columns
        for c in candidates:
            try:
                n_matches = len(operations.findall(c))
            except TypeError:  # the number of the column given
                continue
            if n_matches > 0:  # a string containing at least one operator
                has_operations = True
                break

    # If there are no operations to perform, open the file, read the columns
    # and save them
    if not has_operations:
        cat = fitsio.read(fname, columns=selected_columns)[selected_columns]
    # If there are operations involved
    else:
        with fitsio.FITS(fname) as fits:
            cat = read_with_operations(fits[1], selected_columns, operations)
    np.savetxt(ofile, cat, fmt=kwargs['fmt'], delimiter='\t')