def assign_noz(f, nofz, **kwargs):
    """Read file 'f', substitute the column kwargs['nz_column'] with
    nofz(z), with z read from column kwargs['z_column'] of 'f' itself,
    and save the result to a new file.

    Parameters
    ----------
    f: file object or string
        file containing the catalogue
    nofz: function
        returns n(z) for all the z in the input file

    output
    ------
    none

    accepted kwargs that affects the function
    +verbose: verbose mode [True|False]
    +replace: replace string *replace[0]* with *replace[1]* in f.name
    +insert: insert string *insert[0]* before *insert[1]* in f.name
    +skip: existing file names skipped [True|False]
    +overwrite: existing file names overwritten [True|False]
    +z_column: column of 'f' containing the redshift
    +nz_column: column of 'f' where n(z) is stored
    +fmt: format of the output file
    """
    ofile = mf.create_ofile_name(f, **kwargs)  # create the output file name
    if ofile is None:  # skip existing files if the "skip" option is used
        return None
    cat = np.loadtxt(f)  # read the input catalogue
    # evaluate n(z) at the catalogue redshifts and store it in place
    cat[:, kwargs['nz_column']] = nofz(cat[:, kwargs['z_column']])
    np.savetxt(ofile, cat, fmt=kwargs['fmt'], delimiter='\t')
    return None
def subtract_fromfile(f, absmin, **kwargs):
    """Read file 'f', subtract absmin+offset from the first three columns
    and save the result to a new file.

    Parameters
    ----------
    f: file object or string
        file containing the catalogue
    absmin: list or array of three floats
        values to subtract from the first 3 columns

    output
    ------
    none

    accepted kwargs that affects the function
    +verbose: verbose mode [True|False]
    +replace: replace string *replace[0]* with *replace[1]* in f.name
    +insert: insert string *insert[0]* before *insert[1]* in f.name
    +skip: existing file names skipped [True|False]
    +overwrite: existing file names overwritten [True|False]
    +offset: extra offset to add to absmin
    +fmt: format of the output file
    """
    ofile = mf.create_ofile_name(f, **kwargs)  # create the output file name
    if ofile is None:  # skip existing files if the "skip" option is used
        return None
    cat = np.loadtxt(f)  # read the input catalogue
    cat[:, :3] -= absmin + kwargs['offset']  # subtract the minimum
    np.savetxt(ofile, cat, fmt=kwargs['fmt'], delimiter='\t')
    return None
def rebin(fname, mbins, **kwargs):
    """Rebin the power spectrum in file 'fname' and save it into the
    output file.

    Parameters
    ----------
    fname: string
        file name
    mbins: int
        number of bins to merge to create the output

    accepted kwargs that affects the function:
    +from_bin: skip the first 'from' bins
    +replace: replace string *replace[0]* with *replace[1]* in f.name
    +insert: insert string *insert[0]* before *insert[1]* in f.name
    +skip: existing file names skipped [True|False]
    +overwrite: existing file names overwritten [True|False]
    +fmt: format of the output file
    """
    ofile = mf.create_ofile_name(fname, **kwargs)  # create the output file name
    if ofile is None:  # skip existing files if the "skip" option is used
        return None
    header_lines, ps = read_file(fname)  # read the file
    # if required, skip the first bins
    fbin = kwargs.get('from_bin')
    if fbin is not None:
        ps = ps[fbin:, :]
    # check that the number of remaining bins is a multiple of mbins and,
    # if not, cut the last bins
    psremaining = ps.shape[0] % mbins
    if psremaining != 0:
        ps = ps[:-psremaining, :]
    # separate the power spectrum into its components
    k, pk, pksn, nmodes = ps.T
    # reshape to simplify rebinning: each row holds the mbins input bins
    # that get merged into one output bin
    finalbins = ps.shape[0] // mbins
    kr = k.reshape([finalbins, mbins])
    pkr = pk.reshape([finalbins, mbins])
    pksnr = pksn.reshape([finalbins, mbins])
    nmodesr = nmodes.reshape([finalbins, mbins])
    # get the values of the rebinned power spectrum: k is a plain average,
    # P(k) and the shot noise are weighted by the number of modes
    kr = np.average(kr, axis=1)
    pkr = np.average(pkr, axis=1, weights=nmodesr)
    pksnr = np.average(pksnr, axis=1, weights=nmodesr)
    nmodesr = nmodesr.sum(axis=1)
    # save the rebinned power spectrum, preserving the original header
    with open(ofile, 'w') as of:
        of.writelines(header_lines)
        np.savetxt(of, np.vstack([kr, pkr, pksnr, nmodesr]).T,
                   delimiter='\t', fmt=kwargs['fmt'])
def move_columns(f, from_columns, to_columns, **kwargs):
    """Read file 'f' and substitute the content of columns 'to_columns'
    with the one of 'from_columns'.

    Parameters
    ----------
    f: file object or string
        file containing the catalogue
    from_columns: list of ints
        list of columns to copy
    to_columns: list of ints
        list of columns where to copy

    output
    ------
    none

    accepted kwargs that affects the function
    +verbose: verbose mode [True|False]
    +replace: replace string *replace[0]* with *replace[1]* in f.name
    +insert: insert string *insert[0]* before *insert[1]* in f.name
    +skip: existing file names skipped [True|False]
    +overwrite: existing file names overwritten [True|False]
    +swap: swap the columns instead of just moving
    +pandas: use pandas for the input
    +chunks: chunksize in pandas.read_table
    +substitute: value to substitute in 'from_columns' that are not in
        'to_columns'
    +fmt: format of the output file
    """
    ofile = mf.create_ofile_name(f, **kwargs)  # create the output file name
    if ofile is None:  # skip existing files if the "skip" option is used
        return None
    if kwargs['verbose']:
        print("Processing file '{}'".format(f))
    # get the columns to move or swap (copies, so the input lists are
    # never mutated)
    substitute = None
    temp_from_cols, temp_to_cols = from_columns[:], to_columns[:]
    if kwargs['swap']:  # swap the columns: copy in both directions
        temp_from_cols.extend(to_columns)
        temp_to_cols.extend(from_columns)
    elif kwargs['substitute'] is not None:
        # find the source columns that are not overwritten themselves and
        # thus need their value substituted
        substitute = [i for i in temp_from_cols if i not in temp_to_cols]
        if len(substitute) == 0:
            substitute = None  # set back to None: nothing to substitute
    if kwargs['pandas']:
        use_pandas(f, ofile, temp_from_cols, temp_to_cols, substitute,
                   **kwargs)
    else:
        cat = np.loadtxt(f)
        cat[:, temp_to_cols] = cat[:, temp_from_cols]
        if substitute is not None:
            cat[:, substitute] = kwargs['substitute']
        np.savetxt(ofile, cat, fmt=kwargs['fmt'], delimiter='\t')
def cselect(f, constraint, **kwargs):
    """Read file 'f', keep only the rows that satisfy 'constraint' and
    save them to a new file.

    Parameters
    ----------
    f: file object or string
        file containing the catalogue
    constraint: string
        selection criterion; evaluated with 'eval' with the catalogue
        bound to the name 'cat'

    output
    ------
    none

    accepted kwargs that affects the function
    +verbose: verbose mode [True|False]
    +replace: replace string *replace[0]* with *replace[1]* in f.name
    +insert: insert string *insert[0]* before *insert[1]* in f.name
    +skip: existing file names skipped [True|False]
    +overwrite: existing file names overwritten [True|False]
    +pandas: use pandas for the input
    +chunks: chunksize in pandas.read_table
    +fmt: format of the output file
    """
    ofile = mf.create_ofile_name(f, **kwargs)  # create the output file name
    if ofile is None:  # skip existing files if "skip" option used
        return None
    if kwargs['verbose']:
        print("Processing file '{}'".format(f))
    # NOTE(review): 'constraint' is passed to eval — only use with trusted
    # input
    if kwargs['pandas']:
        if kwargs['chunks'] is None:
            # read the whole catalogue at once with pandas
            cat = pd.read_table(f, header=None,
                                skiprows=mf.n_lines_comments(f), sep='\s')
            cat = cat[eval(constraint)]
            np.savetxt(ofile, cat, fmt=kwargs['fmt'], delimiter='\t')
        else:
            # read the catalogue in chunks and filter chunk by chunk
            chunks = pd.read_table(f, header=None, sep='\s',
                                   skiprows=mf.n_lines_comments(f),
                                   chunksize=kwargs['chunks'])
            with open(ofile, 'w') as of:
                for cat in chunks:
                    cat = cat[eval(constraint)]
                    np.savetxt(of, cat, fmt=kwargs['fmt'], delimiter='\t')
    else:
        cat = np.loadtxt(f)
        cat = cat[eval(constraint), :]
        np.savetxt(ofile, cat, fmt=kwargs['fmt'], delimiter='\t')
def hist_save_return(f, readcols, hdata, **kwargs):
    """ Read the required 'col' from file 'f', do the histogram, save it
    to file and return it
    {common_params}
    output
    ------
    {return_values}
    {common_kwargs}
    {kwargs_file}
    """
    ofile = mf.create_ofile_name(f, **kwargs)  # create the output file name
    if ofile is None:  # skip existing files if the "skip" option is used
        return None
    hist, bin_centers, bin_edges = hist_return(f, readcols, hdata, **kwargs)
    # create the output 2d array: bin centre, lower edge, upper edge, counts
    outhist = np.vstack([bin_centers, bin_edges[:-1], bin_edges[1:], hist]).T
    np.savetxt(ofile, outhist, fmt=kwargs['fmt'], delimiter='\t')
    return hist, bin_centers, bin_edges
def convert_save(f, distance, **kwargs):
    """Read file *f*, convert ra, dec, z into cartesian coordinates,
    computing the comoving distance at redshift z with *distance*, and
    save to a new file.

    Parameters
    ----------
    f: file object or string
        file containing ra, dec and z
    distance: function
        function that evaluates the comoving distance at given redshift(s)
    kwargs: keyword arguments

    output
    ------
    max, min: lists
        maximum and minimum values of x, y and z
        If kwargs['skip'] == True and the output file name already exists,
        a *None* is returned

    accepted kwargs that affects the function
    +verbose: verbose mode [True|False]
    +replace: replace string *replace[0]* with *replace[1]* in f.name
    +insert: insert string *insert[0]* before *insert[1]* in f.name
    +skip: existing file names skipped [True|False]
    +overwrite: existing file names overwritten [True|False]
    +pandas: use pandas for the input
    +chunks: chunksize in pandas.read_table
    +usecols: columns to read from the input files. The first three must
        be ra, dec and redshift
    +negative_z: check or not for negative redshifts and perform action
        [None, 'skip', 'tozero']
    +fmt: format of the output file
    """
    if kwargs['verbose']:
        print("Processing file '{}'".format(f))
    ofile = mf.create_ofile_name(f, **kwargs)  # create the output file name
    if ofile is None:
        # file skipped: return None as promised in the docstring instead
        # of handing a None file name to the workers
        return None
    if kwargs['pandas']:
        return use_pandas(f, ofile, distance, **kwargs)
    return use_numpy(f, ofile, distance, **kwargs)
def hist_ndensity_save_return(f, readcols, hdata, volume_z, **kwargs):
    """ Read the required 'col' from file 'f', do the histogram of the
    number density, save it to file and return it
    {common_params}
    volume_z: list
        effective volume in each bin of the histogram.
        Warning: kwargs['range'] cannot be *None* and
        kwargs['nbins'] == len(volume_z). No check performed
    output
    ------
    {return_values}
    {common_kwargs}
    {kwargs_file}
    """
    ofile = mf.create_ofile_name(f, **kwargs)  # create the output file name
    if ofile is None:  # skip existing files if the "skip" option is used
        return None
    hist, bin_centers, bin_edges = hist_ndensity_return(
        f, readcols, hdata, volume_z, **kwargs)
    # create the output 2d array: bin centre, lower edge, upper edge, density
    outhist = np.vstack([bin_centers, bin_edges[:-1], bin_edges[1:], hist]).T
    np.savetxt(ofile, outhist, fmt=kwargs['fmt'], delimiter='\t')
    return hist, bin_centers, bin_edges
def columns_operations(f, operations, to_column, **kwargs):
    """Read file, perform the desired operations between columns and
    write the result to a file.

    Signature:
        columns_operations("file.dat", 'c3+c6-1', 4)
        # read file, add content of column 3 and column 6, subtract 1 and
        # save the result in column 4

    Parameters
    ----------
    f: file object or string
        file containing the catalogue
    operations: string
        columns with operations to perform
    to_column: int
        save there the result of operations

    output
    ------
    none

    accepted kwargs that affects the function
    +substitute: substitute the content of the columns involved with
        operations with this value, if not None
    +verbose: verbose mode [True|False]
    +replace: replace string *replace[0]* with *replace[1]* in f.name
    +insert: insert string *insert[0]* before *insert[1]* in f.name
    +skip: existing file names skipped [True|False]
    +overwrite: existing file names overwritten [True|False]
    +pandas: use pandas for the input
    +chunks: chunksize in pandas.read_table
    +fmt: format of the output file
    """
    ofile = mf.create_ofile_name(f, **kwargs)  # create the output file name
    if kwargs['verbose']:
        print("Processing file '{}'".format(f))
    # re pattern with the column names; the quantifier must be greedy so
    # that multi-digit columns like 'c12' are captured whole (the former
    # non-greedy '\d+?' captured only the first digit, turning 'c12' into
    # 'cat[:,1]2')
    pattern = re.compile(r"c(\d+)")
    # expression to evaluate to execute the operation
    if kwargs['pandas']:
        to_evaluate = pattern.sub("cat[\\1]", operations)
    else:
        to_evaluate = pattern.sub("cat[:,\\1]", operations)
    # columns used in the operation
    columns_read = [int(s) for s in pattern.findall(operations)]
    # read the input catalogue
    if kwargs['pandas']:
        if kwargs['chunks'] is None:
            cat = pd.read_table(f, header=None,
                                skiprows=mf.n_lines_comments(f), sep='\s')
            new_column = eval(to_evaluate)  # do the operation
            if kwargs['substitute'] is not None:
                cat[columns_read] = kwargs['substitute']
            cat[to_column] = new_column  # copy the result of the operation
            np.savetxt(ofile, cat, fmt=kwargs['fmt'], delimiter='\t')
        else:
            chunks = pd.read_table(f, header=None, sep='\s',
                                   skiprows=mf.n_lines_comments(f),
                                   chunksize=kwargs['chunks'])
            with open(ofile, 'w') as of:
                for cat in chunks:
                    new_column = eval(to_evaluate)  # do the operation
                    if kwargs['substitute'] is not None:
                        cat[columns_read] = kwargs['substitute']
                    # copy the result of the operation
                    cat[to_column] = new_column
                    np.savetxt(of, cat, fmt=kwargs['fmt'], delimiter='\t')
    else:
        cat = np.loadtxt(f)
        new_column = eval(to_evaluate)  # do the operation
        if kwargs['substitute'] is not None:
            cat[:, columns_read] = kwargs['substitute']
        cat[:, to_column] = new_column  # copy the result of the operation
        np.savetxt(ofile, cat, fmt=kwargs['fmt'], delimiter='\t')
def downsample_noz(f, interpf, zcol, **kwargs):
    """Read the input file, randomly downsample it according to
    interpf(z) and save the surviving rows.

    Parameters
    ----------
    f: file object or string
        file containing the catalogue
    interpf: callable
        function that returns the fraction used for the downsample
    zcol: int
        column in the input file to use for the downsampling

    output
    ------

    accepted kwargs that affects the function
    +substitute: substitute the content of the columns involved with
        operations with this value, if not None
    +verbose: verbose mode [True|False]
    +replace: replace string *replace[0]* with *replace[1]* in f.name
    +insert: insert string *insert[0]* before *insert[1]* in f.name
    +skip: existing file names skipped [True|False]
    +overwrite: existing file names overwritten [True|False]
    +pandas: use pandas for the input
    +chunks: chunksize in pandas.read_table
    +fmt: format of the output file
    """
    ofile = mf.create_ofile_name(f, **kwargs)  # create the output file name
    if kwargs['verbose']:
        print("Processing file '{}'".format(f))

    def selection_mask(zvalues):
        # keep each object with probability interpf(z)
        return np.random.rand(zvalues.size) <= interpf(zvalues)

    # read the input catalogue, keep the selected rows and save them
    if kwargs['pandas']:
        if kwargs['chunks'] is None:
            cat = pd.read_table(f, header=None,
                                skiprows=mf.n_lines_comments(f), sep='\s')
            np.savetxt(ofile, cat[selection_mask(cat[zcol])],
                       fmt=kwargs['fmt'], delimiter='\t')
        else:
            reader = pd.read_table(f, header=None, sep='\s',
                                   skiprows=mf.n_lines_comments(f),
                                   chunksize=kwargs['chunks'])
            with open(ofile, 'w') as of:
                for chunk in reader:
                    np.savetxt(of, chunk[selection_mask(chunk[zcol])],
                               fmt=kwargs['fmt'], delimiter='\t')
    else:
        cat = np.loadtxt(f)
        np.savetxt(ofile, cat[selection_mask(cat[:, zcol]), :],
                   fmt=kwargs['fmt'], delimiter='\t')
def substitute_from_file(fname, col, replacementa, **kwargs):
    """Read file 'fname' and replace column 'col' matching
    'replacementa[:, 0]' with 'replacementa[:, 1]'.

    Parameters
    ----------
    fname: file object or string
        file containing the catalogue
    col: integer
        column to use
    replacementa: 1D or 2D array
        replacement table (see the nested 'replace' helper)

    output
    ------
    none

    accepted kwargs that affects the function
    +verbose: verbose mode [True|False]
    +replace: replace string *replace[0]* with *replace[1]* in f.name
    +insert: insert string *insert[0]* before *insert[1]* in f.name
    +skip: existing file names skipped [True|False]
    +overwrite: existing file names overwritten [True|False]
    +pandas: use pandas for the input
    +chunks: chunksize in pandas.read_table
    +fmt: format of the output file
    """

    def replace(x, repl_array):
        """If 'repl_array' is a 1D array, return repl_array[x];
        if 'repl_array' is a Nx2 array, return repl_array[:, 1] where x
        matches repl_array[:, 0].
        """
        if repl_array.ndim == 1:
            if repl_array.dtype != int:
                print("Warning: 'x' will be casted to integer")
            # use the builtin 'int': the 'np.int' alias was removed in
            # numpy 1.24
            return repl_array[x.astype(int)]
        else:
            # promote to float when the replacement values would otherwise
            # be truncated by the integer dtype of x
            if x.dtype == int and repl_array[:, 1].dtype == float:
                _x = x.astype(float)
            else:
                _x = x.copy()
            # match against the original x so already-replaced values are
            # not replaced a second time
            for (r0, r1) in repl_array:
                _x[x == r0] = r1
            return _x

    if kwargs['verbose']:
        print("Processing file '{}'".format(fname))
    ofile = mf.create_ofile_name(fname, **kwargs)  # create the output file name
    if kwargs['pandas']:
        if kwargs['chunks'] is None:
            cat = pd.read_table(fname, header=None,
                                skiprows=mf.n_lines_comments(fname),
                                sep='\s')
            cat[col] = replace(cat[col], replacementa)
            np.savetxt(ofile, cat, fmt=kwargs['fmt'], delimiter='\t')
        else:
            chunks = pd.read_table(fname, header=None, sep='\s',
                                   skiprows=mf.n_lines_comments(fname),
                                   chunksize=kwargs['chunks'])
            with open(ofile, 'w') as fo:  # open the output file
                for cat in chunks:  # loop over the chunks
                    cat[col] = replace(cat[col], replacementa)
                    np.savetxt(fo, cat, fmt=kwargs['fmt'], delimiter='\t')
    else:
        cat = np.loadtxt(fname)
        cat[:, col] = replace(cat[:, col], replacementa)
        np.savetxt(ofile, cat, fmt=kwargs['fmt'], delimiter='\t')
def fits2ascii(fname, selected_columns, operations=None, **kwargs):
    """Read some columns from a fits file 'fname' and save the output
    into an ascii table. Operations between columns are permitted. The
    output file has the order given in 'selected_columns'.

    Signature:
        fits2ascii("file.fits", 0)  # read and save the given column
        fits2ascii("file.fits", [0, 4, 5, 3])  # read and save the given
            # columns (in that order)
        fits2ascii("file.fits", ['a', 'b', 'n', 'c'])  # as before but
            # with column names
        fits2ascii("file.fits", ['a', 'b+n-1', 'c'])  # save a 3 columns
            # ascii table with 'a' in the first, the result of
            # fitsio.read_column('b')+fitsio.read_column('n')-1 in the
            # second and 'c' in the third

    Parameters
    ----------
    fname: string
        file containing the catalogue
    selected_columns: integer, string, list of integers or strings
        columns to read. If strings, operations permitted: in this case
        the *name* of the column, *not* the number, must be given
    operations: regular expression pattern
        pattern containing the list of accepted operators.
        If None no operation done

    output
    ------
    none

    accepted kwargs that affects the function
    +verbose: verbose mode [True|False]
    +replace: replace string *replace[0]* with *replace[1]* in f.name
    +insert: insert string *insert[0]* before *insert[1]* in f.name
    +skip: existing file names skipped [True|False]
    +overwrite: existing file names overwritten [True|False]
    +fmt: format of the output file
    """
    if kwargs['verbose']:
        print("extracting columns '{0}' from file '{1}'".format(
            selected_columns, fname))
    ofile = mf.create_ofile_name(fname, **kwargs)  # create the output file name
    # check if there are operations to execute; use a plain boolean flag:
    # the old code set 'check_operations = False' and then called
    # sum(check_operations), which raises TypeError when operations is None
    if operations is None:
        any_operations = False
    else:
        check_operations = []
        for c in selected_columns:
            try:
                n_matches = len(operations.findall(c))
            except TypeError:
                # the number of the column given: no operation possible
                check_operations.append(False)
            else:
                # a string was given: operations present iff the pattern
                # matched at least once
                check_operations.append(n_matches > 0)
        any_operations = any(check_operations)
    if not any_operations:
        # no operations to perform: open the file, read the columns and
        # save them in the requested order
        cat = fitsio.read(fname, columns=selected_columns)[selected_columns]
    else:
        # there are operations involved: delegate the column arithmetic
        with fitsio.FITS(fname) as fits:
            cat = read_with_operations(fits[1], selected_columns, operations)
    np.savetxt(ofile, cat, fmt=kwargs['fmt'], delimiter='\t')