def _qc(self, file_name, out_file_name=''):
    """
    _qc() [private]
    Purpose:    Runs every quality control rule in self.rules against a single file and replaces each
                    value that fails a rule with NaN.
    Parameters: file_name [type=str]
                    Name of the file to quality control.
                out_file_name [type=str]
                    Optional name of the file that receives the quality-controlled output.  When it
                    is left empty, the file named by file_name is overwritten with the
                    quality-controlled data.
    Returns:    [nothing]
    """
    data_file = DataIO(file_name=file_name, mode="rw")

    # One boolean mask per variable: True wherever the data have passed every rule applied so far.
    good_masks = {}

    for rule_vars, rule_func in self.rules.iteritems():
        # A rule key names either one variable or several variables separated by commas
        # (each comma optionally followed by spaces).
        var_names = re.split(", *", rule_vars) if rule_vars.find(",") > -1 else [rule_vars]

        for name in var_names:
            if name in good_masks:
                continue
            # First time this variable is seen: start from an all-True mask shaped like the variable.
            dim_names = data_file.get_variable_attribute(name, 'dimensions')
            shape = tuple(data_file.get_dimension(dim) for dim in dim_names)
            good_masks[name] = np.ones(shape, dtype=bool)

        # The variable data come back as a list of numpy arrays, which is unpacked directly into
        # the rule function's positional arguments.
        rule_data = data_file.get_variable(var_names)
        passed = rule_func(*rule_data)

        # Fold the rule's verdict into the mask of every variable the rule covers (element-wise and).
        for name in var_names:
            good_masks[name] &= passed

    # Write each checked variable back with the failing values replaced by NaN.
    # NOTE: an alternative that spliced the bad indexes out along new "<dim>_qc" dimensions was
    # sketched here previously but was never enabled.
    for name, mask in good_masks.items():
        data_file.set_variable(name, np.where(mask, data_file.get_variable(name), np.nan))

    # Sew 'er up
    data_file.close(out_file_name=out_file_name)
def main():
    """
    main()
    Purpose:    Command-line entry point that prints selected metadata from a data file.
    Parameters: [none]  The following options are read from the command line:
                --data-file [type=str]
                    File to open (defaults to data/ECMWF_Global_2p5_20120203_0000.nc).
                --dimensions
                    Print every dimension name next to the length of that dimension.
                --attributes
                    Print every file attribute, sorted by name, next to its value.
                --variables
                    Print the sorted list of variable names.
                --variable [type=str]
                    Print the attributes (sorted by name) and the dimensions of the named variable.
    Returns:    [nothing]
    """
    ap = argparse.ArgumentParser()
    ap.add_argument('--data-file', dest='data_file', default="data/ECMWF_Global_2p5_20120203_0000.nc")
    ap.add_argument('--attributes', dest='attrs', action='store_true')
    ap.add_argument('--dimensions', dest='dims', action='store_true')
    ap.add_argument('--variables', dest='vars', action='store_true')
    ap.add_argument('--variable', dest='var', default="")
    args = ap.parse_args()

    data_file = DataIO(file_name=args.data_file)

    def column_width(names):
        # max() raises ValueError on an empty sequence, so a file without dimensions or
        # attributes (or a variable without attributes) must fall back to a width of 0.
        lengths = [len(name) for name in names]
        return max(lengths) if lengths else 0

    def print_row(name, width, value):
        # Pad the name to the column width, then emit the value after a single space.
        # A single-argument print(...) produces the same output whether print is a
        # statement or a function; the explicit "%s %s" supplies the space that the
        # print statement puts between comma-separated items.
        label = " %s%s" % (name, " " * (width - len(name)))
        print("%s %s" % (label, value))

    if args.dims:
        print("Dimensions:")
        dims = data_file.get_dimensions().items()
        width = column_width([dim_name for dim_name, _ in dims])
        for dim_name, dim in dims:
            print_row(dim_name, width, len(dim))

    if args.attrs:
        print("File Attributes:")
        attr_list = data_file.get_file_attribute_list()
        width = column_width(attr_list)
        for attribute in sorted(attr_list):
            print_row(attribute, width, data_file.get_file_attribute(attribute))

    if args.vars:
        print("Variables:")
        for variable in sorted(data_file.get_variable_list()):
            print(" %s" % variable)

    if args.var != "":
        print("Attributes for Variable %s" % args.var)
        attr_list = data_file.get_variable_attribute_list(args.var)
        width = column_width(attr_list)
        for attribute in sorted(attr_list):
            print_row(attribute, width, data_file.get_variable_attribute(args.var, attribute))

        dim_list = data_file.get_variable_dimensions(args.var)
        print("%s %s" % ("Dimensions: ", dim_list))
#!/usr/local/epd/bin/python import sys sys.path.append("../") from util.dataIO import DataIO import numpy as np df = DataIO("wrfout_d01_PLEV.nc", mode="rw") missing_value = df.get_variable_attribute('T', 'missing_value') df.set_variable_attribute('P', 'missing_value', missing_value) df.close()