def check_undefined_dimensions(dim_dict, metafname=None, log=False):
    '''
    Resolve dimensions whose size is declared as "*" (unknown) in the CDL file.

    If at least one dimension is undefined, the metafile is read and the
    missing sizes are loaded through read_metafile.getDimVal(); otherwise the
    metafile is not touched at all.

    input:
        dim_dict  [dictionary] - keys are dimension names, values are dimension
                                 sizes. A value equal to '*' marks an undefined
                                 dimension; any other value is left untouched.
        metafname [str]        - full path to the metafile. Default = None.
                                 Only required when dim_dict contains '*'.
        log       [bool]       - forwarded to read_metafile.getDimVal().

    out:
        dim_dict  [dictionary] - THE SAME object that was passed in (modified
                                 in place), with every dimension reported by the
                                 metafile replaced by its size.
        dims_vals [dict]       - second value returned by getDimVal(); empty
                                 dict when nothing was undefined. Per the
                                 original notes it maps dimension names to
                                 their unique values -- TODO confirm against
                                 read_metafile.getDimVal.

    raises:
        ValueError - an undefined dimension exists but metafname is None, or
                     the metafile reports a dimension that is not among the
                     undefined ones.
    '''
    # A size of '*' means "unknown: read it from the data file first".
    unknown_dims = [name for name, val in dim_dict.items() if val == '*']
    dims_vals = {}

    if not unknown_dims:
        # Nothing to resolve: no metafile (and no metafile path) is needed.
        return dim_dict, dims_vals

    if metafname is None:
        err_msg = 'Metafile path is missing. CDL file declares variables with unknown dimension size: metafile is needed'
        raise ValueError(err_msg)

    # Imported only on the path that needs them, so callers with fully defined
    # dimensions do not depend on the metafile machinery.
    import os
    import read_metafile

    path2metafile = os.path.split(metafname)[0]
    metadata = read_metafile.read_metadata(metafname)
    dims, dims_vals = read_metafile.getDimVal(metadata, path2metafile, unknown_dims, log=log)

    # Validate everything first so dim_dict is never left half-updated.
    for d in dims.keys():
        if d not in unknown_dims:
            err_msg = 'Dimension "{0}" declared in Metafile do not match any of the undefined dimension in CDL file {1}'.format(d, unknown_dims)
            raise ValueError(err_msg)

    for d in dims.keys():
        dim_dict[d] = dims[d]

    return dim_dict, dims_vals
def make_cdl(cdlFname, metafname=None, outpath='', log=False):
    '''
    Create a completed copy of a CDL file in which every "*" placeholder is
    replaced by real content.

    The result is written to <outpath>/_<basename of cdlFname>.

    Steps:
    _________________________________
    1) read the CDL file and extract the "dimensions:" and "variables:"
       sections (via read_cdl);
    2) resolve undefined dimensions (dim = "*") and write a copy of the CDL
       with the resolved sizes (replace_undefined_dimensions);
    3) walk through that copy line by line; for every line of the form
       "<name> = *" load the observation data described by the metafile and
       write the values, one per line, in place of the "*".

    input:
        cdlFname  [str]  - path to the source CDL file.
        metafname [str]  - path to the metafile. Default = None. Required as
                           soon as the CDL contains an undefined dimension or
                           an undefined variable.
        outpath   [str]  - directory for the output file. Default = ''.
        log       [bool] - print progress messages and forward the flag to
                           the helper modules.

    raises:
        ValueError - an undefined variable is found that is not present in
                     the parsed variables, or that cannot be filled because
                     no metafile was given.

    Note: each entry of the variables dictionary is indexed positionally:
    [0] is passed as dtype to funcs.loadASCII, [2] is the attribute mapping
    searched for "_FillValue", [3] and [1] are passed (in that order) to
    funcs.create_array_from_data. Their exact meaning is defined in read_cdl
    -- TODO confirm.
    '''
    sname = 'make_cdl():'
    import os
    import re
    import read_cdl
    import read_metafile
    import funcs

    def _log(template, *args):
        # Formatting happens only when logging is on. The single-argument
        # print(...) form produces the same text as `print sname, message`.
        if log:
            print('{0} {1}'.format(sname, template.format(*args)))

    fname = os.path.basename(os.path.abspath(cdlFname))
    newFileName = os.path.join(outpath, "_" + fname)
    # Temp file lives next to the target so the final rename never has to
    # cross a filesystem boundary.
    tempFilename = newFileName + '.tmp'

    # Parse the CDL and resolve dimension sizes.
    f = read_cdl.read_file(cdlFname, comments='//')
    parts, _nparts, _cdlname = read_cdl.get_parts(f)
    dims = read_cdl.process_dimensions(parts[0], log=log)
    Vars = read_cdl.process_variables(parts[1], log=log)
    read_cdl.process_variables_attributes(parts[1], Vars, log=log)
    read_cdl.check_variables_and_dimensions(dims, Vars)
    dimensions, _udimension_arrays = read_cdl.check_undefined_dimensions(dims, metafname=metafname, log=log)
    Vars = read_cdl.extend_dimensions_with_sizes(Vars, dimensions)
    replace_undefined_dimensions(cdlFname, newFileName, dimensions, log=log)

    # The metafile is only mandatory when something actually has to be loaded.
    if metafname is not None:
        pathmeta = os.path.dirname(os.path.abspath(metafname))
        metaData = read_metafile.read_metadata(metafname, log=log)
    else:
        pathmeta = None
        metaData = None

    # "<anything> = *": at this stage no undefined dimensions are left, so a
    # "*" after "=" can only be an undefined variable.
    undefined_re = re.compile(r'.*?=\s*\*.*')
    varname_re = re.compile(r'\s*?(.*?)\s*=.*')

    with open(newFileName, 'r') as fIn:
        _log("creating temp file {0}", tempFilename)
        with open(tempFilename, 'w+') as fOut:
            for lineno, lineRaw in enumerate(fIn, 1):
                # Decode only for inspection; untouched lines are copied
                # exactly as read, so non-ASCII text cannot break the write.
                if isinstance(lineRaw, bytes):
                    text = lineRaw.decode('utf8')
                else:
                    text = lineRaw
                line = text.split('//')[0]  # ignore trailing comment

                if not undefined_re.match(line):
                    _log('line {0} >>> no changes', lineno)
                    fOut.write(lineRaw)
                    continue

                uVarName = varname_re.match(line).group(1).strip()
                _log("line {0} >>> Undefined variable '{1}'' found in CDL ", lineno, uVarName)

                if uVarName not in Vars:
                    err_msg = 'CDL file contains undefined variable {0}, which is missing in dictionary "Vars"'.format(uVarName)
                    raise ValueError(err_msg + '\n\n')
                if metaData is None:
                    err_msg = 'Metafile path is missing. CDL file contains undefined variable {0}: metafile is needed'.format(uVarName)
                    raise ValueError(err_msg)

                varInfo = Vars[uVarName]
                fOut.write('\t{0} =\n'.format(uVarName))  # first the variable name...

                # ...then build the array holding its data.
                _log('Creating array based on: "{0}" = {1}', uVarName, varInfo)
                useCols = create_useCols_from_meta(metaData, Vars, uVarName, log=log)
                _log('variable {0} >>> useCols = {1}', uVarName, useCols)
                obs = funcs.loadASCII(os.path.join(pathmeta, metaData['dataFname']),
                                      dtype=varInfo[0],
                                      delimiter=metaData['delimiter'],
                                      skiprows=metaData['skiprows'],
                                      usecols=useCols,
                                      log=log)

                attrs = varInfo[2]
                fv = attrs["_FillValue"][0] if "_FillValue" in attrs else -999
                data = funcs.create_array_from_data(varInfo[3], varInfo[1], uVarName, obs,
                                                    fill_value=fv, log=log)

                # Flatten in C (row-major) order before writing.
                data = data.flatten(order="C")
                _log('writing to files data-section values of variable <{0}>... be patient - this may take few minutes', uVarName)

                # One value per line: "," after each value, ";" after the last.
                lastIdx = data.size - 1
                for idx, val in enumerate(data):
                    if idx != lastIdx:
                        fOut.write('\t\t{0} ,\n'.format(val))
                    else:
                        fOut.write('\t\t{0} ;\n'.format(val))

    # rename() does not overwrite an existing file on Windows, so the old
    # file is removed first; failure to remove is deliberately non-fatal.
    try:
        os.remove(newFileName)
    except OSError:
        pass
    os.rename(tempFilename, newFileName)
    _log("renaming temp file into >>> {0}", newFileName)