예제 #1
0
def check_undefined_dimensions(dim_dict, metafname=None, log=False):
    '''
    Resolve dimensions whose size is declared as '*' by loading them from the data file
    described in the metafile.

    input:
        dim_dict [dictionary] - dictionary, where keys - dimension names, values - dimension sizes.
                                Note: values could be any integer >0, None, or '*'
        metafname [str] - fullpath to metafile. Default = None. It is only required when
                                at least one dimension is declared as '*'
        log [bool] - passed through to read_metafile.getDimVal(). Default = False
    out:
        dim_dict [dictionary] - the SAME dictionary object, modified in place: every '*' entry
                                reported by the metafile is replaced by the size read from data
                                Note: values could be any integer >0, None
        dims_vals [dict] - dictionary with information about unique values (sorted in ascending order) of a given dimensions
                            {"dim_name": [array of unique values], ...}
                            Empty dictionary when there are no undefined dimensions.
    raises:
        ValueError - if an undefined dimension exists but no metafile was given, or if the
                     metafile declares a dimension that is not undefined in the CDL file
    '''
    # dim='*' means that the dimension size is unknown and has to be read from the data file
    unknown_dims = [name for name, val in dim_dict.items() if val == '*']

    dims_vals = {}
    if not unknown_dims:
        # nothing to resolve: neither the metafile nor read_metafile is needed
        return dim_dict, dims_vals

    if metafname is None:
        err_msg = 'Metafile path is missing. CDL file declares variables with unknown dimension size: metafile is needed'
        raise ValueError(err_msg)

    # imported lazily so that the function can be used without the metafile machinery
    # when every dimension is already defined
    import os
    import read_metafile

    # the directory of the metafile is handed to getDimVal together with the metadata
    path2metafile = os.path.split(metafname)[0]
    metadata = read_metafile.read_metadata(metafname)
    dims, dims_vals = read_metafile.getDimVal(metadata, path2metafile, unknown_dims, log=log)

    for d in dims.keys():
        if d not in unknown_dims:
            err_msg = ('Dimension "{0}" declared in Metafile do not match any of the undefined dimension in CDL '
                       'file {1}').format(d, unknown_dims)
            raise ValueError(err_msg)
        dim_dict[d] = dims[d]

    return dim_dict, dims_vals
예제 #2
0
def make_cdl(cdlFname, metafname=None, outpath='', log=False):
    '''
    Create a copy of an existing CDL format file in which every "*" placeholder is
    substituted with real values. The result is stored as
    os.path.join(outpath, "_" + <file name of cdlFname>).

    Steps:
    _________________________________

    1) reading CDL format file, extracting info from sections:
        "dimensions:",
        "variables:",
        !NOT IMPLEMENTED !    "data"

    2) resolving undefined dimensions (dim="*") through read_cdl.check_undefined_dimensions()
       and writing them into the new file with replace_undefined_dimensions()

    3) rewriting the new file line by line into a temporary file (cdlFname + ".tmp"):
       a line "<variable> = * ..." is replaced by the variable name followed by its values
       (one value per line) loaded from the observation data file named in the metafile;
       every other line is copied unchanged

    4) replacing the new file with the temporary file

    input:
        cdlFname [str] - path to the source CDL file
        metafname [str] - path to the metafile. Default = None. It is needed as soon as the
                          CDL file contains an undefined variable
        outpath [str] - directory for the resulting file. Default = '' (current directory)
        log [bool] - print progress messages. Default = False
    raises:
        ValueError - if an undefined variable is not present in the parsed variables, or
                     if an undefined variable is found but no metafile was given
    '''
    import os
    import re

    import funcs
    import read_cdl
    import read_metafile

    sname = 'make_cdl():'

    # only the base name is needed; the previous fname.split('.') unpacking crashed for
    # file names with no dot or more than one dot and its results were never used
    fname = os.path.basename(os.path.abspath(cdlFname))
    tempFilename = cdlFname + '.tmp'
    newFileName = os.path.join(outpath, "_" + fname)

    # observation data file is looked up relative to the metafile directory
    pathmeta = None
    if metafname is not None:
        pathmeta = os.path.dirname(os.path.abspath(metafname))

    # parse the CDL file
    f = read_cdl.read_file(cdlFname, comments='//')
    parts, _nparts, _partsFname = read_cdl.get_parts(f)
    dims = read_cdl.process_dimensions(parts[0], log=log)
    Vars = read_cdl.process_variables(parts[1], log=log)
    read_cdl.process_variables_attributes(parts[1], Vars, log=log)
    read_cdl.check_variables_and_dimensions(dims, Vars)
    dimensions, _udimension_arrays = read_cdl.check_undefined_dimensions(dims, metafname=metafname, log=log)
    Vars = read_cdl.extend_dimensions_with_sizes(Vars, dimensions)

    # NOTE(review): replace_undefined_dimensions is expected to create newFileName, which
    # is opened for reading below - confirm against its definition
    replace_undefined_dimensions(cdlFname, newFileName, dimensions, log=log)

    metaData = None
    if metafname is not None:
        metaData = read_metafile.read_metadata(metafname, log=log)

    # "<anything> = * ..." marks an undefined variable (there are no undefined dimensions
    # left at this step); the second pattern extracts the name in front of "="
    undefinedPattern = re.compile(r'.*?=\s*\*.*')
    varNamePattern = re.compile(r'\s*?(.*?)\s*=.*')

    with open(newFileName, 'r') as fIn:
        if log:
            print('{0} {1}'.format(sname, "creating temp file {0}".format(tempFilename)))
        with open(tempFilename, 'w+') as fOut:
            for lineNo, lineRaw in enumerate(fIn, 1):  # cycling through lines in old file...
                # decode only for matching; everything after "//" is a comment
                line = lineRaw.decode('utf8').split('//', 1)[0]

                if not undefinedPattern.match(line):
                    if log:
                        print('{0} {1}'.format(sname, 'line {0} >>> no changes'.format(lineNo)))
                    # copy the original bytes: writing the decoded unicode object would
                    # fail on non-ASCII text (implicit ASCII encoding on write)
                    fOut.write(lineRaw)
                    continue

                uVarName = varNamePattern.match(line).group(1).strip()
                if log:
                    print('{0} {1}'.format(
                        sname, "line {0} >>> Undefined variable '{1}'' found in CDL ".format(lineNo, uVarName)))

                if uVarName not in Vars:
                    err_msg = 'CDL file contains undefined variable {0}, which is missing in dictionary "Vars"'.format(uVarName)
                    raise ValueError(err_msg+'\n\n')
                if metaData is None:
                    err_msg = 'Metafile path is missing. CDL file declares undefined variable "{0}": metafile is needed'.format(uVarName)
                    raise ValueError(err_msg)

                varInfo = Vars[uVarName]
                fOut.write('\t{0} =\n'.format(uVarName))  # first write variable name...

                # creating array with data...
                if log:
                    print('{0} {1}'.format(
                        sname, 'Creating array based on: "{0}" = {1}'.format(uVarName, varInfo)))
                useCols = create_useCols_from_meta(metaData, Vars, uVarName, log=log)

                if log:
                    print('{0} {1}'.format(
                        sname, 'variable {0} >>> useCols = {1}'.format (uVarName, useCols)))
                # varInfo[0] is used as the dtype of the loaded columns
                obs = funcs.loadASCII(os.path.join(pathmeta, metaData['dataFname']), dtype=varInfo[0], delimiter=metaData['delimiter'],
                                      skiprows=metaData['skiprows'], usecols=useCols, log=log)

                # varInfo[2] holds the variable attributes; fall back to -999 without _FillValue
                if "_FillValue" in varInfo[2]:
                    fv = varInfo[2]["_FillValue"][0]
                else:
                    fv = -999

                data = funcs.create_array_from_data(varInfo[3], varInfo[1], uVarName, obs, fill_value=fv, log=log)

                # now write it to cdl in C (row-major) order, last index varying fastest
                data = data.flatten(order="C")
                if log:
                    print('{0} {1}'.format(
                        sname,
                        'writing to files data-section values of variable <{0}>... be patient - this may take few minutes'.format(uVarName)))

                # one value per line: "," after every value, ";" after the last one
                lastIdx = data.size - 1
                for idx, val in enumerate(data):
                    terminator = ';' if idx == lastIdx else ','
                    fOut.write('\t\t{0} {1}\n'.format(val, terminator))

    # remove the target first: rename() doesn't overwrite on Windows
    try:
        os.remove(newFileName)
    except OSError:
        pass  # if file does not exist, there is nothing to remove
    os.rename(tempFilename, newFileName)
    if log:
        print('{0} {1}'.format(sname, "renaming temp file into >>> {0}".format(newFileName)))