Example #1
File: files.py Project: LI-COR/ONEFlux
def run_command(cmd):
    """
    Run command as subprocess

    :param cmd: command to be run
    :type cmd: list (of str)
    :rtype: str or None
    """
    _log.debug('External command execution: {c}'.format(c=cmd))
    try:
        sproc = subprocess.Popen(cmd,
                                 stdout=subprocess.PIPE,
                                 stderr=subprocess.PIPE)
        stdout, stderr = sproc.communicate()
        exitcode = sproc.returncode
        if exitcode == 0:
            _log.debug("Execution succeeded: {o}".format(
                o=stdout.replace('\r', '  ').replace('\n', '  ')))
            if stderr:
                _log.warning("Execution STDERR: {o}".format(
                    o=stderr.replace('\r', '  ').replace('\n', '  ')))
        else:
            msg = 'Execution failed! EXITCODE: {c}  STDOUT: {o}  STDERR: {e}'.format(
                c=exitcode,
                o=stdout.replace('\r', '  ').replace('\n', '  '),
                e=stderr.replace('\r', '  ').replace('\n', '  '))
            _log.critical(msg)
            raise ONEFluxError(msg)
    except subprocess.CalledProcessError as e:
        msg = "Execution raised error: '{e}'".format(e=str(e))
        _log.critical(msg)
        raise ONEFluxError(msg)
    return stdout
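
A minimal usage sketch (the command is hypothetical; note this listing is Python 2-era, so under Python 3 communicate() returns bytes and the replace() calls above would need decoded text):

output = run_command(['ls', '-l'])  # hypothetical command
print(output)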
Example #2
def get_created_ustar_years(mpdir, cpdir):
    mpfiles = [
        f for f in os.listdir(mpdir) if os.path.isfile(os.path.join(mpdir, f))
    ]
    cpfiles = [
        f for f in os.listdir(cpdir) if os.path.isfile(os.path.join(cpdir, f))
    ]

    mpyears_all = [FOUR_DIGIT_RE.findall(f) for f in mpfiles]
    mpyears_all = [i for i in mpyears_all if i]
    cpyears_all = [FOUR_DIGIT_RE.findall(f) for f in cpfiles]
    cpyears_all = [i for i in cpyears_all if i]

    for y_list in mpyears_all + cpyears_all:
        if len(y_list) != 1:
            raise ONEFluxError(
                "Multiple years ({y}) found in MP/CP filenames: {f}".format(
                    y=y_list, f=mpfiles + cpfiles))
        year = int(y_list[0])
        if year < 1900 or year > 2100:
            raise ONEFluxError(
                "Year ({y}) out-of-range found in MP/CP filenames: {f}".format(
                    y=year, f=mpfiles + cpfiles))
    mpyears = [int(l[0]) for l in mpyears_all]
    cpyears = [int(l[0]) for l in cpyears_all]

    return mpyears, cpyears
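
FOUR_DIGIT_RE is defined elsewhere in the module; a compatible sketch of that pattern and a hypothetical call:

import re

FOUR_DIGIT_RE = re.compile(r'\d{4}')  # assumed: matches the year embedded in MP/CP filenames

# hypothetical directories holding files such as 'US-ARc_ustar_2005.txt'
mpyears, cpyears = get_created_ustar_years(mpdir='ustar_mp', cpdir='ustar_cp')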
Example #3
File: nighttime.py Project: LI-COR/ONEFlux
def ipolmiss(data, variable):
    """
    Interpolates missing values in data for variable
    (changes data object directly)

    Note: original code allowed multiple variables to be interpolated
          in a single call to ipolmiss, but only used as a single variable
          at a time; this implementation assumes single variable for
          each call

    :param data: data structure for partitioning
    :type data: numpy.ndarray
    :param variable: variable to be interpolated
    :type variable: str
    """
    _log.debug("Interpolating variable '{v}'".format(v=variable))

    #order = 2 # always 2 in all calls from original code; means linear interpolation
    method = 'Exact' # always 'Exact' from original code

    if not isinstance(data, numpy.ndarray):
        msg = "ipolmiss ({v}) data object is not ndarray: '{d}'".format(v=variable, d=str(data))
        _log.critical(msg)
        raise ONEFluxError(msg)

    if variable not in data.dtype.names:
        msg = "ipolmiss ({v}) label not in data object".format(v=variable)
        _log.critical(msg)
        raise ONEFluxError(msg)

    mask = not_nan(data[variable])
    count = numpy.sum(mask)

    if (count > 1) and ((count < data.size) or (method == 'LSQ')):
        idx = numpy.where(mask)[0]
        julday = data['julday'] + (data['hr'] / 24.0)
        duration = [i - julday[mask][0] for i in julday[mask]]

        if count < 6:
            _log.error("ipolmiss ({v}) too few elements: {c}".format(v=variable, c=count))

        # create interpolation function
        sp_interp_function = interp1d(duration, data[variable][mask], kind='linear', bounds_error=False, fill_value=numpy.NaN)

        # apply interpolation function to full extent of dataset
        duration_full = [i - julday[mask][0] for i in julday]
        data[variable][:] = sp_interp_function(duration_full)

        # set beginning/end gaps into first/last valid value
        data[variable][:idx[0]] = data[variable][idx[0]]
        data[variable][idx[-1] + 1:] = data[variable][idx[-1]]
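
A small sketch of the same call on a toy record array (assumes the module's _log, ONEFluxError, and not_nan are in scope, with not_nan behaving like ~numpy.isnan):

import numpy

data = numpy.zeros(7, dtype=[('julday', 'f8'), ('hr', 'f8'), ('tair', 'f8')])
data['julday'] = 1
data['hr'] = [0.0, 4.0, 8.0, 12.0, 16.0, 20.0, 24.0]
data['tair'] = [10.0, 11.0, numpy.nan, 13.0, 14.0, 15.0, 16.0]
ipolmiss(data, 'tair')
print(data['tair'])  # [10. 11. 12. 13. 14. 15. 16.] -- gap filled linearly in time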
Example #4
def load_ustar_cut(siteid,
                   sitedir,
                   first_year,
                   last_year,
                   nee_perc_ustar_cut_template=NEE_PERC_USTAR_CUT):
    nee_perc_ustar_cut = nee_perc_ustar_cut_template.format(s=siteid,
                                                            sd=sitedir,
                                                            fy=first_year,
                                                            ly=last_year)
    log.debug("{s}: processing file: {f}".format(s=siteid,
                                                 f=nee_perc_ustar_cut))
    nee_perc_ustar_cut_lines = load_csv_lines(filename=nee_perc_ustar_cut)
    if (last_year - first_year) < 2:
        if not nee_perc_ustar_cut_lines:
            log.warning("{s}: too few years, {e} file not created: {f}".format(
                s=siteid, e='NEE CUT USTAR percentiles', f=nee_perc_ustar_cut))
        nee_perc_ustar_cut_values = {k: '-9999' for k in PERCENTILES}
        nee_perc_ustar_cut_values['50.00'] = nee_perc_ustar_cut_values['50']
    else:
        if not nee_perc_ustar_cut_lines:
            raise ONEFluxError("{s}: {e} file not found: {f}".format(
                s=siteid, e='NEE CUT USTAR percentiles', f=nee_perc_ustar_cut))

        if (len(nee_perc_ustar_cut_lines) == 3
                and nee_perc_ustar_cut_lines[2].strip()
            ) or len(nee_perc_ustar_cut_lines) > 3:
            raise ONEFluxError(
                "{s}: NEE USTAR CUT file too many lines ({l}): {f}".format(
                    s=siteid,
                    l=len(nee_perc_ustar_cut_lines),
                    f=nee_perc_ustar_cut))
        elif not (nee_perc_ustar_cut_lines[0][0].startswith(PERCENTILES[0])
                  and nee_perc_ustar_cut_lines[0][-1].strip().endswith(
                      PERCENTILES[-1])):
            raise ONEFluxError(
                "{s}: NEE USTAR CUT bad headers ({h}): {f}".format(
                    s=siteid,
                    h=nee_perc_ustar_cut_lines[0],
                    f=nee_perc_ustar_cut))
        nee_perc_ustar_cut_values = {
            k: nee_perc_ustar_cut_lines[1][i].strip()
            for i, k in enumerate(PERCENTILES)
        }
        nee_perc_ustar_cut_values['50.00'] = nee_perc_ustar_cut_values['50']
    return nee_perc_ustar_cut_values
Example #5
def load_ustar_vut(siteid, sitedir, year_range, nee_perc_ustar_vut_template=NEE_PERC_USTAR_VUT):
    nee_perc_ustar_vut = nee_perc_ustar_vut_template.format(s=siteid, sd=sitedir)
    log.debug("{s}: processing file: {f}".format(s=siteid, f=nee_perc_ustar_vut))
    nee_perc_ustar_vut_lines = load_csv_lines(filename=nee_perc_ustar_vut)
    if not nee_perc_ustar_vut_lines:
        raise ONEFluxError("{s}: {e} file not found: {f}".format(
            s=siteid, e='NEE VUT USTAR percentiles', f=nee_perc_ustar_vut))

    nee_perc_ustar_vut_values = {i:{} for i in year_range}
    if not ((nee_perc_ustar_vut_lines[0][0].lower().startswith('timestamp')
             or nee_perc_ustar_vut_lines[0][0].lower().startswith('isodate')
             or nee_perc_ustar_vut_lines[0][0].lower().startswith('year'))
            and nee_perc_ustar_vut_lines[0][-1].endswith(PERCENTILES[-1])):
        raise ONEFluxError("{s}: NEE USTAR VUT bad headers ({h}): {f}".format(
            s=siteid, h=nee_perc_ustar_vut_lines[0], f=nee_perc_ustar_vut))
    elif (int(nee_perc_ustar_vut_lines[1][0]) != year_range[0]) or (
            int(nee_perc_ustar_vut_lines[-1][0]) != year_range[-1]):
        raise ONEFluxError("{s}: NEE USTAR VUT incompatible year range data=({d}), info=({i})".format(
            s=siteid,
            d="{f}-{l}".format(f=nee_perc_ustar_vut_lines[1][0], l=nee_perc_ustar_vut_lines[-1][0]),
            i="{f}-{l}".format(f=year_range[0], l=year_range[-1])))
    for y, year in enumerate(year_range):
        nee_perc_ustar_vut_values[year] = {k: nee_perc_ustar_vut_lines[y + 1][i + 1].strip()
                                           for i, k in enumerate(PERCENTILES)}
        nee_perc_ustar_vut_values[year]['50.00'] = nee_perc_ustar_vut_values[year]['50']
    return nee_perc_ustar_vut_values
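
For reference, a sketch of the per-year extraction above with a hypothetical PERCENTILES tuple and one data row (year first, then one value per percentile):

PERCENTILES = ('1.25', '50', '98.75')   # assumed subset for illustration
row = ['2005', '0.11', '0.18', '0.30']  # hypothetical data row from the VUT file
values = {k: row[i + 1].strip() for i, k in enumerate(PERCENTILES)}
print(values)  # {'1.25': '0.11', '50': '0.18', '98.75': '0.30'}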
Example #6
File: common.py Project: fluxnet/ONEFlux
def get_empty_array_year(year=datetime.now().year, start_end=True, variable_list=['TEST', ], variable_list_dtype=None, record_interval='HH'):
    """
    Allocates and returns a new empty record array for the given year, using a list of dtypes
    (or variable labels as 8-byte floats if no dtype list is provided) for variables, plus
    TIMESTAMP_START and TIMESTAMP_END at the beginning
    
    :param year: year to be represented in array (current year if not provided)
    :type year: int
    :param start_end: if True, uses TIMESTAMP_START and TIMESTAMP_END; if False, uses only TIMESTAMP (end)
    :type start_end: bool
    :param variable_list: list of strings to be used as variable labels (assumed f8 type)
    :type variable_list: list (of str)
    :param variable_list_dtype: list of dtype tuples (label, data type) to be used as variables
    :type variable_list_dtype: list (of (str, str)-tuples)
    :param record_interval: resolution to be used for record ['HR' for hourly, 'HH' for half-hourly (default)]
    :type record_interval: str
    """
    # record_interval
    if record_interval.lower() == 'hh':
        step = timedelta(minutes=30)
    elif record_interval.lower() == 'hr':
        step = timedelta(minutes=60)
    else:
        msg = 'Unknown record_interval: {r}'.format(r=record_interval)
        log.critical(msg)
        raise ONEFluxError(msg)

    # timestamp list
    timestamp_list = []
    current_timestamp = datetime(int(year), 1, 1, 0, 0, 0)
    while current_timestamp.year < int(year) + 1:
        timestamp_list.append(current_timestamp)
        current_timestamp += step
    timestamp_list.append(current_timestamp)
    timestamp_list_begin = timestamp_list[:-1]
    timestamp_list_end = timestamp_list[1:]

    # array dtype
    dtype = ([(var, 'f8') for var in variable_list] if variable_list_dtype is None else variable_list_dtype)
    if start_end:
        dtype = [('TIMESTAMP_START', 'a25'), ('TIMESTAMP_END', 'a25')] + dtype
    else:
        dtype = [('TIMESTAMP', 'a25'), ] + dtype

    # record array
    data = numpy.zeros(len(timestamp_list_begin), dtype=dtype)
    data[:] = -9999.0
    if start_end:
        data['TIMESTAMP_START'][:] = [i.strftime('%Y%m%d%H%M') for i in timestamp_list_begin]
        data['TIMESTAMP_END'][:] = [i.strftime('%Y%m%d%H%M') for i in timestamp_list_end]
    else:
        data['TIMESTAMP'][:] = [i.strftime('%Y%m%d%H%M') for i in timestamp_list_end]

    return data
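
A usage sketch; for a leap year at half-hourly resolution the array holds 366 * 48 = 17568 records:

data = get_empty_array_year(year=2020, variable_list=['NEE'])
print(len(data))                   # 17568
print(data['TIMESTAMP_START'][0])  # b'202001010000'
print(data['NEE'][0])              # -9999.0 (missing-value fill)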
Example #7
File: pipeline.py Project: fluxnet/ONEFlux
def run_pipeline(datadir, siteid, sitedir, firstyear, lastyear, version_data=VERSION_METADATA, version_proc=VERSION_PROCESSING, prod_to_compare=PROD_TO_COMPARE, perc_to_compare=PERC_TO_COMPARE, mcr_directory=None, timestamp=NOW_TS, record_interval='hh'):

    sitedir_full = os.path.abspath(os.path.join(datadir, sitedir))
    if not sitedir or not os.path.isdir(sitedir_full):
        msg = "Site directory for {s} not found: '{d}'".format(s=siteid, d=sitedir)
        log.critical(msg)
        raise ONEFluxError(msg)

    log.info("Started processing site dir {d}".format(d=sitedir))
    try:
        pipeline = Pipeline(siteid=siteid,
                    data_dir=sitedir_full,
                    data_dir_main=os.path.abspath(datadir),
                    site_dir=sitedir,
                    tool_dir=TOOL_DIRECTORY,
                    first_year=firstyear,
                    last_year=lastyear,
                    prod_to_compare=prod_to_compare,
                    perc_to_compare=perc_to_compare,
                    timestamp=timestamp,
                    record_interval=record_interval,
                    fluxnet2015_first_t1=firstyear,
                    fluxnet2015_last_t1=lastyear,
                    fluxnet2015_version_data=version_data,
                    fluxnet2015_version_processing=version_proc,
                    ustar_cp_mcr_dir=mcr_directory,
                    qc_auto_execute=True,
                    ustar_mp_execute=True,
                    ustar_cp_execute=True,
                    meteo_proc_execute=True,
                    nee_proc_execute=True,
                    energy_proc_execute=True,
                    nee_partition_nt_execute=True,
                    nee_partition_dt_execute=True,
                    prepare_ure_execute=True,
                    ure_execute=True,
                    fluxnet2015_execute=True,
                    fluxnet2015_site_plots=True,
                    simulation=False)
        pipeline.run()
        #csv_manifest_entries, zip_manifest_entries = pipeline.fluxnet2015.csv_manifest_entries, pipeline.fluxnet2015.zip_manifest_entries
        log.info("Finished processing site dir {d}".format(d=sitedir_full))
    except ONEFluxPipelineError as e:
        log.critical("ONEFlux Pipeline ERRORS processing site dir {d}".format(d=sitedir_full))
        log_trace(exception=e, level=logging.CRITICAL, log=log)
        raise
    except ONEFluxError as e:
        log.critical("ONEFlux ERRORS processing site dir {d}".format(d=sitedir_full))
        log_trace(exception=e, level=logging.CRITICAL, log=log)
        raise
    except Exception as e:
        log.critical("UNKNOWN ERRORS processing site dir {d}".format(d=sitedir_full))
        log_trace(exception=e, level=logging.CRITICAL, log=log)
        raise
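
A hypothetical invocation (paths and site id are placeholders; all remaining options take the defaults above):

run_pipeline(datadir='/data/fluxnet', siteid='US-ARc', sitedir='US-ARc_dir',
             firstyear=2005, lastyear=2006)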
Example #8
def compress_file(filename):
    """
    Compresses file with gzip

    :param filename: path to file to be compressed
    :type filename: str
    :rtype: str or None
    """
    if not os.path.isfile(filename):
        msg = "File not found or cannot be accessed: '{f}'".format(f=filename)
        _log.critical(msg)
        raise ONEFluxError(msg)
    try:
        r = subprocess.call(['gzip', '-f', filename])
        _log.debug("Compressing file '{d}'. Result: {o}".format(
            d=filename, o=('success' if r == 0 else "fail ({r})".format(r=r))))
    except subprocess.CalledProcessError as e:
        msg = "Problems compressing file '{d}'. Error: '{e}'".format(
            d=filename, e=str(e))
        _log.critical(msg)
        raise ONEFluxError(msg)
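
A usage sketch (requires gzip on the PATH; the path is hypothetical):

compress_file('/tmp/example_output.csv')  # replaces the file with /tmp/example_output.csv.gz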
Example #9
File: files.py Project: LI-COR/ONEFlux
def check_create_directory(directory):
    """
    Checks if directory exists and creates if not

    :param directory: path to be tested/created
    :type directory: str
    """
    if not os.path.isdir(directory):
        if os.path.exists(directory):
            msg = "Directory check: not a directory '{p}'".format(p=directory)
            _log.critical(msg)
            raise ONEFluxError(msg)
        else:
            os.makedirs(directory)
            _log.info("Created directory '{p}'".format(p=directory))
    return
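
A usage sketch with a hypothetical path; os.makedirs creates intermediate directories as needed:

check_create_directory('/tmp/oneflux_test/nt_output')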
Example #10
File: common.py Project: fluxnet/ONEFlux
def get_headers(filename):
    """
    Parses headers from the FPFileV2 format and returns a list
    of strings with the header labels.
    Must have at least two columns.
    
    :param filename: name of the FPFileV2 to be loaded
    :type filename: str
    :rtype: list
    """
    with open(filename, 'r') as f:
        line = f.readline()
    headers = line.strip().split(',')
    if len(headers) < 2:
        raise ONEFluxError("Headers too short: '{h}'".format(h=line))
    headers = [i.strip() for i in headers]
    return headers
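
A self-contained sketch with a hypothetical FPFileV2 header line:

with open('sample_fpfile.csv', 'w') as f:
    f.write('TIMESTAMP_START,TIMESTAMP_END,NEE\n')
print(get_headers('sample_fpfile.csv'))  # ['TIMESTAMP_START', 'TIMESTAMP_END', 'NEE']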
Example #11
File: nighttime.py Project: LI-COR/ONEFlux
def pct(array, percent):
    """
    Calculates "percent" percentile of array -- not really a percentile,
    but similar intention. Following implementation in original code.

    :param array: 1-d array to be used in calculation
    :type array: numpy.ndarray
    :param percent: target percent value for percentile
    :type percent: float
    """

    nonnan_mask = not_nan(array)
    if numpy.sum(nonnan_mask) > 1:
        nonnan_array = array[nonnan_mask]
    else:
        msg = "No non-NA value in percentile calculation"
        _log.critical(msg)
        raise ONEFluxError(msg)

    # indices of ascending ranking of entries in array
    rank_idx_array = rankdata(nonnan_array, method='ordinal')
    critical_rank = len(nonnan_array) * percent / 100.
    over_critical_rank_mask = (rank_idx_array > critical_rank)

    # if no index over critical rank, return max values
    if numpy.sum(over_critical_rank_mask) == 0.0:
        return numpy.max(nonnan_array)

    ### smallest rank that is greater than critical rank (or SM-RK-GT-CR)
    critical_rank_idx = numpy.where(rank_idx_array == numpy.min(rank_idx_array[over_critical_rank_mask]))

    ### rank immediately before (SM-RK-GT-CR)
    critical_rank_idx_previous = numpy.where(rank_idx_array == (numpy.min(rank_idx_array[over_critical_rank_mask]) - 1))

    if critical_rank.is_integer() and (numpy.sum(critical_rank_idx_previous) != 0):
        return numpy.average([nonnan_array[critical_rank_idx[0]], nonnan_array[critical_rank_idx_previous[0]]])
    else:
        return nonnan_array[critical_rank_idx[0]][0]
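
A worked sketch (again assuming not_nan behaves like ~numpy.isnan): with four valid values and percent=50, the critical rank is 4 * 50 / 100 = 2.0, an integer, so the values ranked 2nd and 3rd are averaged:

import numpy

arr = numpy.array([1.0, 2.0, 3.0, 4.0, numpy.nan])
print(pct(arr, 50.0))  # 2.5, the average of 2.0 and 3.0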
Example #12
def run_partition_nt(datadir, siteid, sitedir, years_to_compare,
                     prod_to_compare=PROD_TO_COMPARE,
                     perc_to_compare=PERC_TO_COMPARE,
                     py_remove_old=False):
    # NOTE: signature reconstructed from the Example #13 call site; the original listing is truncated here
    """
    Runs nighttime partitioning

    :param datadir: absolute path to data directory
    :type datadir: str
    :param siteid: site id in CC-SSS format
    :type siteid: str
    :param sitedir: relative path to data directory for site data
    :type sitedir: str
    :param years_to_compare: list of years to be compared
    :type years_to_compare: list
    :param nt_dir: template name for directory with nighttime partitioning data
    :type nt_dir: str
    :param filename_template: template of filename for nighttime partitioning data
                              (product, percentile, siteid, year, format, extension)
    :type filename_template: str
    :param prod_to_compare: list of products to compare ('c', 'y') - CUT/VUT
    :type prod_to_compare: list
    :param perc_to_compare: list of percentiles to compare ('1.25', '3.75', ..., '50', ..., '96.25', '98.75')
    :type perc_to_compare: list
    :param py_remove_old: if True, removes old python partitioning results (after backup), file has to be missing for run
    :type py_remove_old: bool
    """
    remove_previous_run(datadir=datadir, siteid=siteid, sitedir=sitedir, python=py_remove_old, prod_to_compare=prod_to_compare, perc_to_compare=perc_to_compare, years_to_compare=years_to_compare)
    run_python(datadir=datadir, siteid=siteid, sitedir=sitedir, prod_to_compare=prod_to_compare, perc_to_compare=perc_to_compare, years_to_compare=years_to_compare)


if __name__ == '__main__':
    raise ONEFluxError('Not executable')
Example #13
    msg += ", site-id ({i})".format(i=args.siteid)
    msg += ", site-dir ({d})".format(d=args.sitedir)
    msg += ", first-year ({y})".format(y=firstyear)
    msg += ", last-year ({y})".format(y=lastyear)
    msg += ", perc ({i})".format(i=perc)
    msg += ", prod ({i})".format(i=prod)
    msg += ", log-file ({f})".format(f=args.logfile)
    msg += ", force-py ({i})".format(i=args.forcepy)
    log.debug(msg)

    # start execution
    try:
        # check arguments
        print(os.path.join(args.datadir, args.sitedir))
        if not os.path.isdir(os.path.join(args.datadir, args.sitedir)):
            raise ONEFluxError("Site dir not found: {d}".format(d=args.sitedir))

        # run command
        log.info("Starting execution: {c}".format(c=args.command))
        if args.command == 'all':
            run_pipeline(datadir=args.datadir, siteid=args.siteid, sitedir=args.sitedir, firstyear=firstyear, lastyear=lastyear,
                         prod_to_compare=prod, perc_to_compare=perc, mcr_directory=args.mcr_directory, timestamp=args.timestamp,
                         record_interval=args.recint)
        elif args.command == 'partition_nt':
            run_partition_nt(datadir=args.datadir, siteid=args.siteid, sitedir=args.sitedir, years_to_compare=range(firstyear, lastyear + 1),
                             py_remove_old=args.forcepy, prod_to_compare=prod, perc_to_compare=perc)
        elif args.command == 'partition_dt':
            run_partition_dt(datadir=args.datadir, siteid=args.siteid, sitedir=args.sitedir, years_to_compare=range(firstyear, lastyear + 1),
                             py_remove_old=args.forcepy, prod_to_compare=prod, perc_to_compare=perc)
        else:
            raise ONEFluxError("Unknown command: {c}".format(c=args.command))
Example #14
    'GPP_DT_CUT_USTAR50',
    'GPP_DT_CUT_MEAN',
    'GPP_DT_CUT_SE',
] + \
['GPP_DT_CUT_{n}'.format(n=i) for i in ['05', '16', '25', '50', '75', '84', '95']] + \
[

    ### PARTITIONING SUNDOWN
    'RECO_SR',
    'RECO_SR_N'
]

for i, e in enumerate(VARIABLE_LIST_FULL):
    if VARIABLE_LIST_FULL.count(e) != 1:
        raise ONEFluxError(
            "Duplicated variable VARIABLE_LIST_FULL[{i}]: {v}".format(i=i,
                                                                      v=e))
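
An equivalent linear-time version of the duplicate check above, sketched with collections.Counter:

from collections import Counter

duplicates = [v for v, n in Counter(VARIABLE_LIST_FULL).items() if n > 1]
if duplicates:
    raise ONEFluxError("Duplicated variables in VARIABLE_LIST_FULL: {v}".format(v=duplicates))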

VARIABLE_LIST_SUB = [
    ### TIMEKEEPING
    'TIMESTAMP',
    'TIMESTAMP_START',
    'TIMESTAMP_END',

    ### MICROMETEOROLOGICAL
    'TA_F',
    'TA_F_QC',
    'SW_IN_POT',
    'SW_IN_F',
    'SW_IN_F_QC',
    'LW_IN_F',
Example #15
File: nighttime.py Project: LI-COR/ONEFlux
def partitioning_nt(datadir, siteid, sitedir, prod_to_compare, perc_to_compare, years_to_compare):
    """
    NT partitioning wrapper function.
    Handles all "versions" (percentiles, CUT/VUT, years, etc)

    :param datadir: main data directory (full path)
    :type datadir: str
    :param siteid: site flux id to be processed - in format CC-SSS
    :type siteid: str
    :param sitedir: data directory for site (relative path to datadir)
    :type sitedir: str
    :param prod_to_compare: list of products to compare - ['c', 'y']
    :type prod_to_compare: list (of str)
    :param perc_to_compare: list of percentiles to compare - ['1.25', '3.75', ..., '96.25', '98.75']
    :type perc_to_compare: list (of str)
    :param years_to_compare: list of years to compare - [1996, 1997, ... , 2014]
    :type years_to_compare: list (of int)
    """

    _log.info("Started NT partitioning of {s}".format(s=siteid))

    sitedir_full = os.path.join(datadir, sitedir)
    qc_auto_dir = os.path.join(sitedir_full, QC_AUTO_DIR)
    meteo_proc_dir = os.path.join(sitedir_full, METEO_PROC_DIR)
    nee_proc_dir = os.path.join(sitedir_full, NEE_PROC_DIR)
    nt_output_dir = os.path.join(sitedir_full, NT_OUTPUT_DIR)

    # reformat percentiles to compare into data column labels
    percentiles_data_columns = [i.replace('.', HEADER_SEPARATOR) for i in perc_to_compare]

    # check and create output dir if needed
    if os.path.isdir(sitedir_full) and not os.path.isdir(nt_output_dir):
        check_create_directory(directory=nt_output_dir)

    # load meteo proc results
    meteo_proc_f = os.path.join(meteo_proc_dir, '{s}_meteo_hh.csv'.format(s=siteid))
    if not os.path.isfile(meteo_proc_f):
        msg = "Meteo proc file not found '{f}'".format(f=meteo_proc_f)
        _log.critical(msg)
        raise ONEFluxError(msg)
    _log.info("Will now load meteo file '{f}'".format(f=meteo_proc_f))
    whole_dataset_meteo, headers_meteo, timestamp_list_meteo, year_list_meteo = load_output(meteo_proc_f)

    # iterate through UStar threshold types
    for ustar_type in prod_to_compare:
        _log.info("Started processing UStar threshold type '{u}'".format(u=ustar_type))

        # load nee proc results (percentiles file)
        nee_proc_percentiles_f = os.path.join(nee_proc_dir, '{s}_NEE_percentiles_{u}_hh.csv'.format(s=siteid, u=ustar_type))
        if not os.path.isfile(nee_proc_percentiles_f):
            msg = "NEE proc file not found '{f}', trying '{n}'".format(f=nee_proc_percentiles_f, n='{f}')
            nee_proc_percentiles_f = os.path.join(nee_proc_dir, '{s}_NEE_percentiles_{u}.csv'.format(s=siteid, u=ustar_type))
            msg = msg.format(f=nee_proc_percentiles_f)
            _log.info(msg)

            if not os.path.isfile(nee_proc_percentiles_f):
                if ustar_type == 'y':
                    msg = "NEE proc file not found '{f}'".format(f=nee_proc_percentiles_f)
                    _log.critical(msg)
                    raise ONEFluxError(msg)
                elif ustar_type == 'c':
                    msg = "NEE proc file not found '{f}', skipping (CUT not computed?)".format(f=nee_proc_percentiles_f)
                    _log.warning(msg)
                    continue
                else:
                    msg = "Invalid USTAR type '{u}'".format(u=ustar_type)
                    raise ONEFluxError(msg)
        _log.info("Will now load nee percentiles file '{f}'".format(f=nee_proc_percentiles_f))
        whole_dataset_nee, headers_nee, timestamp_list_nee, year_list_nee = load_output(nee_proc_percentiles_f)

        # iterate through each year
        for iteration, year in enumerate(year_list_nee):
            if year not in years_to_compare:
                continue
            _log.info("Started processing year '{y}'".format(y=year))
            qc_auto_nee_f = os.path.join(qc_auto_dir, '{s}_qca_nee_{y}.csv'.format(s=siteid, y=year))
            if not os.path.isfile(qc_auto_nee_f):
                msg = "QC auto file not found '{f}'".format(f=qc_auto_nee_f)
                _log.error(msg)
                continue
            latitude = get_latitude(filename=qc_auto_nee_f)

            # iterate through UStar threshold values
            for percentile in percentiles_data_columns:
                _log.info("Started processing percentile '{p}'".format(p=percentile))
                percentile_print = percentile.replace(HEADER_SEPARATOR, '.')
                output_filename = os.path.join(nt_output_dir, "nee_{t}_{p}_{s}_{y}{extra}.csv".format(t=ustar_type, p=percentile_print, s=siteid, y=year, extra=EXTRA_FILENAME))
                temp_output_filename = os.path.join(nt_output_dir, "nee_{t}_{p}_{s}_{y}{extra}.csv".format(t=ustar_type, p=percentile_print, s=siteid, y=year, extra='{extra}'))
                if os.path.isfile(output_filename):
                    _log.info("Output file found, skipping: '{f}'".format(f=output_filename))
                    continue
                else:
                    _log.debug("Output file missing, will be processed: '{f}'".format(f=output_filename))

                # create masks for current year for both nee and meteo
                year_mask_nee = (whole_dataset_nee['year'] == year)
                year_mask_meteo = (whole_dataset_meteo['year'] == year)

                # account for first entry being from previous year
                if iteration == 0:
                    _log.debug("First site-year available ({y}), removing first midnight entry from meteo only".format(y=year))
                    first_meteo = numpy.where(year_mask_meteo == 1)[0][0]
                    first_nee = None
                    year_mask_meteo[first_meteo] = 0
                else:
                    _log.debug("Regular site-year ({y}), removing first midnight entry from meteo and nee".format(y=year))
                    first_meteo = numpy.where(year_mask_meteo == 1)[0][0]
                    first_nee = numpy.where(year_mask_nee == 1)[0][0]
                    year_mask_meteo[first_meteo] = 0
                    year_mask_nee[first_nee] = 0

                # account for last entry being from next year
                _log.debug("Site-year ({y}), adding first midnight entry from next year for meteo and nee".format(y=year))
                last_meteo = numpy.where(year_mask_meteo == 1)[0][-1] + 1
                last_nee = numpy.where(year_mask_nee == 1)[0][-1] + 1
                year_mask_meteo[last_meteo] = 1
                year_mask_nee[last_nee] = 1

                _log.debug("Site-year {y}: first NEE '{tn}' and first meteo '{tm}'".format(y=year, tn=whole_dataset_nee[year_mask_nee][0]['timestamp_end'], tm=whole_dataset_meteo[year_mask_meteo][0]['timestamp_end']))
                _log.debug("Site-year {y}:  last NEE '{tn}' and  last meteo '{tm}'".format(y=year, tn=whole_dataset_nee[year_mask_nee][-1]['timestamp_end'], tm=whole_dataset_meteo[year_mask_meteo][-1]['timestamp_end']))

                if numpy.sum(year_mask_nee) != numpy.sum(year_mask_meteo):
                    msg = "Incompatible array sizes (nee={n}, meteo={m}) for year '{y}' while processing '{f}'".format(y=year, f=output_filename, n=numpy.sum(year_mask_nee), m=numpy.sum(year_mask_meteo))
                    _log.error(msg)
                    raise ONEFluxError(msg)

                working_year_data = create_data_structures(ustar_type=ustar_type, whole_dataset_nee=whole_dataset_nee, whole_dataset_meteo=whole_dataset_meteo,
                                                           percentile=percentile, year_mask_nee=year_mask_nee, year_mask_meteo=year_mask_meteo, latitude=latitude, part_type=NT_STR)

                # corresponds to partitioning_nt.pro, line:  compu, set, "QCNEE=0"   # NOTE: removes all information of missing data records!
                compu(data=working_year_data, func=compu_qcnee_filter, columns=['qcnee']) # equivalent to: working_year_data['qcnee'][:] = 0

                # get latitude from data structure
                lat = var(working_year_data, 'lat')

                # call flux_partition
                result_year_data = flux_partition(data=working_year_data, lat=lat[0], tempvar='tair', temp_output_filename=temp_output_filename)

                # save output data file
                _log.debug("Saving output file '{f}".format(f=output_filename))
                numpy.savetxt(fname=output_filename, X=result_year_data, delimiter=',', fmt='%s', header=','.join(result_year_data.dtype.names), comments='')
                _log.debug("Saved output file '{f}".format(f=output_filename))

                _log.info("Finished processing percentile '{p}'".format(p=percentile))
#                sys.exit('EXIT') # TODO: testing only, remove
            _log.info("Finished processing year '{y}'".format(y=year))
        _log.info("Finished processing UStar threshold type '{u}'".format(u=ustar_type))
    _log.info("Finished NT partitioning of {s}".format(s=siteid))
Example #16
    'doright4',
    'doright5',
]

### parse site information
SITES_D = {}
SITES_FOLDERS_D = {}
for e in SITES_TIERS_FOLDERS:
    db, siteid, fy, ly, tier, version, sitedir = e.strip().split()
    if tier.strip().upper() not in ['TIER1', 'TIER2']:
        msg = "Unknown TIER '{t}' for site {s} ({f}-{l})".format(t=tier,
                                                                 s=siteid,
                                                                 f=fy,
                                                                 l=ly)
        log.error(msg)
        raise ONEFluxError(msg)

    # sites-tiers
    if siteid in SITES_D:
        SITES_D[siteid][tier].append((fy, ly))
        SITES_D[siteid]['FY'] = min(SITES_D[siteid]['FY'], fy)
        SITES_D[siteid]['LY'] = max(SITES_D[siteid]['LY'], ly)
        if SITES_D[siteid]['version'] != version:
            log.error(
                "Versions differ for different tiers for site {s} ({v1} <> {v2})"
                .format(s=siteid, v1=SITES_D[siteid]['version'], v2=version))
    else:
        SITES_D[siteid] = {}
        SITES_D[siteid]['TIER1'] = []
        SITES_D[siteid]['TIER2'] = []
        SITES_D[siteid]['version'] = version
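
SITES_TIERS_FOLDERS is defined elsewhere; a sketch of the whitespace-separated entry format this loop expects (all values hypothetical):

SITES_TIERS_FOLDERS = [
    # db  siteid  fy    ly    tier   version  sitedir
    'FLX  US-ARc  2005  2006  TIER1  1        US-ARc_2005-2006',
]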
Example #17
    msg += ", site-dir ({d})".format(d=args["sitedir"])
    msg += ", first-year ({y})".format(y=firstyear)
    msg += ", last-year ({y})".format(y=lastyear)
    msg += ", perc ({i})".format(i=perc)
    msg += ", prod ({i})".format(i=prod)
    msg += ", log-file ({f})".format(f=args["logfile"])
    msg += ", force-py ({i})".format(i=args["forcepy"])
    log.debug(msg)

    # start execution
    try:
        # check arguments
        # PRI 2020/10/23 - changed use of args to dictionary syntax
        print(os.path.join(args["datadir"], args["sitedir"]))
        if not os.path.isdir(os.path.join(args["datadir"], args["sitedir"])):
            raise ONEFluxError(
                "Site dir not found: {d}".format(d=args["sitedir"]))

        # run command
        # PRI 2020/10/23 - changed use of args to dictionary syntax
        log.info("Starting execution: {c}".format(c=args["command"]))
        if args["command"] == 'all':
            # PRI 2020/10/22
            # dictionary of logicals to control which pipeline steps will be executed
            pipeline_steps = {
                "qc_auto_execute": True,
                "ustar_mp_execute": True,
                "ustar_cp_execute": False,
                "meteo_proc_execute": True,
                "nee_proc_execute": True,
                "energy_proc_execute": True,
                "nee_partition_nt_execute": True,
Example #18
def generate_nee(siteid, sitedir, first_year, last_year, version_data, version_processing, pipeline=None):
    log.debug("{s}: starting generation of AUXNEE file".format(s=siteid))

    nee_info_template = (NEE_INFO if pipeline is None else pipeline.nee_info)
    unc_info_template = (UNC_INFO if pipeline is None else pipeline.unc_info)
    unc_info_alt_template = (UNC_INFO_ALT if pipeline is None else pipeline.unc_info_alt)
    prodfile_aux_template = (PRODFILE_AUX_TEMPLATE if pipeline is None else pipeline.prodfile_aux_template)
    mpdir_template = (MPDIR if pipeline is None else pipeline.mpdir)
    cpdir_template = (CPDIR if pipeline is None else pipeline.cpdir)
    nee_perc_ustar_cut_template = (NEE_PERC_USTAR_CUT if pipeline is None else pipeline.nee_perc_ustar_cut)
    nee_perc_ustar_vut_template = (NEE_PERC_USTAR_VUT if pipeline is None else pipeline.nee_perc_ustar_vut)

    ### TEST year range for real range. Some sites have only energy/water fluxes for some of the first or last years, so NEE has shorter range
    nee_perc_ustar_cut = nee_perc_ustar_cut_template.format(s=siteid, sd=sitedir, fy=first_year, ly=last_year)
    if not os.path.isfile(nee_perc_ustar_cut):
        log.warning("{s}: looking for alternate USTAR CUT percentiles file, NOT found: {f}".format(s=siteid, f=nee_perc_ustar_cut))
        alt_nee_perc_ustar_cut = test_pattern(tdir=os.path.join(sitedir, NEEDIR_PATTERN), tpattern=NEE_PERC_USTAR_CUT_PATTERN.format(s=siteid, sd=sitedir, fy='????', ly='????'), label='aux_info_files', log_only=True)
        if len(alt_nee_perc_ustar_cut) == 1:
            log.warning("{s}: alternate USTAR CUT percentiles file FOUND: {f}".format(s=siteid, f=alt_nee_perc_ustar_cut))
            alt_nee_perc_ustar_cut = alt_nee_perc_ustar_cut[0]
            _, first_year, last_year, _ = alt_nee_perc_ustar_cut.split('_', 3)
            first_year, last_year = int(first_year), int(last_year)
        else:
            log.warning("{s}: incorrect number of NEE_PERC_USTAR_CUT files found (1-2 years record?): {l}".format(s=siteid, l=alt_nee_perc_ustar_cut))

#        # PREVIOUS SINGLE-YEAR IMPLEMENTATION
#        alt_nee_perc_ustar_cut = NEE_PERC_USTAR_CUT.format(s=siteid, sd=sitedir, fy=int(first_year) + 1, ly=last_year)
#        if os.path.isfile(alt_nee_perc_ustar_cut):
#            log.warning("{s}: alternate USTAR CUT percentiles file FOUND: {f}".format(s=siteid, f=alt_nee_perc_ustar_cut))
#            first_year += 1
#        else:
#            log.error("{s}: alternate USTAR CUT percentiles file NOT found: {f}".format(s=siteid, f=alt_nee_perc_ustar_cut))

    year_range = range(int(first_year), int(last_year) + 1)

    ### process NEE USTAR CUT
    nee_perc_ustar_cut_values = load_ustar_cut(siteid=siteid, sitedir=sitedir, first_year=first_year, last_year=last_year, nee_perc_ustar_cut_template=nee_perc_ustar_cut_template)

    ### process NEE USTAR VUT
    nee_perc_ustar_vut_values = load_ustar_vut(siteid=siteid, sitedir=sitedir, year_range=year_range, nee_perc_ustar_vut_template=nee_perc_ustar_vut_template)

    # process NEE, RECO, and GPP info
    u50_ustar_perc = {'CUT': nee_perc_ustar_cut_values['50.00']}
    u50_ustar_perc.update({year: threshold['50.00']
                           for year, threshold in nee_perc_ustar_vut_values.items()})
    nee_ref_ustar_perc = {i:{} for i in RESOLUTION_LIST}
    unc_ref_ustar_perc = {i:{j:{} for j in RESOLUTION_LIST} for i in ['RECO_NT', 'GPP_NT', 'RECO_DT', 'GPP_DT']}
    ustar_not_working = {'files_mp':set(), 'files_cp':set(), 'info_mp':set(), 'info_cp':set()}

    lines = []
    for res in RESOLUTION_LIST:
        # process NEE
        nee_info = nee_info_template.format(s=siteid, sd=sitedir, r=res)
        method_line_num = None
        if not os.path.isfile(nee_info):
            raise ONEFluxError("NEE info file not found: {f}".format(f=nee_info))
        with open(nee_info, 'r') as f:
            lines = f.readlines()
        for line_num, line in enumerate(reversed(lines)):
            # NEE REF VUT
            if line.strip().lower().startswith('nee_ref_y'):
                year_extra = None
                unsplit_year = line.strip().lower().split('on year')
                if len(unsplit_year) == 2:
                    year = int(unsplit_year[1].strip().split()[0].strip())
                elif len(unsplit_year) == 1 and len(year_range) == 1:
                    year = first_year
                elif len(unsplit_year) == 1 and len(year_range) == 2:
                    year = first_year
                    year_extra = last_year
                else:
                    raise ONEFluxError("{s}: Unknown NEE VUT REF percentile/threshold entry in line: '{l}'".format(s=siteid, l=line.strip()))
                threshold = line.strip().lower().split('ustar percentile')[1].strip().split()[0].strip()
                ustar = nee_perc_ustar_vut_values[year][threshold]
                if year in nee_ref_ustar_perc[res]:
                    raise ONEFluxError("{s} duplicated entry for NEE REF VUT USTAR: {f}".format(s=siteid, f=nee_info))
                else:
                    nee_ref_ustar_perc[res][year] = (threshold, ustar)
                if year_extra:
                    if year_extra in nee_ref_ustar_perc[res]:
                        raise ONEFluxError("{s} duplicated entry for NEE REF VUT USTAR: {f}".format(s=siteid, f=nee_info))
                    else:
                        nee_ref_ustar_perc[res][year_extra] = (threshold, ustar)
                        year_extra = None
#                print 'VUT', res, year, threshold, ustar
            # NEE REF CUT
            elif line.strip().lower().startswith('nee_ref_c'):
                threshold = line.strip().lower().split('ustar percentile')[1].strip().split()[0].strip()
                ustar = nee_perc_ustar_cut_values[threshold]
                if 'CUT' in nee_ref_ustar_perc[res]:
                    raise ONEFluxError("{s} duplicated entry for NEE REF CUT USTAR: {f}".format(s=siteid, f=nee_info))
                else:
                    nee_ref_ustar_perc[res]['CUT'] = (threshold, ustar)
#                print 'CUT', res, threshold, ustar
            # USTAR-method-not-working entries start
            elif ('year' in line.strip().lower()) and ('method not applied' in line.strip().lower()):
                if method_line_num is not None:
                    raise ONEFluxError('Two lines (#{l1} and #{l2}) starting with info for USTAR method not working: {f}'.format(l1=method_line_num, l2=len(lines) - line_num - 1, f=nee_info))
                method_line_num = len(lines) - line_num - 1

        # USTAR-method-not-working entries detect
        if method_line_num is not None:
            lnum = method_line_num
            while lnum < len(lines):
                if is_int(lines[lnum].strip()[:4]):
                    if lines[lnum].strip().lower().endswith('mp'):
                        ustar_not_working['info_mp'].add(int(lines[lnum].strip()[:4]))
#                        print lines[lnum].strip()
                    elif lines[lnum].strip().lower().endswith('cp'):
                        ustar_not_working['info_cp'].add(int(lines[lnum].strip()[:4]))
#                        print lines[lnum].strip()
                lnum += 1
        else:
            log.warning('USTAR-method-not-working entries not found at {r} for: {f}'.format(r=res.upper(), f=nee_info))

        # USTAR-method-not-working files detect
        mpyears, cpyears = get_created_ustar_years(mpdir=mpdir_template.format(sd=sitedir), cpdir=cpdir_template.format(sd=sitedir))
        ustar_not_working['files_mp'] = set(year_range) - set(mpyears)
        ustar_not_working['files_cp'] = set(year_range) - set(cpyears)
#        print 'MP: ', mpyears
#        print 'CP: ', cpyears
#        print

        # process RECO, GPP
        for method, variable in [('NT', 'RECO'), ('NT', 'GPP'), ('DT', 'RECO'), ('DT', 'GPP')]:
            key = variable + '_' + method
            unc_info = unc_info_template.format(s=siteid, sd=sitedir, m=method, v=variable, r=res)
            if not os.path.isfile(unc_info):
                unc_info = unc_info_alt_template.format(s=siteid, sd=sitedir, m=method, v=variable, r=res)
                if not os.path.isfile(unc_info):
                    raise ONEFluxError("UNC info file not found: {f}".format(f=unc_info))
            log.debug("{s}: processing file: {f}".format(s=siteid, f=unc_info))
            with open(unc_info, 'r') as f:
                lines = f.readlines()
            for line in reversed(lines):
                # RECO/GPP REF VUT
                if line.strip().lower().startswith('{v}_ref_y'.format(v=variable).lower()):
                    year_extra = None
                    unsplit_year = line.strip().lower().split('on year')
                    if len(unsplit_year) == 2:
                        year = int(unsplit_year[1].strip().split()[0].strip())
                    elif len(unsplit_year) == 1 and len(year_range) == 1:
                        year = first_year
                    elif len(unsplit_year) == 1 and len(year_range) == 2:
                        year = first_year
                        year_extra = last_year
                    else:
                        raise ONEFluxError("{s}: Unknown RECO/GPP VUT REF percentile/threshold entry in line: '{l}'".format(s=siteid, l=line.strip()))
                    threshold = line.strip().lower().split('ustar percentile')[1].strip().split()[0].strip()
                    ustar = nee_perc_ustar_vut_values[year][threshold]
                    if year in unc_ref_ustar_perc[key][res]:
                        raise ONEFluxError("{s} duplicated entry for {v} REF VUT USTAR: {f}".format(s=siteid, f=nee_info, v=variable))
                    else:
                        unc_ref_ustar_perc[key][res][year] = (threshold, ustar)
                    if year_extra:
                        if year_extra in unc_ref_ustar_perc[key][res]:
                            raise ONEFluxError("{s} duplicated entry for RECO/GPP REF VUT USTAR: {f}".format(s=siteid, f=nee_info))
                        else:
                            unc_ref_ustar_perc[key][res][year_extra] = (threshold, ustar)
                            year_extra = None
#                    print variable, method, 'VUT', res, year, threshold, ustar
                # RECO/GPP REF CUT
                elif line.strip().lower().startswith('{v}_ref_c'.format(v=variable).lower()):
                    threshold = line.strip().lower().split('ustar percentile')[1].strip().split()[0].strip()
                    ustar = nee_perc_ustar_cut_values[threshold]
                    if 'CUT' in unc_ref_ustar_perc[key][res]:
                        raise ONEFluxError("{s} duplicated entry for {v} REF CUT USTAR: {f}".format(s=siteid, f=nee_info, v=variable))
                    else:
                        unc_ref_ustar_perc[key][res]['CUT'] = (threshold, ustar)
#                    print variable, method, 'CUT', res, threshold, ustar

    output_lines = [','.join(AUX_HEADER) + '\n']

    # output USTAR not working
    entry_number = 1
    for year in year_range:
        if year in ustar_not_working['files_mp']:
            nline = "{i},USTAR_MP_METHOD,SUCCESS_RUN,0,{y}\n".format(i=entry_number, y=year)
            if year not in ustar_not_working['info_mp']:
                log.warning("{s}: USTAR_MP, year {y} not found in files, but success in info".format(s=siteid, y=year))
        else:
            nline = "{i},USTAR_MP_METHOD,SUCCESS_RUN,1,{y}\n".format(i=entry_number, y=year)
            if year in ustar_not_working['info_mp']:
                log.error("{s}: USTAR_MP, year {y} failed in info, but found in files".format(s=siteid, y=year))
        output_lines.append(nline)

    entry_number += 1
    for year in year_range:
        if year in ustar_not_working['files_cp']:
            nline = "{i},USTAR_CP_METHOD,SUCCESS_RUN,0,{y}\n".format(i=entry_number, y=year)
            if year not in ustar_not_working['info_cp']:
                log.warning("{s}: USTAR_CP, year {y} not found in files, but success in info".format(s=siteid, y=year))
        else:
            nline = "{i},USTAR_CP_METHOD,SUCCESS_RUN,1,{y}\n".format(i=entry_number, y=year)
            if year in ustar_not_working['info_cp']:
                log.error("{s}: USTAR_CP, year {y} failed in info, but found in files".format(s=siteid, y=year))
        output_lines.append(nline)

    # output USTAR_THRESHOLD_50
    entry_number += 1
    nline = "{i},NEE_CUT_USTAR50,USTAR_THRESHOLD,{v},-9999\n".format(i=entry_number, v=u50_ustar_perc.get('CUT', -9999))
    output_lines.append(nline)
    entry_number += 1
    for year in year_range:
        nline = "{i},NEE_VUT_USTAR50,USTAR_THRESHOLD,{v},{y}\n".format(i=entry_number, v=u50_ustar_perc.get(year, -9999), y=year)
        output_lines.append(nline)

    # output NEE REF
    for res in RESOLUTION_LIST:
        entry_number += 1
        perc, thres = nee_ref_ustar_perc[res].get('CUT', (-9999, -9999))
        nline = "{i},NEE_CUT_REF,{r}_USTAR_PERCENTILE,{v},-9999\n".format(i=entry_number, r=res.upper(), v=perc)
        output_lines.append(nline)
        nline = "{i},NEE_CUT_REF,{r}_USTAR_THRESHOLD,{v},-9999\n".format(i=entry_number, r=res.upper(), v=thres)
        output_lines.append(nline)
        entry_number += 1
        for year in year_range:
            perc, thres = nee_ref_ustar_perc[res].get(year, (-9999, -9999))
            nline = "{i},NEE_VUT_REF,{r}_USTAR_PERCENTILE,{v},{y}\n".format(i=entry_number, r=res.upper(), v=perc, y=year)
            output_lines.append(nline)
            nline = "{i},NEE_VUT_REF,{r}_USTAR_THRESHOLD,{v},{y}\n".format(i=entry_number, r=res.upper(), v=thres, y=year)
            output_lines.append(nline)

    # output UNC REF
    for prod in ['RECO_NT', 'GPP_NT', 'RECO_DT', 'GPP_DT']:
        for res in RESOLUTION_LIST:
            entry_number += 1
            perc, thres = unc_ref_ustar_perc[prod][res].get('CUT', (-9999, -9999))
            nline = "{i},{p}_CUT_REF,{r}_USTAR_PERCENTILE,{v},-9999\n".format(i=entry_number, p=prod, r=res.upper(), v=perc)
            output_lines.append(nline)
            nline = "{i},{p}_CUT_REF,{r}_USTAR_THRESHOLD,{v},-9999\n".format(i=entry_number, p=prod, r=res.upper(), v=thres)
            output_lines.append(nline)
            entry_number += 1
            for year in year_range:
                perc, thres = unc_ref_ustar_perc[prod][res].get(year, (-9999, -9999))
                nline = "{i},{p}_VUT_REF,{r}_USTAR_PERCENTILE,{v},{y}\n".format(i=entry_number, p=prod, r=res.upper(), v=perc, y=year)
                output_lines.append(nline)
                nline = "{i},{p}_VUT_REF,{r}_USTAR_THRESHOLD,{v},{y}\n".format(i=entry_number, p=prod, r=res.upper(), v=thres, y=year)
                output_lines.append(nline)

    # output CUT thresholds
    for perc in PERCENTILES_SORTED:
        entry_number += 1
        thres = nee_perc_ustar_cut_values[perc]
        nline = "{i},USTAR_CUT,USTAR_PERCENTILE,{v},-9999\n".format(i=entry_number, v=perc)
        output_lines.append(nline)
        nline = "{i},USTAR_CUT,USTAR_THRESHOLD,{v},-9999\n".format(i=entry_number, v=thres)
        output_lines.append(nline)

    # output VUT thresholds
    for year in year_range:
        for perc in PERCENTILES_SORTED:
            entry_number += 1
            thres = nee_perc_ustar_vut_values[year][perc]
            nline = "{i},USTAR_VUT,USTAR_PERCENTILE,{v},{y}\n".format(i=entry_number, v=perc, y=year)
            output_lines.append(nline)
            nline = "{i},USTAR_VUT,USTAR_THRESHOLD,{v},{y}\n".format(i=entry_number, v=thres, y=year)
            output_lines.append(nline)

    output_filename = prodfile_aux_template.format(s=siteid, sd=sitedir, aux='AUXNEE', fy=first_year, ly=last_year, vd=version_data, vp=version_processing)
    log.info("{s}: writing auxiliary NEE file: {f}".format(s=siteid, f=output_filename))
    with open(output_filename, 'w') as f:
        f.writelines(output_lines)
    log.debug("{s}: finished generation of AUXNEE file: {f}".format(s=siteid, f=output_filename))

    return output_filename
Example #19
def generate_meteo(siteid, sitedir, first_year, last_year, version_data, version_processing, pipeline=None):
    log.debug("{s}: starting generation of AUXMETEO file".format(s=siteid))
    meteo_info = (METEO_INFO if pipeline is None else pipeline.meteo_info)
    prodfile_aux_template = (PRODFILE_AUX_TEMPLATE if pipeline is None else pipeline.prodfile_aux_template)

    filename = meteo_info.format(s=siteid, sd=sitedir, r='hh')
    if not os.path.isfile(filename):
        raise ONEFluxError("{s}: meteo info file not found: {f}".format(s=siteid, f=filename))

    H_BEGIN, H_END = "var", "corr"
    VAR_D = {
        'Ta': 'TA',
        'TA': 'TA',
        'Pa': 'PA',
        'PA': 'PA',
        'VPD': 'VPD',
        'WS': 'WS',
        'Precip': 'P',
        'P': 'P',
        'Rg': 'SW_IN',
        'SW_IN': 'SW_IN',
        'LWin': 'LW_IN',
        'LW_IN': 'LW_IN',
        'LWin_calc': 'LW_IN_JSB',
        'LW_IN_calc': 'LW_IN_JSB',
    }

    lines = []
    with open(filename, 'r') as f:
        lines = f.readlines()

    c_var, c_slope, c_intercept, c_rmse, c_corr = 0, 1, 2, 3, 4
    first_line = None
    for i, line in enumerate(lines):
        l = line.strip().lower()
        if l.startswith(H_BEGIN) and l.endswith(H_END):
            first_line = i
            break

    if first_line is None:
        raise ONEFluxError("{s}: first line of meteo info file not found: {f}".format(s=siteid, f=filename))
    if 'unit' in lines[first_line].lower():
        log.info("{s}: handling old format meteo info file: {f}".format(s=siteid, f=filename))
        c_slope, c_intercept, c_rmse, c_corr = 3, 4, 5, 6

    vars_l = ['TA', 'PA', 'VPD', 'WS', 'P', 'SW_IN', 'LW_IN', 'LW_IN_JSB']
    #pars_l = ['ERA_SLOPE', 'ERA_INTERCEPT', 'ERA_RMSE', 'ERA_CORRELATION']
    values = {i:None for i in vars_l}
    for line in lines[first_line + 1:first_line + 9]:
        l = line.strip().split(',')
        values[VAR_D[l[c_var]]] = [(float(l[c_slope].strip()) if (l[c_slope].strip() and l[c_slope].strip() != '-') else -9999),
                                   (float(l[c_intercept].strip()) if (l[c_intercept].strip() and l[c_intercept].strip() != '-') else -9999),
                                   (float(l[c_rmse].strip()) if (l[c_rmse].strip() and l[c_rmse].strip() != '-') else -9999),
                                   (float(l[c_corr].strip()) if (l[c_corr].strip() and l[c_corr].strip() != '-') else -9999),
                                  ]

    output_lines = [','.join(AUX_HEADER) + '\n']
    for i, var in enumerate(vars_l, start=1):
        if values[var] is None:
            raise ONEFluxError("{s}: ERA variable '{v}' not found in: {f}".format(s=siteid, v=var, f=filename))
        slope = ("{v:.2f}".format(v=values[var][0]) if values[var][0] != -9999 else '-9999')
        intercept = ("{v:.2f}".format(v=values[var][1]) if values[var][1] != -9999 else '-9999')
        rmse = ("{v:.2f}".format(v=values[var][2]) if values[var][2] != -9999 else '-9999')
        corr = ("{v:.2f}".format(v=values[var][3]) if values[var][3] != -9999 else '-9999')
        output_lines.append("{i},{v},{p},{val},{t}\n".format(i=i, v=var, p='ERA_SLOPE', val=slope, t='-9999'))
        output_lines.append("{i},{v},{p},{val},{t}\n".format(i=i, v=var, p='ERA_INTERCEPT', val=intercept, t='-9999'))
        output_lines.append("{i},{v},{p},{val},{t}\n".format(i=i, v=var, p='ERA_RMSE', val=rmse, t='-9999'))
        output_lines.append("{i},{v},{p},{val},{t}\n".format(i=i, v=var, p='ERA_CORRELATION', val=corr, t='-9999'))

    output_filename = prodfile_aux_template.format(s=siteid, sd=sitedir, aux='AUXMETEO', fy=first_year, ly=last_year, vd=version_data, vp=version_processing)
    log.info("{s}: writing auxiliary METEO file: {f}".format(s=siteid, f=output_filename))
    with open(output_filename, 'w') as f:
        f.writelines(output_lines)
    log.debug("{s}: finished generation of AUXMETEO file: {f}".format(s=siteid, f=output_filename))

    return output_filename
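
For reference, a sketch of the data-row parsing above on one hypothetical new-format line (columns: var, slope, intercept, rmse, corr; VAR_D maps the first field to the output label):

l = 'Ta,0.98,0.12,1.35,0.97'.split(',')
print(l[0], float(l[1]), float(l[4]))  # Ta 0.98 0.97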
Example #20
File: nighttime.py Project: LI-COR/ONEFlux
def nlinlts1(data, func=lloyd_taylor, depvar='neenight', indepvar='tair', npara=2, xguess=[2.0, 200.0], trim_perc=BR_PERC):
    """
    Main non-linear least-squares driver function

    :param data: data structure for partitioning
    :type data: numpy.ndarray
    :param func: function to be optimized
    :type func: function
    :param depvar: dependent variable (computed by function)
    :type depvar: str
    :param indepvar: independent variable (parameter to function)
    :type indepvar: str
    :param npara: number of parameters to be optimized
    :type npara: int
    :param xguess: list with initial/starting guesses for variables to be optimized
    :type xguess: list
    :param trim_perc: percentage to trim from residual values
    :type trim_perc: float
    """
    if len(xguess) != npara:
        msg = "Incompatible number of parameters '{n}' and length of initial guess '{i}'".format(n=npara, i=len(xguess))
        _log.critical(msg)
        raise ONEFluxError(msg)

    status = 0 # status of execution; 0 optimization executed successfully, -1 problem with execution of optimization
    first_ts, last_ts = get_first_last_ts(data=data)
#    _log.debug("Starting optimization step for period '{ts1}' - '{ts2}'".format(ts1=first_ts.strftime('%Y-%m-%d %H:%M'), ts2=last_ts.strftime('%Y-%m-%d %H:%M')))

    # check number of entries for independent variable
    nonnan_indep_mask = not_nan(data[indepvar])
    if numpy.sum(nonnan_indep_mask) < (npara * 3):
        _log.warning("Not enough data points (independent variable filtered) for optimization: {n}".format(n=numpy.sum(nonnan_indep_mask)))
        status = -1
        return status, -9999.0, -9999.0, -9999.0, -9999.0, None, None, None, None, None, None, None

    # check number of entries for dependent AND independent variable
    nonnan_dep_mask = not_nan(data[depvar])
    nonnan_combined_mask = nonnan_indep_mask & nonnan_dep_mask
    if numpy.sum(nonnan_combined_mask) < (npara * 3):
        _log.warning("Not enough data points (dependent and independent variable filtered) for optimization: {n}".format(n=numpy.sum(nonnan_combined_mask)))
        status = -1
        return status, -9999.0, -9999.0, -9999.0, -9999.0, None, None, None, None, None, None, None

    # "clean" dependent variable so not to use NAs from independent variable
    clean_dep = data[depvar].copy()
    clean_dep[~nonnan_indep_mask] = NAN

#    print
#    print 'NEE:'
#    print clean_dep
#    print
#    print 'TA:'
#    print data[indepvar]
#    print
#    # TODO: remove

    # define inner function to be used for optimization
    def trimmed_residuals(par, nee=clean_dep, temp=data[indepvar], trim_perc=trim_perc):
        """
        (inner) Function to be evaluated at each iteration,
        taking care of handling NAs and trimming residuals.
        Inner function used so extra arguments not needed in
        call to scipy leastsq function

        :param nee: array with (non-cleaned) nee values (dependent variable)
        :type nee: numpy.ndarray
        :param temp: array with (cleaned, no NAs) temperature (independent variable)
        :type temp: numpy.ndarray
        :param rref: reference respiration (1st parameter to be optimized)
        :type rref: float
        :param e0: temperature sensitivity (2nd parameter to be optimized)
        :type e0: float
        :param trim_perc: percentile to be trimmed off
        :type trim_perc: float
        """
        rref, e0 = par
        prediction = lloyd_taylor(ta=temp, rref=rref, e0=e0)
        residuals = nee - prediction
        nonnan_nee_mask = not_nan(nee)
        residuals[~nonnan_nee_mask] = 0.0

        # NOTE: compareIndex and compindex not used in NT partitioning code

        if trim_perc == 0.0:
            return residuals

        absolute_residuals = numpy.abs(residuals)
        pct_calc = pct(absolute_residuals, 100.0 - trim_perc)
        trim_mask = (absolute_residuals > pct_calc)
        residuals[trim_mask] = 0.0

#        print 'rref/e0/res:', rref, e0, numpy.sum(residuals ** 2)

        return residuals

    parameters, std_devs, ls_status, ls_msg, residuals, covariance_matrix = least_squares(func=trimmed_residuals,
                                                                                          initial_guess=xguess,
                                                                                          entries=len(clean_dep),
                                                                                          iterations=1000 * (len(clean_dep) + 1),
                                                                                          return_residuals_cov_mat=True)
    est_rref, est_e0 = parameters
    est_rref_std, est_e0_std = std_devs

    tvalue, pvalue = ttest_ind(clean_dep, lloyd_taylor(ta=data[indepvar], rref=est_rref, e0=est_e0))
    fvalue, f_pvalue = f_oneway(clean_dep, lloyd_taylor(ta=data[indepvar], rref=est_rref, e0=est_e0))
    nee_std, ta_std = numpy.nanstd(clean_dep), numpy.nanstd(data[indepvar])

#    print "rref:", est_rref
#    print "rref_se:", est_rref_std
#    print "e0:", est_e0
#    print "e0_se:", est_e0_std
#    print "p-value:", pvalue, tvalue
#    print "p-value (f):", f_pvalue, fvalue
#    print
#    # TODO: remove

#    _log.debug("Finished optimization step for period '{ts1}' - '{ts2}'  -  parameters rref: {r} ({rs}), e0: {e} ({es})*********".format(ts1=first_ts.strftime('%Y-%m-%d %H:%M'),
#                                                                                                                                         ts2=last_ts.strftime('%Y-%m-%d %H:%M'),
#                                                                                                                                         r=est_rref, e=est_e0,
#                                                                                                                                         rs=est_rref_std, es=est_e0_std))

    # NOTE: calculation of residuals in the original code used only for graph, so not added here

    # add zeros to residuals, if length of residuals is less than maximum number of entries for window
    # done to match original code, purpose unclear
    if len(residuals) < 48 * WINDOW_SIZE:
        new_residuals = numpy.zeros(48 * WINDOW_SIZE, dtype=FLOAT_PREC)
        new_residuals[:len(residuals)] = residuals
    else:
        new_residuals = residuals

    # NOTE: if changing return values, also update case for "not enough data points"
    return status, est_rref, est_e0, est_rref_std, est_e0_std, new_residuals, covariance_matrix, ls_status, ls_msg, pvalue, nee_std, ta_std
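
For context, a sketch of the Lloyd-Taylor respiration model that nlinlts1 fits; the reference and base temperatures (tref=15.0, t0=-46.02, in deg C) follow the standard Lloyd & Taylor (1994) formulation and are assumptions here, not values taken from this listing:

import numpy

def lloyd_taylor(ta, rref, e0, tref=15.0, t0=-46.02):
    """Ecosystem respiration as a function of air temperature ta (deg C)."""
    return rref * numpy.exp(e0 * (1.0 / (tref - t0) - 1.0 / (ta - t0)))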