def rebalance_valleys(reduction_result, peaks, pneg):
    """Redistribute the total mass error into the valley segments between
    peaks and return a new ReductionResult with the adjusted reduced flux."""
    rr = reduction_result

    error = rr.total_mass_error
    r_x = rr.reduced_flux.times
    r_y = rr.reduced_flux.values

    segments, t_mass = build_segments(rr, peaks, pneg, .5)

    #if abs(error) > t_mass:
    #    print("*Warning: total_mass_error ({}) exceeds valley mass ({}) for error adjustment; increasing inflection points from 50% to 75% of valley area".format(error, t_mass))
    #    segments, t_mass = build_segments(rr, peaks, pneg, .75)
    if abs(error) > t_mass:
        print(
            "*Warning: total_mass_error ({}) exceeds valley mass ({}) for error adjustment; unable to correct mass_error"
            .format(error, t_mass))
        return rr
    adj_dict = adjust_flux(segments, error)

    for i in range(r_x.size):
        year = r_x[i]
        if year in adj_dict:
            r_y[i] = adj_dict[year]

    adjusted = TimeSeries(r_x, r_y, None, None)

    reduced_mass = tsmath.integrate(adjusted)
    rr = ReductionResult(flux=rr.flux,
                         mass=rr.mass,
                         reduced_flux=adjusted,
                         reduced_mass=reduced_mass)
    return rr
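# A minimal, self-contained sketch of the substitution loop above: adjust_flux()
# (not shown) returns a year -> corrected-flux mapping, and the loop overwrites
# the reduced flux at exactly those years. The adj_dict contents here are
# hypothetical.
import numpy as np

r_x = np.array([2000, 2005, 2010, 2020])
r_y = np.array([1.0, 0.2, 0.3, 1.5])
adj_dict = {2005: 0.35, 2010: 0.45}
for i, year in enumerate(r_x):
    if year in adj_dict:
        r_y[i] = adj_dict[year]
# r_y is now [1.0, 0.35, 0.45, 1.5]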
Example #2
def retain_min_years(r_ts, o_ts, o_mass, min_years_ind):
    """Ensure the reduced series r_ts retains the required years of the
    original series o_ts (indexed by min_years_ind), then re-integrate."""

    years = o_ts.times[min_years_ind]
    if r_ts.times[0] != years[0]:
        r_ts.times = np.insert(r_ts.times,0,years[0])
        r_ts.values = np.insert(r_ts.values,0,o_ts.values[min_years_ind[0]])
    if len(years) > 2:
        for ind in range(1, len(years) - 1):
            if not np.any(r_ts.times == years[ind]):
                # insertion position that keeps r_ts.times sorted
                pos = np.searchsorted(r_ts.times, years[ind])
                r_ts.times = np.insert(r_ts.times, pos, years[ind])
                r_ts.values = np.insert(r_ts.values, pos,
                                        o_ts.values[min_years_ind[ind]])

    if r_ts.times[-1] != years[-1]:
        r_ts.times = np.append(r_ts.times, years[-1])
        r_ts.values = np.append(r_ts.values, o_ts.values[min_years_ind[-1]])
    reduced_mass = tsmath.integrate(r_ts)
    return ReductionResult(
            flux=o_ts,
            mass=o_mass,
            reduced_flux=r_ts,
            reduced_mass=reduced_mass)
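# A minimal sketch of the ordered-insert step used above (assuming integer
# yearly timesteps): np.searchsorted finds the position that keeps the times
# sorted, and np.insert places the new year and its flux there.
import numpy as np

times = np.array([2000, 2010, 2050])
values = np.array([1.0, 3.0, 0.5])
year, flux = 2025, 2.0
pos = np.searchsorted(times, year)      # -> 2
times = np.insert(times, pos, year)     # [2000 2010 2025 2050]
values = np.insert(values, pos, flux)   # [1.0 3.0 2.0 0.5]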
Example #3
def reduce_flux(flux, epsilon, close_gaps, gap_delta, gap_steps):
    mass = tsmath.integrate(flux)
    reduced_flux, reduced_mass = reduce_timeseries(flux, epsilon, close_gaps,
                                                   gap_delta, gap_steps)

    result = ReductionResult(flux=flux,
                             mass=mass,
                             reduced_flux=reduced_flux,
                             reduced_mass=reduced_mass)

    return result
Example #4
def reduce_flux(flux, threshold_area, threshold_peak, solve_type,
                simple_peaks):
    mass = tsmath.integrate(flux)
    reduced_flux, reduced_mass = reduce_timeseries(flux, threshold_area,
                                                   threshold_peak, mass,
                                                   solve_type, simple_peaks)

    result = ReductionResult(flux=flux,
                             mass=mass,
                             reduced_flux=reduced_flux,
                             reduced_mass=reduced_mass)
    return result
Example #5
def reduce_timeseries(timeseries,
                      threshold_area,
                      threshold_peak,
                      mass,
                      solve_type=RAW,
                      simple_peaks=False):
    x = timeseries.times
    y = timeseries.values
    peaks, _ = sig.find_peaks(y)
    peaks = x[peaks]
    pneg, _ = sig.find_peaks(-y)
    pneg = x[pneg]
    peak_width = 1
    while peaks.size > 10:
        peak_width += 1
        peaks, _ = sig.find_peaks(y, width=peak_width, rel_height=1)
        peaks = x[peaks]
        pneg, _ = sig.find_peaks(-y, width=peak_width, rel_height=1)
        pneg = x[pneg]

    # keep timesteps with a significant relative slope: step-to-step change
    # > 20% of the value, restricted to values above 5% of the series max
    # (out=zeros pins the entries np.divide would otherwise leave undefined)
    required_slope = x[np.divide(np.abs(np.diff(y, prepend=0)), y,
                                 out=np.zeros_like(y, dtype=float),
                                 where=(y > 0.05 * np.max(y))) > 0.20]

    # shift each flagged timestep back by one so the point before the jump
    # is retained (assumes unit time spacing)
    required_slope = [i - 1 for i in required_slope]

    if simple_peaks:
        peaks = np.array([x[np.argmax(timeseries.values)]])
        pneg = []
        required_slope = np.array([])

    if solve_type == SMOOTH:
        ts_smooth = tsmath.smooth(timeseries)
        y = ts_smooth.values
    r = redcon.reducer(
        (x, y),
        threshold_area=threshold_area,
        threshold_peak=threshold_peak,
    )

    flat_reduced_x = set(redcon.flatten_reduced(r))

    required = {x[0], x[-1]}

    xout = sorted(flat_reduced_x.union(required)
                  .union(peaks).union(pneg).union(required_slope))
    reduced_flux = timeseries.subset(xout)
    reduced_mass = tsmath.integrate(reduced_flux)

    return reduced_flux, reduced_mass
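# A minimal sketch of the relative-slope filter above: keep timesteps whose
# step-to-step change exceeds 20% of the current value, restricted to values
# above 5% of the series maximum. out=zeros pins the masked-out entries that
# np.divide would otherwise leave undefined.
import numpy as np

x = np.arange(6)
y = np.array([0.0, 1.0, 1.1, 5.0, 5.2, 0.1])
rel = np.divide(np.abs(np.diff(y, prepend=0)), y,
                out=np.zeros_like(y), where=(y > 0.05 * np.max(y)))
print(x[rel > 0.2])   # [1 3] -- the two significant jumps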
Example #6
def reduce_timeseries(timeseries, epsilon, close_gaps, gap_delta, gap_steps):
    x = timeseries.times
    y = timeseries.values

    #the reduced dataset must include significant slope deltas: timesteps
    #where |y2 - y1| / y2 > 0.2 and y > 0.05 * max(y)
    #(out=zeros pins the entries np.divide would otherwise leave undefined)
    required_slope = x[np.divide(np.abs(np.diff(y, prepend=0)), y,
                                 out=np.zeros_like(y, dtype=float),
                                 where=(y > 0.05 * np.max(y))) > 0.2]

    #grab the timesteps on either side of each "required slope" timestep and
    #add them to the required slope list
    required_slope_lower = [i - 1 for i in required_slope if i != x[0]]
    required_slope_upper = [i + 1 for i in required_slope if i != x[-1]]
    required_slope = sorted(
        {*required_slope, *required_slope_upper, *required_slope_lower})

    #normalize the fluxes for the RDP reduction algorithm
    y_normalized = y / np.max(y)
    list_xy = list(zip(x, y_normalized))

    rdp_list = rdp(list_xy, epsilon)
    #parse the returned reduced dataset into timesteps and normalized fluxes
    #(the normalized fluxes need no back-conversion: the reduced fluxes are
    #rebuilt from the original data by timeseries.subset() below)
    rdp_x = [int(pair[0]) for pair in rdp_list]
    rdp_y = [pair[1] for pair in rdp_list]

    #for some datasets with large gaps between timesteps, reduction error is
    #improved by adding additional timesteps; whether this is applied is
    #user-defined in the JSON config file
    x_gaps = []
    if close_gaps.lower() == "true":
        for index in np.where(np.diff(rdp_x) > gap_delta)[0]:
            #if the slope between the two timesteps is nonzero, fill the gap
            #with evenly spaced timesteps
            if abs(np.diff(rdp_y)[index]) > 0:
                step = int(np.diff(rdp_x)[index] / (gap_steps + 1))
                x_gaps += list(range(rdp_x[index], rdp_x[index + 1], step))

    xout = sorted(set([*required_slope, *rdp_x, *x_gaps]))

    reduced_flux = timeseries.subset(xout)
    reduced_mass = tsmath.integrate(reduced_flux)

    return reduced_flux, reduced_mass
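# A hedged usage sketch of the rdp call above (pip package `rdp`): points whose
# perpendicular distance from the simplified polyline stays within epsilon are
# dropped, so only the endpoints survive here.
from rdp import rdp

pts = [(0, 0.0), (1, 0.1), (2, 0.0), (3, 0.0)]
print(rdp(pts, epsilon=0.5))   # [[0.0, 0.0], [3.0, 0.0]]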
Example #7
def rebalance(reduction_result):
    """Return a new ReductionResult whose reduced flux is adjusted so that
    the total mass difference is zero."""
    rr = reduction_result
    deltaM = rr.total_mass_error
    vals = rr.reduced_flux.values
    times = rr.reduced_flux.times
    # equal application: adding deltaM/dt to every value raises the
    # integrated mass by exactly deltaM
    dt = times[-1] - times[0]
    vals += deltaM / dt
    adjusted = rr.reduced_flux.from_values(values=vals)
    reduced_mass = tsmath.integrate(adjusted)
    rr = ReductionResult(flux=rr.flux,
                         mass=rr.mass,
                         reduced_flux=adjusted,
                         reduced_mass=reduced_mass)
    return rr
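# A minimal numerical check of the rebalance above, with numpy's trapezoidal
# rule standing in for tsmath.integrate: adding deltaM/dt to every value raises
# the integral by exactly deltaM, because a constant c integrates to
# c * (t_end - t_start) over the span.
import numpy as np

times = np.array([0.0, 1.0, 3.0, 6.0])
values = np.array([2.0, 4.0, 1.0, 0.5])
deltaM = 1.5                    # hypothetical total_mass_error
dt = times[-1] - times[0]
before = np.trapz(values, times)
after = np.trapz(values + deltaM / dt, times)
assert np.isclose(after - before, deltaM)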
Example #8
def integrate(self):
    return ts_math.integrate(self)
Example #9
def reduct_iter(timeseries, flux_floor, ythresh, out_error, out_error_last,
                OUT_ERROR_THRESHOLD, UPPER_N, LOWER_N, last_result,
                MAX_ITERATIONS, algo="iter"):
    out_error_last = out_error
    prev_point_count = 0

    mass = timeseries.integrate()
    good_result=ReductionResult(
                flux=timeseries,
                mass=mass,
                reduced_flux=timeseries,
                reduced_mass=mass)
    last_result = ReductionResult(
                flux=timeseries,
                mass=mass,
                reduced_flux=timeseries,
                reduced_mass=mass)
    epsilon = ythresh
    mult_by = .5

    for ix in range(MAX_ITERATIONS):
        #execute the Ramer–Douglas–Peucker algorithm
        temp = rdp.rdp(np.stack((timeseries.times, timeseries.values), axis=-1),
                       epsilon=epsilon, algo=algo)
        #find the relative error
        reduced_flux = TimeSeries(temp[:, 0], temp[:, 1], None, None)
        reduced_mass = tsmath.integrate(reduced_flux)
        res = ReductionResult(
                flux=timeseries,
                mass=mass,
                reduced_flux=reduced_flux,
                reduced_mass=reduced_mass)

        out_error = abs(res.relative_total_mass_error)
        # if relative error below error threshold record result
        if out_error < OUT_ERROR_THRESHOLD:
            #if num of points greater than the lower point bound then we are done
            # exit loop
            if res.reduced_flux.times.size >= LOWER_N:
                last_result = res
                break
            #if num of points is smaller than the lower point bound but has more
            # points than previously found tries then keep this as a potential
            # good data set.
            elif res.reduced_flux.times.size > prev_point_count:
                prev_point_count = res.reduced_flux.times.size
                good_result = res
        #reduce epsilon to increase number of points found
        if epsilon * mult_by > flux_floor:
            epsilon = epsilon * mult_by
        else:
            #previous reduction was not good, try reducing epsilon slower
            mult_by = mult_by * .5
            epsilon = ythresh
            if epsilon * mult_by > flux_floor:
                epsilon = epsilon * mult_by
            else:
                break

        last_result = res
    if prev_point_count > 0:
        if last_result.reduced_flux.times.size > UPPER_N or out_error_last > OUT_ERROR_THRESHOLD:
            if good_result.reduced_flux.times.size < UPPER_N:
                last_result = good_result

    return last_result, ix
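# An isolated sketch of the epsilon schedule driving reduct_iter above
# (hypothetical numbers): epsilon is halved until the next step would fall
# below flux_floor, then the search restarts from ythresh with a smaller
# multiplier until even that would underflow.
flux_floor, ythresh = 0.01, 1.0
epsilon, mult_by = ythresh, 0.5
schedule = []
for _ in range(20):
    if epsilon * mult_by > flux_floor:
        epsilon = epsilon * mult_by
    else:
        mult_by = mult_by * .5
        epsilon = ythresh
        if epsilon * mult_by > flux_floor:
            epsilon = epsilon * mult_by
        else:
            break
    schedule.append(epsilon)
print(schedule)   # 0.5, 0.25, ..., 0.015625, then restarts at 0.25, ...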
Example #10
def rebalance_extra_points(reduction_result, num_points=10):
    #----------------------
    # return the first day whose |cumulative mass difference| reaches the
    # mean absolute difference (-1 when there is nothing to correct)
    def find_mean_dif_day():
        diff = rr.diff_mass
        m_diff = np.mean(abs(diff.values))
        if m_diff > 0:
            ind = np.flatnonzero(abs(diff.values) >= m_diff)[0]
            return diff.times[ind]
        return -1

    #--------------------
    #
    def check_zero_fluxes():
        points = num_points
        times = rr.reduced_flux.times
        vals = rr.reduced_flux.values
        zero_inds = np.flatnonzero(rr.flux.values == 0)
        series = []
        result = [series]
        expect = None
        step = 1
        #loop through indexes and find consecutive zeros
        for v in zero_inds:
            if (v == expect) or (expect is None):
                series.append(v)
            else:
                run = [v]
                result.append(series)
            expect = v + step
        #

        for r in result:
            #leave a few points available for adding strategic mid points
            if points <= 10:
                break
            if len(r) > 5:
                times, vals = insert_point(times, vals, rr.flux.times[r[0]],
                                           rr.flux.values[r[0]])
                times, vals = insert_point(times, vals, rr.flux.times[r[-1]],
                                           rr.flux.values[r[-1]])
                points -= 2
        return points, times, vals

    rr = reduction_result
    points, times, vals = check_zero_fluxes()
    adjusted = TimeSeries(times, vals, None, None)
    reduced_mass = tsmath.integrate(adjusted)
    rr = ReductionResult(flux=rr.flux,
                         mass=rr.mass,
                         reduced_flux=adjusted,
                         reduced_mass=reduced_mass)
    #loop through and add mid points at strategic places.
    for x in range(points):
        diff_day = find_mean_dif_day()
        #if diff_day == -1, then max_diff was 0, which means there is nothing to
        # correct.
        if diff_day == -1:
            return rr
        times = rr.reduced_flux.times
        vals = rr.reduced_flux.values
        start_ind = np.flatnonzero(times < diff_day)[-1]
        end_ind = np.flatnonzero(times >= diff_day)[0]
        start_day = times[start_ind]
        end_day = times[end_ind]
        mid_day = ((end_day - start_day) / 2) + start_day
        mid_point = np.flatnonzero(rr.flux.times >= mid_day)[0]
        if mid_day not in times:
            times, vals = insert_point(times, vals, rr.flux.times[mid_point],
                                       rr.flux.values[mid_point])
        adjusted = TimeSeries(times, vals, None, None)
        reduced_mass = tsmath.integrate(adjusted)
        rr = ReductionResult(flux=rr.flux,
                             mass=rr.mass,
                             reduced_flux=adjusted,
                             reduced_mass=reduced_mass)

        if abs(rr.total_mass_error / rr.mass.values[-1]) * 100 < 0.001:
            break
    return rr
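# A standalone sketch of the run-grouping recipe used by check_zero_fluxes()
# above: walk the sorted indexes, extending the current series while each value
# matches the expected successor, otherwise starting a new series.
zero_inds = [2, 3, 4, 8, 9, 15]
series = []
result = [series]
expect = None
for v in zero_inds:
    if (v == expect) or (expect is None):
        series.append(v)
    else:
        series = [v]
        result.append(series)
    expect = v + 1
print(result)   # [[2, 3, 4], [8, 9], [15]]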
Example #11
def reduce_dataset(timeseries, summary_file, output_folder, input_data):
    """ take a TimeSeries object and reduce it.

    write a summary into summary_folder

    """
    copc = timeseries.copc
    site = timeseries.site
    if timeseries.are_all_zero():
        logging.info("Skipped {} {} - all zero".format(copc, site))
        return False

    #for a site/copc with nonzero fluxes, save the unreduced timeseries to
    #o_timeseries (original) for the error correction below
    o_timeseries = timeseries

    # grab user-defined constant values from input JSON file
    close_gaps = input_data[c.GAP_CLOSED].lower()
    if input_data[c.GAP_DELTA]:
        gap_delta = int(input_data[c.GAP_DELTA])
    else:
        gap_delta = 0
    if input_data[c.GAP_STEPS]:
        gap_steps = int(input_data[c.GAP_STEPS])
    else:
        gap_steps = 0

    diff_mass = input_data[c.DIFF_MASS].lower()

    #Placeholder code if flux_floor is needed in the future
    #if input_data[c.FLUX_FLOOR_KEY] is not "":
    #    flux_floor = float(input_data[c.FLUX_FLOOR_KEY])

    #else:
    #    flux_floor = ""
    #    logging.info("no flux floor value is being applied")

    upper_n = int(input_data[c.UPPER_N_KEY])
    lower_n = int(input_data[c.LOWER_N_KEY])

    #the maximum number of reduction iterations
    max_iters = int(input_data[c.MAX_ITERATIONS_KEY])

    #the maximum number of iterations for mass error redistribution
    max_err_iters = int(input_data[c.MAX_ERR_ITERATIONS_KEY])

    epsilon = float(input_data[c.EPSILON])

    res = red_flux.reduce_flux(timeseries, epsilon, close_gaps, gap_delta,
                               gap_steps)
    out_error = abs(res.relative_total_mass_error)

    out_error_last = out_error
    last_timesteps = 0

    if res.mass.values[-1] > float(input_data[c.MASS_THRESHOLD]):
        out_error_threshold = float(
            input_data[c.LOWER_OUT_ERROR_THRESHOLD_KEY])
    else:
        out_error_threshold = float(
            input_data[c.UPPER_OUT_ERROR_THRESHOLD_KEY])

    for ix in range(max_iters):
        timesteps = len(res.reduced_flux)
        out_error = abs(res.relative_total_mass_error)

        #if the timesteps are within the acceptable range and error <= error threshold --> done
        if lower_n <= timesteps <= upper_n and out_error <= out_error_threshold:
            last_result = res
            used_epsilon = epsilon
            break

        elif timesteps < lower_n:
            epsilon = epsilon / 2

        elif lower_n <= timesteps <= upper_n and out_error > out_error_threshold and out_error < out_error_last:
            epsilon = epsilon / 2

        elif lower_n <= timesteps <= upper_n and out_error > out_error_threshold and last_timesteps <= timesteps:
            epsilon = epsilon / 2

        #after exceeding max points, iterate between upper_n and the last epsilon where timesteps < upper_n
        elif timesteps > upper_n and last_timesteps < timesteps:
            epsilon = epsilon * 1.75

        elif timesteps <= last_timesteps:
            epsilon = epsilon / 1.5

        #keep the result as the last result only if timesteps are < max and if error is lower than previous result
        if timesteps <= upper_n and out_error <= out_error_last:
            last_result = res
            last_timesteps = timesteps
            out_error_last = out_error
            used_epsilon = epsilon

        res = red_flux.reduce_flux(timeseries, epsilon, close_gaps, gap_delta,
                                   gap_steps)

    if ix >= max_iters - 1:
        logging.info("MAX ITERATIONS exceeded")

    n_iterations = ix + 1

    if diff_mass == "true":
        #check the error in cumulative mass differences between the reduced and original datasets after reduction
        mass = last_result.mass
        r_mass = last_result.reduced_mass
        dmass = mass - r_mass

        diff_iter = 0
        corrected = False
        while (max(dmass.values) / mass.values[-1] > out_error_threshold
               and diff_iter < max_err_iters):
            year_err = dmass.times[np.where(
                dmass.values == max(dmass.values))].tolist()[0]
            year2 = r_mass.times[np.where(r_mass.times > year_err)][0]
            year1 = r_mass.times[np.where(r_mass.times < year_err)][-1]
            interval = int((year2 - year1) / 2)
            #add intermediate years where the timesteps on either side of the
            #max difference span more than 4 years
            if interval >= 2:
                years = [
                    year + interval for year in range(year1, year2, interval)
                ][0:-1]
                revised_years = sorted(set([*r_mass.times.tolist(), *years]))
                r_flux = timeseries.subset(revised_years)
                r_mass = tsmath.integrate(r_flux)
                dmass = mass - r_mass
                corrected = True

            diff_iter += 1
        if corrected:
            last_result = ReductionResult(flux=last_result.flux,
                                          mass=last_result.mass,
                                          reduced_flux=r_flux,
                                          reduced_mass=r_mass)

    out_error_last = abs(last_result.relative_total_mass_error)
    delta_mass = last_result.total_mass_error

    # for tracking reduction of error through the iterations....
    max_err = last_result.total_mass_error
    min_err = last_result.total_mass_error
    err_iter = 0  # renamed to avoid shadowing the builtin iter()
    rr = last_result

    if abs(last_result.relative_total_mass_error) > out_error_threshold:
        #find peaks for data rebalance and reporting
        #Note: departure from HSS algorithm--no peak width consideration for solid waste result reduction
        peaks, _ = sig.find_peaks(rr.reduced_flux.values)
        pneg, _ = sig.find_peaks((-rr.reduced_flux.values))

        peaks = rr.reduced_flux.times[peaks]
        pneg = rr.reduced_flux.times[pneg]

        peaks = np.where(np.isin(o_timeseries.times, peaks))[0]
        pneg = np.where(np.isin(o_timeseries.times, pneg))[0]

    while (abs(last_result.relative_total_mass_error) > out_error_threshold
           and err_iter < max_err_iters):
        rr = red_flux.rebalance_valleys(rr, peaks, pneg)
        #keep the lowest total_mass_error
        if abs(rr.total_mass_error) < abs(last_result.total_mass_error):
            last_result = rr
            min_err = rr.total_mass_error
        else:
            max_err = rr.total_mass_error
        err_iter += 1

    logging.info(
        "min error: {}; max error: {} -- after {} rebalance iterations".format(
            min_err, max_err, err_iter))

    #end of Neil's code...

    plot_file = summary_plot(last_result, output_folder)
    filename = last_result.to_csv(output_folder)

    git_path = None
    git_hash = ''
    p = Path(__file__)
    for path in p.parents:
        if path.joinpath('.git').exists():
            git_path = str(path.joinpath('.git'))
            break
    if git_path:
        git_hash = get_version(git_path)

    # eventually move the insert_header functionality into a module, but keeping it here for now...
    header_info = 'Site Name: {}\nDate Created: {}\nScript Version: {}\nCOPC: {}\n'.format(
        rr.mass.site,
        datetime.datetime.now().strftime('%Y/%m/%d'), git_hash, rr.mass.copc)

    with open(filename, 'r+') as f:
        old = f.read()
        f.seek(0)
        f.write(header_info + old)

    summary_template = input_data["SUMMARY_TEMPLATE"] + '\n'
    summary_info(last_result, filename, summary_file, summary_template,
                 delta_mass, used_epsilon, n_iterations, out_error_last)
    log_info(last_result)
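# A hypothetical input_data sketch for reduce_dataset above. The real key
# strings live in the constants module `c`; the keys below are illustrative
# placeholders only, named after the constants the function reads.
input_data = {
    "GAP_CLOSED": "True",                  # c.GAP_CLOSED
    "GAP_DELTA": "10",                     # c.GAP_DELTA
    "GAP_STEPS": "3",                      # c.GAP_STEPS
    "DIFF_MASS": "True",                   # c.DIFF_MASS
    "UPPER_N": "100",                      # c.UPPER_N_KEY
    "LOWER_N": "10",                       # c.LOWER_N_KEY
    "MAX_ITERATIONS": "50",                # c.MAX_ITERATIONS_KEY
    "MAX_ERR_ITERATIONS": "20",            # c.MAX_ERR_ITERATIONS_KEY
    "EPSILON": "0.01",                     # c.EPSILON
    "MASS_THRESHOLD": "1e6",               # c.MASS_THRESHOLD
    "LOWER_OUT_ERROR_THRESHOLD": "0.001",  # c.LOWER_OUT_ERROR_THRESHOLD_KEY
    "UPPER_OUT_ERROR_THRESHOLD": "0.01",   # c.UPPER_OUT_ERROR_THRESHOLD_KEY
    "SUMMARY_TEMPLATE": "{site},{copc}",   # literal key used in the function
}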