def rebalance_valleys(reduction_result, peaks, pneg):
    rr = reduction_result
    error = rr.total_mass_error
    r_x = rr.reduced_flux.times
    r_y = rr.reduced_flux.values
    segments, t_mass = build_segments(rr, peaks, pneg, .5)
    #if abs(error) > t_mass:
    #    print("*Warning: total_mass_error ({}) exceeds valley mass ({}) for error "
    #          "adjustment; increasing inflection points from 50% to 75% of valley "
    #          "area".format(error, t_mass))
    #    segments, t_mass = build_segments(rr, peaks, pneg, .75)
    if abs(error) > t_mass:
        print("*Warning: total_mass_error ({}) exceeds valley mass ({}) for error "
              "adjustment; unable to correct mass_error".format(error, t_mass))
        return rr
    adj_dict = adjust_flux(segments, error)
    for i in range(r_x.size):
        year = r_x[i]
        if year in adj_dict:
            r_y[i] = adj_dict[year]
    adjusted = TimeSeries(r_x, r_y, None, None)
    reduced_mass = tsmath.integrate(adjusted)
    rr = ReductionResult(flux=rr.flux,
                         mass=rr.mass,
                         reduced_flux=adjusted,
                         reduced_mass=reduced_mass)
    return rr
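# Illustrative sketch (made-up numbers, not called by the module) of how the
# adj_dict loop above rewrites reduced flux values at the valley years that
# adjust_flux selected, before the series is reintegrated:
def _sketch_valley_adjustment():
    import numpy as np
    r_x = np.array([2000, 2005, 2010])
    r_y = np.array([5.0, 1.0, 4.0])
    adj_dict = {2005: 1.8}              # valley year -> rebalanced flux value
    for i in range(r_x.size):
        if r_x[i] in adj_dict:
            r_y[i] = adj_dict[r_x[i]]
    return r_y                          # -> [5.0, 1.8, 4.0]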
def retain_min_years(r_ts, o_ts, o_mass, min_years_ind):
    years = o_ts.times[min_years_ind]
    if r_ts.times[0] != years[0]:
        r_ts.times = np.insert(r_ts.times, 0, years[0])
        r_ts.values = np.insert(r_ts.values, 0, o_ts.values[min_years_ind[0]])
    if len(years) > 2:
        # Interior required years (the first and last are handled separately).
        for ind in range(1, len(years) - 1):
            if not np.any(r_ts.times == years[ind]):
                later = np.flatnonzero(r_ts.times > years[ind])
                if later.size > 0:
                    pos = later[0]
                else:
                    pos = np.flatnonzero(r_ts.times < years[ind])[-1]
                r_ts.times = np.insert(r_ts.times, pos, years[ind])
                r_ts.values = np.insert(r_ts.values, pos,
                                        o_ts.values[min_years_ind[ind]])
    if r_ts.times[-1] != years[-1]:
        ind = r_ts.times.size - 1
        r_ts.times = np.insert(r_ts.times, ind, years[-1])
        r_ts.values = np.insert(r_ts.values, ind, o_ts.values[min_years_ind[-1]])
    reduced_mass = tsmath.integrate(r_ts)
    return ReductionResult(flux=o_ts,
                           mass=o_mass,
                           reduced_flux=r_ts,
                           reduced_mass=reduced_mass)
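# A minimal standalone sketch of the np.insert positioning used above: placing
# a required year before the first later timestep keeps the series sorted
# (all values here are illustrative):
def _sketch_sorted_insert():
    import numpy as np
    times = np.array([2000, 2010, 2050])
    values = np.array([1.0, 3.0, 0.5])
    year, val = 2025, 2.0                   # hypothetical required year/value
    pos = np.flatnonzero(times > year)[0]
    times = np.insert(times, pos, year)     # -> [2000 2010 2025 2050]
    values = np.insert(values, pos, val)
    return times, values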
def reduce_flux(flux, epsilon, close_gaps, gap_delta, gap_steps):
    mass = tsmath.integrate(flux)
    reduced_flux, reduced_mass = reduce_timeseries(flux, epsilon, close_gaps,
                                                   gap_delta, gap_steps)
    result = ReductionResult(flux=flux,
                             mass=mass,
                             reduced_flux=reduced_flux,
                             reduced_mass=reduced_mass)
    return result
def reduce_flux(flux, threshold_area, threshold_peak, solve_type, simple_peaks):
    mass = tsmath.integrate(flux)
    reduced_flux, reduced_mass = reduce_timeseries(flux, threshold_area,
                                                   threshold_peak, mass,
                                                   solve_type, simple_peaks)
    result = ReductionResult(flux=flux,
                             mass=mass,
                             reduced_flux=reduced_flux,
                             reduced_mass=reduced_mass)
    return result
def reduce_timeseries(timeseries, threshold_area, threshold_peak, mass,
                      solve_type=RAW, simple_peaks=False):
    x = timeseries.times
    y = timeseries.values
    peaks, _ = sig.find_peaks(y)
    peaks = x[peaks]
    pneg, _ = sig.find_peaks(-y)
    pneg = x[pneg]
    # Widen the required peak width until no more than 10 peaks remain.
    peak_width = 1
    while peaks.size > 10:
        peak_width += 1
        peaks, _ = sig.find_peaks(y, width=peak_width, rel_height=1)
        peaks = x[peaks]
        pneg, _ = sig.find_peaks(-y, width=peak_width, rel_height=1)
        pneg = x[pneg]
    # Retain timesteps with significant relative slope, |y2 - y1| / y2 > 0.20,
    # where y exceeds 5% of the maximum. out= zeroes the masked entries, which
    # np.divide would otherwise leave uninitialized.
    required_slope = x[np.divide(np.abs(np.diff(y, prepend=0)), y,
                                 out=np.zeros_like(y, dtype=float),
                                 where=(y > 0.05 * np.max(y))) > 0.20]
    required_slope = [i - 1 for i in required_slope]
    if simple_peaks:
        peaks = np.array([x[np.argmax(timeseries.values)]])
        pneg = []
        required_slope = np.array([])
    if solve_type == SMOOTH:
        ts_smooth = tsmath.smooth(timeseries)
        y = ts_smooth.values
    r = redcon.reducer((x, y),
                       threshold_area=threshold_area,
                       threshold_peak=threshold_peak)
    flat_reduced_x = set(redcon.flatten_reduced(r))
    required = {x[0], x[-1]}
    xout = sorted(flat_reduced_x.union(required)
                  .union(peaks).union(pneg).union(required_slope))
    reduced_flux = timeseries.subset(xout)
    reduced_mass = tsmath.integrate(reduced_flux)
    return reduced_flux, reduced_mass
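# A minimal standalone check (made-up values) of the np.divide pattern above:
# without out=, entries where the where= mask is False are left uninitialized,
# so the > 0.20 comparison could fire on garbage values.
def _sketch_masked_divide():
    import numpy as np
    y = np.array([0.0, 1.0, 10.0, 2.0])
    rel = np.divide(np.abs(np.diff(y, prepend=0)), y,
                    out=np.zeros_like(y),
                    where=(y > 0.05 * y.max()))
    return rel > 0.20          # masked entries stay 0 and never pass the test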
def reduce_timeseries(timeseries, epsilon, close_gaps, gap_delta, gap_steps):
    x = timeseries.times
    y = timeseries.values
    # The reduced dataset keeps significant slope deltas: timesteps where
    # (y2 - y1) / y2 > 0.2 and y > 0.05 * max(y). out= zeroes the masked
    # entries, which np.divide would otherwise leave uninitialized.
    required_slope = x[np.divide(np.abs(np.diff(y, prepend=0)), y,
                                 out=np.zeros_like(y, dtype=float),
                                 where=(y > 0.05 * np.max(y))) > 0.2]
    # Also keep the timestep on either side of each required-slope timestep.
    required_slope_lower = [i - 1 for i in required_slope if i != x[0]]
    required_slope_upper = [i + 1 for i in required_slope if i != x[-1]]
    required_slope = sorted(
        {*required_slope, *required_slope_upper, *required_slope_lower})
    # Normalize the fluxes for the RDP reduction algorithm.
    y_normalized = y / np.max(y)
    list_xy = list(zip(x, y_normalized))
    rdp_list = rdp(list_xy, epsilon)
    # Split the returned reduced dataset back into timesteps and normalized
    # fluxes. The normalized fluxes need no back-conversion: the reduced
    # fluxes come from timeseries.subset(), not from rdp_y.
    rdp_x = [int(pair[0]) for pair in rdp_list]
    rdp_y = [pair[1] for pair in rdp_list]
    # For datasets with large gaps between timesteps, reduction error improves
    # when additional timesteps fill the gaps. Whether this is applied is
    # user-defined in the JSON config file.
    x_gaps = []
    if close_gaps.lower() == "true":
        for index in np.flatnonzero(np.diff(rdp_x) > gap_delta):
            # Only fill gaps where the slope between the timesteps is nonzero.
            if abs(np.diff(rdp_y)[index]) > 0:
                step = max(1, int(np.diff(rdp_x)[index] / (gap_steps + 1)))
                x_gaps += list(range(rdp_x[index], rdp_x[index + 1], step))
    xout = sorted({*required_slope, *rdp_x, *x_gaps})
    reduced_flux = timeseries.subset(xout)
    reduced_mass = tsmath.integrate(reduced_flux)
    return reduced_flux, reduced_mass
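# A minimal usage sketch (hypothetical data) of the normalize-then-reduce step
# above, using the rdp package's rdp(points, epsilon) call:
def _sketch_rdp_usage():
    import numpy as np
    from rdp import rdp
    x = np.arange(2000, 2010)
    y = np.array([0., 0., 5., 20., 12., 6., 3., 1., 0., 0.])
    y_norm = y / y.max()                     # epsilon then acts on a 0-1 scale
    kept = rdp(list(zip(x, y_norm)), 0.05)   # retained [x, y_norm] pairs
    return [int(pair[0]) for pair in kept]   # timesteps retained by RDP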
def rebalance(reduction_result):
    """
    Return a new ReductionResult flux, mass such that the total mass
    difference is 0.
    """
    rr = reduction_result
    #minvalue = np.min(rr.reduced_flux.values)*.0001
    deltaM = rr.total_mass_error
    times = rr.reduced_flux.times
    # Apply the error equally across the whole duration. Note vals + x rather
    # than vals += x, which would mutate the input's flux array in place.
    dt = times[-1] - times[0]
    vals = rr.reduced_flux.values + deltaM / dt
    adjusted = rr.reduced_flux.from_values(values=vals)
    reduced_mass = tsmath.integrate(adjusted)
    rr = ReductionResult(flux=rr.flux,
                         mass=rr.mass,
                         reduced_flux=adjusted,
                         reduced_mass=reduced_mass)
    return rr
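# A small standalone check (illustrative numbers) of the rebalance above,
# assuming a trapezoid-style mass integral like np.trapz: adding the constant
# deltaM / dt to every flux value raises the integral by exactly deltaM,
# since a constant c integrates to c * (t_end - t_start).
def _sketch_equal_rebalance():
    import numpy as np
    times = np.array([0., 10., 30.])
    vals = np.array([2., 4., 1.])
    deltaM = 6.0
    dt = times[-1] - times[0]
    before = np.trapz(vals, times)
    after = np.trapz(vals + deltaM / dt, times)
    assert np.isclose(after - before, deltaM)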
def integrate(self):
    return tsmath.integrate(self)
def reduct_iter(timeseries, flux_floor, ythresh, out_error, out_error_last,
                OUT_ERROR_THRESHOLD, UPPER_N, LOWER_N, last_result,
                MAX_ITERATIONS, algo="iter"):
    out_error_last = out_error
    prev_point_count = 0
    mass = timeseries.integrate()
    good_result = ReductionResult(flux=timeseries,
                                  mass=mass,
                                  reduced_flux=timeseries,
                                  reduced_mass=mass)
    last_result = ReductionResult(flux=timeseries,
                                  mass=mass,
                                  reduced_flux=timeseries,
                                  reduced_mass=mass)
    epsilon = ythresh
    mult_by = .5
    for ix in range(MAX_ITERATIONS):
        # Execute the Ramer–Douglas–Peucker algorithm.
        temp = rdp.rdp(np.stack((timeseries.times, timeseries.values), axis=-1),
                       epsilon=epsilon, algo=algo)
        # Find the relative error.
        reduced_flux = TimeSeries(temp[:, 0], temp[:, 1], None, None)
        reduced_mass = tsmath.integrate(reduced_flux)
        res = ReductionResult(flux=timeseries,
                              mass=mass,
                              reduced_flux=reduced_flux,
                              reduced_mass=reduced_mass)
        out_error = abs(res.relative_total_mass_error)
        # If the relative error is below the error threshold, record the result.
        if out_error < OUT_ERROR_THRESHOLD:
            # If the number of points reaches the lower point bound, we are
            # done: exit the loop.
            if res.reduced_flux.times.size >= LOWER_N:
                last_result = res
                break
            # If the number of points is below the lower bound but larger than
            # in previous tries, keep this as a potentially good dataset.
            elif res.reduced_flux.times.size > prev_point_count:
                prev_point_count = res.reduced_flux.times.size
                good_result = res
        # Reduce epsilon to increase the number of points found.
        if epsilon * mult_by > flux_floor:
            epsilon = epsilon * mult_by
        else:
            # The previous reduction was not good; try reducing epsilon slower.
            mult_by = mult_by * .5
            epsilon = ythresh
            if epsilon * mult_by > flux_floor:
                epsilon = epsilon * mult_by
            else:
                break
        last_result = res
    if prev_point_count > 0:
        if (last_result.reduced_flux.times.size > UPPER_N
                or out_error_last > OUT_ERROR_THRESHOLD):
            if good_result.reduced_flux.times.size < UPPER_N:
                last_result = good_result
    return last_result, ix
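# A hypothetical call to reduct_iter (all argument values are made up; ts is
# assumed to be a TimeSeries of annual fluxes):
#
#   result, n_iter = reduct_iter(ts,
#                                flux_floor=1e-9,     # smallest usable epsilon
#                                ythresh=0.1,         # starting RDP epsilon
#                                out_error=1.0,
#                                out_error_last=1.0,
#                                OUT_ERROR_THRESHOLD=0.01,
#                                UPPER_N=100, LOWER_N=10,
#                                last_result=None,
#                                MAX_ITERATIONS=40)
#   print(result.reduced_flux.times.size, result.relative_total_mass_error)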
def rebalance_extra_points(reduction_result, num_points=10):
    #----------------------
    def find_mean_dif_day():
        # Find the first day whose absolute cumulative-mass difference reaches
        # the mean absolute difference.
        diff = rr.diff_mass
        #m_diff = max(abs(diff.values))
        m_diff = np.mean(abs(diff.values))
        if m_diff > 0:
            #ind = np.flatnonzero(abs(diff.values) == m_diff)[0]
            ind = np.flatnonzero(abs(diff.values) >= m_diff)[0]
            return diff.times[ind]
        return -1
    #--------------------
    def check_zero_fluxes():
        points = num_points
        times = rr.reduced_flux.times
        vals = rr.reduced_flux.values
        zero_inds = np.flatnonzero(rr.flux.values == 0)
        series = []
        result = [series]
        expect = None
        step = 1
        # Loop through the indexes and collect runs of consecutive zeros.
        for v in zero_inds:
            if (v == expect) or (expect is None):
                series.append(v)
            else:
                series = [v]
                result.append(series)
            expect = v + step
        for r in result:
            # Leave a few points for adding in strategic points.
            if points <= 10:
                break
            if len(r) > 5:
                times, vals = insert_point(times, vals, rr.flux.times[r[0]],
                                           rr.flux.values[r[0]])
                times, vals = insert_point(times, vals, rr.flux.times[r[-1]],
                                           rr.flux.values[r[-1]])
                points -= 2
        return points, times, vals

    rr = reduction_result
    points, times, vals = check_zero_fluxes()
    adjusted = TimeSeries(times, vals, None, None)
    reduced_mass = tsmath.integrate(adjusted)
    rr = ReductionResult(flux=rr.flux,
                         mass=rr.mass,
                         reduced_flux=adjusted,
                         reduced_mass=reduced_mass)
    # Loop through and add midpoints at strategic places.
    for x in range(points):
        diff_day = find_mean_dif_day()
        # If diff_day == -1 the max diff was 0, so there is nothing to correct.
        if diff_day == -1:
            return rr
        times = rr.reduced_flux.times
        vals = rr.reduced_flux.values
        start_ind = np.flatnonzero(times < diff_day)[-1]
        end_ind = np.flatnonzero(times >= diff_day)[0]
        mid_day = 0
        #zero_inds = np.flatnonzero(vals[start_ind:end_ind] == 0)
        #if zero_inds.size > 0:
        #    mid_point = zero_inds[-1]
        #    mid_day = times[mid_point]
        #else:
        start_day = times[start_ind]
        end_day = times[end_ind]
        mid_day = ((end_day - start_day) / 2) + start_day
        mid_point = np.flatnonzero(rr.flux.times >= mid_day)[0]
        if mid_day not in times:
            times, vals = insert_point(times, vals, rr.flux.times[mid_point],
                                       rr.flux.values[mid_point])
            adjusted = TimeSeries(times, vals, None, None)
            reduced_mass = tsmath.integrate(adjusted)
            rr = ReductionResult(flux=rr.flux,
                                 mass=rr.mass,
                                 reduced_flux=adjusted,
                                 reduced_mass=reduced_mass)
        if abs(rr.total_mass_error / rr.mass.values[-1]) * 100 < .001:
            break
    return rr
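# A minimal standalone sketch (illustrative indices) of the consecutive-run
# grouping used in check_zero_fluxes above:
def _sketch_zero_runs():
    import numpy as np
    zero_inds = np.array([3, 4, 5, 9, 10, 20])
    series = []
    result = [series]
    expect = None
    for v in zero_inds:
        if (v == expect) or (expect is None):
            series.append(v)
        else:
            series = [v]              # start a new run
            result.append(series)
        expect = v + 1
    return result                     # -> [[3, 4, 5], [9, 10], [20]]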
def reduce_dataset(timeseries, summary_file, output_folder, input_data):
    """
    Take a TimeSeries object and reduce it. Write a summary into summary_file.
    """
    copc = timeseries.copc
    site = timeseries.site
    if timeseries.are_all_zero():
        logging.info("Skipped {} {} - all zero".format(copc, site))
        return False
    # For a site/copc with nonzero fluxes, keep the unreduced timeseries as
    # o_timeseries (original) for the error correction below.
    o_timeseries = timeseries
    # Grab the user-defined constant values from the input JSON file.
    close_gaps = input_data[c.GAP_CLOSED].lower()
    gap_delta = int(input_data[c.GAP_DELTA]) if input_data[c.GAP_DELTA] else 0
    gap_steps = int(input_data[c.GAP_STEPS]) if input_data[c.GAP_STEPS] else 0
    diff_mass = input_data[c.DIFF_MASS].lower()
    # Placeholder in case a flux floor is needed in the future:
    #if input_data[c.FLUX_FLOOR_KEY] != "":
    #    flux_floor = float(input_data[c.FLUX_FLOOR_KEY])
    #else:
    #    flux_floor = ""
    #    logging.info("no flux floor value is being applied")
    upper_n = int(input_data[c.UPPER_N_KEY])
    lower_n = int(input_data[c.LOWER_N_KEY])
    # Maximum number of reduction iterations.
    max_iters = int(input_data[c.MAX_ITERATIONS_KEY])
    # Maximum number of iterations for mass-error redistribution.
    max_err_iters = int(input_data[c.MAX_ERR_ITERATIONS_KEY])
    epsilon = float(input_data[c.EPSILON])

    res = red_flux.reduce_flux(timeseries, epsilon, close_gaps, gap_delta,
                               gap_steps)
    out_error = abs(res.relative_total_mass_error)
    out_error_last = out_error
    last_timesteps = 0
    if res.mass.values[-1] > float(input_data[c.MASS_THRESHOLD]):
        out_error_threshold = float(input_data[c.LOWER_OUT_ERROR_THRESHOLD_KEY])
    else:
        out_error_threshold = float(input_data[c.UPPER_OUT_ERROR_THRESHOLD_KEY])

    for ix in range(max_iters):
        timesteps = len(res.reduced_flux)
        out_error = abs(res.relative_total_mass_error)
        # If the timestep count is in the acceptable range and the error is at
        # or below the threshold, we are done.
        if lower_n <= timesteps <= upper_n and out_error <= out_error_threshold:
            last_result = res
            used_epsilon = epsilon
            break
        elif timesteps < lower_n:
            epsilon = epsilon / 2
        elif (lower_n <= timesteps <= upper_n
              and out_error > out_error_threshold
              and out_error < out_error_last):
            epsilon = epsilon / 2
        elif (lower_n <= timesteps <= upper_n
              and out_error > out_error_threshold
              and last_timesteps <= timesteps):
            epsilon = epsilon / 2
        # After exceeding the point budget, oscillate between upper_n and the
        # last epsilon for which timesteps < upper_n.
        elif timesteps > upper_n and last_timesteps < timesteps:
            epsilon = epsilon * 1.75
        elif timesteps <= last_timesteps:
            epsilon = epsilon / 1.5
        # Keep this result as the last result only if the timestep count is
        # within the maximum and the error is no worse than the previous result.
        if timesteps <= upper_n and out_error <= out_error_last:
            last_result = res
            last_timesteps = timesteps
            out_error_last = out_error
            used_epsilon = epsilon
        res = red_flux.reduce_flux(timeseries, epsilon, close_gaps, gap_delta,
                                   gap_steps)
    if ix >= max_iters - 1:
        logging.info("MAX ITERATIONS exceeded")
    n_iterations = ix + 1

    if diff_mass == "true":
        # Check the error in the cumulative mass differences between the
        # reduced and original datasets after reduction.
        mass = last_result.mass
        r_mass = last_result.reduced_mass
        dmass = mass - r_mass
        diff_iter = 0
        corrected = False
        # or abs(min(dmass.values)) / mass.values[-1] > out_error_threshold:
        while (max(dmass.values) / mass.values[-1] > out_error_threshold
               and diff_iter < max_err_iters):
            year_err = dmass.times[np.where(
                dmass.values == max(dmass.values))].tolist()[0]
            year2 = r_mass.times[np.where(r_mass.times > year_err)][0]
            year1 = r_mass.times[np.where(r_mass.times < year_err)][-1]
            interval = int((year2 - year1) / 2)
            diff_iter += 1
            # Add years where the timesteps on either side of the maximum
            # difference span more than 4 years.
            if interval >= 2:
                years = [year + interval
                         for year in range(year1, year2, interval)][0:-1]
                revised_years = sorted(set([*r_mass.times.tolist(), *years]))
                r_flux = timeseries.subset(revised_years)
                r_mass = tsmath.integrate(r_flux)
                dmass = mass - r_mass
                corrected = True
                diff_iter += 1
        if corrected:
            last_result = ReductionResult(flux=last_result.flux,
                                          mass=last_result.mass,
                                          reduced_flux=r_flux,
                                          reduced_mass=r_mass)
    out_error_last = abs(last_result.relative_total_mass_error)
    delta_mass = last_result.total_mass_error

    # Track the reduction of error through the rebalance iterations.
    max_err = last_result.total_mass_error
    min_err = last_result.total_mass_error
    err_iter = 0
    rr = last_result
    if abs(last_result.relative_total_mass_error) > out_error_threshold:
        # Find peaks for the data rebalance and for reporting.
        # Note: departure from the HSS algorithm -- no peak-width
        # consideration for solid-waste result reduction.
        peaks, _ = sig.find_peaks(rr.reduced_flux.values)
        pneg, _ = sig.find_peaks(-rr.reduced_flux.values)
        peaks = rr.reduced_flux.times[peaks]
        pneg = rr.reduced_flux.times[pneg]
        peaks = np.where(np.isin(o_timeseries.times, peaks))[0]
        pneg = np.where(np.isin(o_timeseries.times, pneg))[0]
        while (abs(last_result.relative_total_mass_error) > out_error_threshold
               and err_iter < max_err_iters):
            rr = red_flux.rebalance_valleys(rr, peaks, pneg)
            # Keep the lowest total_mass_error.
            if abs(rr.total_mass_error) < abs(last_result.total_mass_error):
                last_result = rr
                min_err = rr.total_mass_error
            else:
                max_err = rr.total_mass_error
            err_iter += 1
        logging.info(
            "min error: {}; max error: {}--after rebalance iterations {}".format(
                min_err, max_err, err_iter))
    #end of Neil's code...

    plot_file = summary_plot(last_result, output_folder)
    filename = last_result.to_csv(output_folder)
    git_path = None
    git_hash = ''
    p = Path(__file__)
    for path in p.parents:
        if path.joinpath('.git').exists():
            git_path = str(path.joinpath('.git'))
            break
    if git_path:
        git_hash = get_version(git_path)
    # Eventually move header insertion into its own module; keeping it here
    # for now.
    header_info = 'Site Name: {}\nDate Created: {}\nScript Version: {}\nCOPC: {}\n'.format(
        rr.mass.site,
        datetime.datetime.now().strftime('%Y/%m/%d'),
        git_hash,
        rr.mass.copc)
    with open(filename, 'r+') as f:
        old = f.read()
        f.seek(0)
        f.write(header_info + old)
    summary_template = input_data["SUMMARY_TEMPLATE"] + '\n'
    summary_info(last_result, filename, summary_file, summary_template,
                 delta_mass, used_epsilon, n_iterations, out_error_last)
    log_info(last_result)
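# A hypothetical input_data mapping for reduce_dataset. The key constants
# (c.GAP_CLOSED, c.EPSILON, ...) are the ones referenced above; every value
# here is illustrative only:
#
#   input_data = {
#       c.GAP_CLOSED: "true",
#       c.GAP_DELTA: "10",
#       c.GAP_STEPS: "4",
#       c.DIFF_MASS: "true",
#       c.UPPER_N_KEY: "100",
#       c.LOWER_N_KEY: "10",
#       c.MAX_ITERATIONS_KEY: "40",
#       c.MAX_ERR_ITERATIONS_KEY: "20",
#       c.EPSILON: "0.1",
#       c.MASS_THRESHOLD: "1.0",
#       c.LOWER_OUT_ERROR_THRESHOLD_KEY: "0.001",
#       c.UPPER_OUT_ERROR_THRESHOLD_KEY: "0.01",
#       "SUMMARY_TEMPLATE": "...",
#   }
#   reduce_dataset(ts, summary_file, output_folder, input_data)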