def add_zero_markers(o_ts, r_ts, flux_floor):
    """Add boundary points for leading/trailing non-positive flux to a reduced series.

    When the original series starts with a value <= 0, the reduced series
    receives (unless already present) the original point just before the
    first positive value, and the original first point.  When it ends with a
    value <= 0, the reduced series receives the original point just after the
    last positive value, and the original last point.

    Args:
        o_ts: original series exposing ``times``/``values`` arrays and
            ``integrate()``.
        r_ts: reduced series exposing ``times``/``values`` arrays.
        flux_floor: not used by this function; kept so existing callers
            keep working.

    Returns:
        A ``ReductionResult`` built from ``o_ts`` and the augmented reduced
        series, each with its integrated mass.
    """
    r_times = r_ts.times
    r_values = r_ts.values
    num_steps = o_ts.times.size

    # Indices of strictly positive flux.  This is empty when the whole series
    # is <= 0; indexing it blindly would raise IndexError, so every use below
    # is guarded by a size check.
    positive = np.where(o_ts.values > 0)[0]

    if o_ts.values[0] <= 0:
        if positive.size > 0:
            # last non-positive point before the flux first becomes positive
            zero_ind = positive[0] - 1
            if not np.any(r_times == o_ts.times[zero_ind]):
                r_times, r_values = insert_point(r_times, r_values,
                                                 o_ts.times[zero_ind],
                                                 o_ts.values[zero_ind])
        if o_ts.times[0] != r_times[0]:
            r_times, r_values = insert_point(r_times, r_values,
                                             o_ts.times[0], o_ts.values[0])

    if o_ts.values[-1] <= 0:
        if positive.size > 0:
            # first non-positive point after the flux was last positive
            zero_ind = positive[-1] + 1
            if zero_ind < num_steps and not np.any(
                    r_times == o_ts.times[zero_ind]):
                r_times, r_values = insert_point(r_times, r_values,
                                                 o_ts.times[zero_ind],
                                                 o_ts.values[zero_ind])
        if o_ts.times[-1] != r_times[-1]:
            r_times, r_values = insert_point(r_times, r_values,
                                             o_ts.times[-1], o_ts.values[-1])

    new_ts = TimeSeries(r_times, r_values, None, None)
    return ReductionResult(flux=o_ts,
                           mass=o_ts.integrate(),
                           reduced_flux=new_ts,
                           reduced_mass=new_ts.integrate())
def build_segments(rr, peaks, pneg, inflection_area):
    """Cut the reduced flux into segments between inflection points.

    For every index pair returned by ``get_inflection_points`` whose span on
    ``rr.flux.times`` is greater than 4, the original points at both ends and
    their inner neighbours are inserted into a working copy of the reduced
    series when missing; the reduced points inside the span then form one
    segment.

    Args:
        rr: reduction result exposing ``flux`` and ``reduced_flux``.
        peaks, pneg, inflection_area: forwarded unchanged to
            ``get_inflection_points``.

    Returns:
        ``(segments, total_mass)``: the list of segment ``TimeSeries`` and the
        sum of the final integrated value of each segment.
    """
    index_pairs = get_inflection_points(rr.flux, peaks, pneg, inflection_area)
    flux_times = rr.flux.times
    flux_values = rr.flux.values
    red_times = rr.reduced_flux.times
    red_values = rr.reduced_flux.values

    segments = []
    total_mass = 0
    for pair in index_pairs:
        first = pair[0]
        last = pair[1]
        seg_start = flux_times[first]
        seg_end = flux_times[last]
        # spans of 4 or less are skipped
        if not ((seg_end - seg_start) > 4):
            continue

        # make sure both boundaries and their inner neighbours are present
        for idx in (first, first + 1, last, last - 1):
            if not np.any(red_times == flux_times[idx]):
                red_times, red_values = insert_point(red_times, red_values,
                                                     flux_times[idx],
                                                     flux_values[idx])

        inside = np.where((red_times >= seg_start) & (red_times <= seg_end))
        segment = TimeSeries(red_times[inside], red_values[inside], None, None)
        total_mass += segment.integrate().values[-1]
        segments.append(segment)

    return segments, total_mass
def summary_plot(o_time,
                 o_flux,
                 r_time,
                 r_flux,
                 i,
                 j,
                 output_folder,
                 units,
                 start_year,
                 graph_name,
                 copc,
                 summary_plot=False):
    """Plot the reduction result and save it as a PNG in ``output_folder``.

    Args:
        o_time, o_flux: times and values of the original series.
        r_time, r_flux: times and values of the reduced series.
        i, j: identifiers forwarded to ``reduced_timeseries_plot`` and used
            in the file name.
        output_folder: directory the image is written to.
        units: unit string forwarded to ``unit_conversion``.
        start_year: forwarded to ``unit_conversion`` and the plot helper.
        graph_name: base name of the output file.
        copc: forwarded to ``reduced_timeseries_plot``.
        summary_plot: when true the file is named ``<graph_name>.png``,
            otherwise ``<graph_name>_<i>-<j>.png``.

    Side effects:
        Sets ``matplotlib.rcParams['axes.formatter.useoffset']`` to False and
        writes the image with ``bbox_inches='tight'`` and ``dpi=1200``.
    """
    matplotlib.rcParams['axes.formatter.useoffset'] = False

    original = TimeSeries(o_time, o_flux, None, None)
    reduced = TimeSeries(r_time, r_flux, None, None)
    # both series go through the same conversion; the unit labels are taken
    # from the original series' conversion
    original, unit = unit_conversion(original, units, start_year)
    reduced, _ = unit_conversion(reduced, units, start_year)

    # the helper is expected to return (figure, axis, axis); the values are
    # not needed here because saving goes through pyplot's current figure
    _fig, _ax1, _ax2 = reduced_timeseries_plot(original, reduced, i, j, unit,
                                               start_year, copc, summary_plot)

    if summary_plot:
        file_name = "{}.png".format(graph_name)
    else:
        file_name = "{}_{}-{}.png".format(graph_name, i, j)
    plt.savefig(os.path.join(output_folder, file_name),
                bbox_inches='tight',
                dpi=1200)

    # Clear first, then close: calling clf() after close('all') would make
    # pyplot create a new empty figure and leave it open.
    plt.clf()
    plt.close('all')
def adjust_flux(data, error):
    """Scale the interior points of each segment to absorb a mass error.

    Each segment with more than two points gets a relative adjustment equal
    to ``(segment mass / total mass) * (error / total mass)``, capped at
    +/-10 %.  The first point of the segment is recorded unchanged, interior
    points are scaled by the adjustment, and the last point is not recorded.

    Args:
        data: iterable of segments; each must support ``len()``, expose
            ``times``/``values`` arrays and an ``integrate()`` whose result
            has a ``values`` array (the last entry is taken as the mass).
        error: mass error to distribute over the segments.

    Returns:
        dict mapping a time to its adjusted value.  Empty when the segments
        carry no mass, because the mass fractions are undefined in that case.
    """
    # cumulative mass of all non-empty segments
    total_mass = float(0.0)
    for seg in data:
        if len(seg) > 0:
            total_mass += seg.integrate().values[-1]

    adjusted = {}
    # Dividing by a zero total would produce NaN/inf adjustments (or raise
    # for plain Python floats), so nothing is adjusted in that case.
    if total_mass == 0:
        return adjusted

    # share of the total mass the error represents; identical for all segments
    p_error = error / total_mass

    for seg in data:
        # only interior points are adjusted, so at least 3 points are needed
        if seg.times.size <= 2:
            continue
        x = seg.times
        y = seg.values
        mass = seg.integrate().values[-1]

        # weight the error share by this segment's share of the total mass
        flux_diff = (mass / total_mass) * p_error
        # never move a point by more than 10 %, keeping the sign
        if abs(flux_diff) > .1:
            flux_diff = 0.1 if flux_diff > 0 else -0.1

        adjusted[x[0]] = y[0]
        max_flux = max(y)
        for i in range(1, x.size - 1):
            new_val = y[i] + (y[i] * flux_diff)
            if new_val > max_flux:
                # do not overshoot the segment maximum: move only 10 % of the
                # remaining distance towards it instead
                new_val = y[i] + ((max_flux - y[i]) * .1)
            # negative flux is not allowed
            if new_val < 0:
                new_val = float(0.0)
            adjusted[x[i]] = new_val
    return adjusted
def rebalance_valleys(reduction_result, peaks, pneg):
    """Spread the total mass error over the valley segments of the reduced flux.

    Segments are built with ``build_segments`` (inflection area 0.5).  If the
    absolute mass error is larger than the combined segment mass, a warning
    is printed and the input result is returned unchanged.  Otherwise the
    values proposed by ``adjust_flux`` replace the reduced-flux values at the
    matching times and a new result is built.

    Args:
        reduction_result: result exposing ``total_mass_error``, ``flux``,
            ``mass`` and ``reduced_flux``.
        peaks, pneg: forwarded unchanged to ``build_segments``.

    Returns:
        A new ``ReductionResult`` with the adjusted reduced flux, or the
        input object when the error cannot be absorbed.
    """
    rr = reduction_result
    error = rr.total_mass_error

    segments, t_mass = build_segments(rr, peaks, pneg, .5)
    if abs(error) > t_mass:
        print(
            "*Warning: total_mass_error ({}) exceeds valley mass ({}) for error adjustment; unable to correct mass_error"
            .format(error, t_mass))
        return rr

    adj_dict = adjust_flux(segments, error)

    r_x = rr.reduced_flux.times
    # Work on a copy so the reduced flux of the input result is not modified;
    # callers compare the input against the returned result afterwards.
    # NOTE(review): assumes ``values`` is a numpy array (``times`` is already
    # used with ``.size``) - confirm against TimeSeries.
    r_y = rr.reduced_flux.values.copy()
    for i, year in enumerate(r_x):
        if year in adj_dict:
            r_y[i] = adj_dict[year]

    adjusted = TimeSeries(r_x, r_y, None, None)
    reduced_mass = tsmath.integrate(adjusted)
    return ReductionResult(flux=rr.flux,
                           mass=rr.mass,
                           reduced_flux=adjusted,
                           reduced_mass=reduced_mass)
def unit_conversion(ts, units, start_year):
    """Rescale a series for plotting and pick the matching unit labels.

    Times are divided by 365.25 and offset by ``start_year``; values are
    multiplied by 365.25 and by a unit factor (presumably day-based input
    converted to a per-year basis - confirm with callers).

    Unit handling (case-insensitive):
        'pci'            -> factor 1e-12, labels ['Ci/year', 'Ci']
        'kg', 'g', 'ug'  -> factor 1 / 1e-3 / 1e-9, labels ['kg/year', 'kg']
        anything else    -> factor 1, labels ['Ci/year', 'Ci']

    Returns:
        ``(converted_series, labels)`` where ``labels`` is a two-item list.
    """
    shifted_times = (ts.times / 365.25) + start_year
    yearly_values = ts.values * 365.25

    unit_key = units.lower()
    mass_factors = {'kg': 1, 'g': 1e-3, 'ug': 1e-9}
    if unit_key in mass_factors:
        labels = ['kg/year', 'kg']
        scale = mass_factors[unit_key]
    else:
        labels = ['Ci/year', 'Ci']
        scale = 1e-12 if unit_key == 'pci' else 1

    converted = TimeSeries(shifted_times, yearly_values * scale, None, None)
    return converted, labels
def find_inflection(x, y, s_ind, e_ind):
    """Find the index where the mass on the valley side drops to half.

    The reference mass is the integral over ``[s_ind:e_ind]`` (slice
    semantics, so ``e_ind`` itself is excluded).  The scan starts at whichever
    end has the larger ``y`` and walks towards the other end:

    * ``y[s_ind] > y[e_ind]``: ``i`` runs upward from ``s_ind`` and the mass
      of ``[i:e_ind]`` is tested.
    * otherwise: ``i`` runs downward from ``e_ind`` and the mass of
      ``[s_ind:i]`` is tested.

    Returns:
        The first ``i`` whose tested mass is <= half of the reference mass.
        If no index qualifies, the starting end of the scan is returned
        (``s_ind`` for the upward scan, ``e_ind`` for the downward scan).
    """

    def _mass(lo, hi):
        # final integrated value of the slice [lo:hi]
        return TimeSeries(x[lo:hi], y[lo:hi], None, None).integrate().values[-1]

    half_mass = _mass(s_ind, e_ind) / 2

    if y[s_ind] > y[e_ind]:
        # larger value at s_ind: shrink the window from the left
        for i in range(s_ind, e_ind):
            if _mass(i, e_ind) <= half_mass:
                return i
        # not expected to be reached
        return s_ind

    # larger (or equal) value at e_ind: shrink the window from the right
    for i in range(e_ind, s_ind, -1):
        if _mass(s_ind, i) <= half_mass:
            return i
    # not expected to be reached
    return e_ind
def reduce_dataset(years,
                   values,
                   flux_floor=0,
                   max_tm_error=0,
                   min_reduction_steps=200):
    """Reduce a (times, values) series to fewer points and rebalance its mass.

    Steps, in order: trim leading/trailing zero flux, normalise the values by
    their maximum, run the iterative reduction (``reduct_iter``), restore
    retained zero years (``retain_min_years``), spend unused points
    (``rebalance_extra_points``), then repeatedly rebalance valleys while the
    mass error is above the limits.

    Args:
        years: array of times (indexed with an index array and ``.size``).
        values: array of flux values matching ``years``.
        flux_floor: forwarded to ``remove_begin_end_zero_flux`` and
            ``reduct_iter`` (the original note reads "flux_floor > flux == 0",
            presumably: flux below the floor is treated as zero - confirm).
        max_tm_error: the valley rebalancing keeps going while the
            de-normalised absolute total mass error exceeds this value.
        min_reduction_steps: forwarded to ``remove_begin_end_zero_flux``;
            also the point count below which extra points are added.

    Returns:
        A 5-tuple: reduced times, reduced values (multiplied back by the
        normalisation maximum), the negated de-normalised total mass error,
        the number of peaks found, and the number of accepted valley
        rebalancing passes.

    NOTE(review): an earlier version of this docstring said the function
    returns False when all elements are zero; no such return exists in this
    body.
    """
    non_zero_ind, min_retained_zero_years = remove_begin_end_zero_flux(
        years, values, flux_floor, min_reduction_steps)
    years_mod = years[non_zero_ind]
    values_mod = values[non_zero_ind]
    if years_mod.size < 3:
        # too few points left after trimming: fall back to the full series
        years_mod = years
        values_mod = values
        # NOTE(review): this immediately overwrites the line above with the
        # scalar 0, which makes maxval 0 and the normalisation below 0/0.
        # Looks unintended - confirm before relying on this branch.
        values_mod = 0
    else:
        # make sure no more than 1% of the mass was removed when trimming
        # zero / flux-floor rates; otherwise fall back to the full series
        o_mass = TimeSeries(years, values, None, None).integrate().values[-1]
        r_mass = TimeSeries(years_mod, values_mod, None,
                            None).integrate().values[-1]
        if abs((o_mass - r_mass) / o_mass) * 100 > 1:
            years_mod = years
            values_mod = values
            # these two are reassigned after normalisation below
            timeseries = TimeSeries(years_mod, values_mod, None, None)
            mass = timeseries.integrate()
    # normalise values by their maximum (undone on return)
    maxval = np.max(values_mod)
    values_mod = values_mod / maxval
    o_timeseries = TimeSeries(years, values / maxval, None, None)
    o_mass = o_timeseries.integrate()
    timeseries = TimeSeries(years_mod, values_mod, None, None)
    mass = timeseries.integrate()
    mx = np.argmax(timeseries.values)
    # NOTE(review): points, x, mass, solve_type and simple_peaks are assigned
    # but not read again in this function.
    points = [0, mx, len(timeseries)]
    x = timeseries.times
    # starting epsilon for the reduction: 100x the mean normalised value
    ythresh = 100 * np.mean(timeseries.values)
    out_error = 1
    out_error_last = out_error
    OUT_ERROR_THRESHOLD = 1e-2
    UPPER_N = 200
    LOWER_N = 50
    last_result = None
    MAX_ITERATIONS = 80
    solve_type = SMOOTH
    simple_peaks = False
    last_result, ix = reduct_iter(timeseries, flux_floor, ythresh, out_error,
                                  out_error_last, OUT_ERROR_THRESHOLD, UPPER_N,
                                  LOWER_N, last_result, MAX_ITERATIONS)
    last_result = retain_min_years(last_result.reduced_flux, o_timeseries,
                                   o_mass, min_retained_zero_years)
    # if there are fewer points than min_reduction_steps, use the remaining
    # points to rebalance the segments with the largest mass errors.
    play_points = min_reduction_steps - last_result.num_reduced_points
    # NOTE(review): bef is not read again in this function
    bef = last_result.reduced_flux.times.size
    if play_points > 0:
        last_result = red_flux.rebalance_extra_points(last_result, play_points)
    rr = last_result
    # find peaks for data rebalance and reporting; retry with a narrower
    # minimum width when nothing is found
    peaks, _ = sig.find_peaks(rr.reduced_flux.values, width=3, rel_height=1)
    if peaks.size == 0:
        peaks, _ = sig.find_peaks(rr.reduced_flux.values,
                                  width=2,
                                  rel_height=1)
    if peaks.size == 0:
        peaks, _ = sig.find_peaks(rr.reduced_flux.values,
                                  width=1,
                                  rel_height=1)
    # valleys are found as peaks of the negated signal, same retry scheme
    pneg, _ = sig.find_peaks(-rr.reduced_flux.values, width=3, rel_height=1)
    if pneg.size == 0:
        pneg, _ = sig.find_peaks(-rr.reduced_flux.values,
                                 width=2,
                                 rel_height=1)
    if pneg.size == 0:
        pneg, _ = sig.find_peaks(-rr.reduced_flux.values,
                                 width=1,
                                 rel_height=1)
    # translate indices in the reduced series into indices in the original
    # series: index -> time -> membership mask -> index
    peaks = rr.reduced_flux.times[peaks]
    pneg = rr.reduced_flux.times[pneg]
    peaks = np.isin(o_timeseries.times, peaks)
    pneg = np.isin(o_timeseries.times, pneg)
    peaks = np.where(peaks)
    pneg = np.where(pneg)
    peaks = peaks[0]
    pneg = pneg[0]
    # NOTE(review): `iter` shadows the builtin of the same name
    iter = 0
    # at most 100 passes, while either the de-normalised absolute error is
    # above max_tm_error or the relative error is above 0.001 %
    while iter < 100 and (
            abs(last_result.total_mass_error * maxval) > max_tm_error
            or abs(last_result.total_mass_error / last_result.mass.values[-1])
            * 100 > .001):
        rr = red_flux.rebalance_valleys(rr, peaks, pneg)
        # keep the lowest total_mass_error; stop at the first pass that does
        # not improve it
        if abs(rr.total_mass_error) < abs(last_result.total_mass_error):
            last_result = rr
        else:
            break
        iter += 1
    out_times = last_result.reduced_flux.times
    out_values = last_result.reduced_flux.values
    # return the reduced data, undoing the normalisation of the values
    # (* maxval)
    return out_times, out_values * maxval, -(last_result.total_mass_error *
                                             maxval), peaks.size, iter
def reduct_iter(timeseries,
                flux_floor,
                ythresh,
                out_error,
                out_error_last,
                OUT_ERROR_THRESHOLD,
                UPPER_N,
                LOWER_N,
                last_result,
                MAX_ITERATIONS,
                algo="iter"):
    """Search for an RDP epsilon giving a small mass error with enough points.

    Runs ``rdp.rdp`` on the (time, value) pairs up to ``MAX_ITERATIONS``
    times, shrinking epsilon from ``ythresh``, and tracks the absolute
    relative total mass error of each reduction.

    Args:
        timeseries: series to reduce (``times``, ``values``, ``integrate()``).
        flux_floor: epsilon is never lowered to this value or below.
        ythresh: starting epsilon.
        out_error: initial error value; only copied into ``out_error_last``.
        out_error_last: ignored - overwritten with ``out_error`` on entry.
        OUT_ERROR_THRESHOLD: relative error below which a reduction counts
            as good.
        UPPER_N, LOWER_N: upper / lower bounds on the reduced point count.
        last_result: ignored - overwritten on entry.
        MAX_ITERATIONS: maximum number of reductions attempted.
        algo: forwarded to ``rdp.rdp``.

    Returns:
        ``(result, ix)``: the chosen ``ReductionResult`` and the index of the
        last iteration executed.

    NOTE(review): ``ix`` is unbound if ``MAX_ITERATIONS`` is 0.
    NOTE(review): the nesting of the ``else`` branch and of
    ``last_result = res`` was reconstructed from a source whose indentation
    was lost - confirm against the upstream file.
    """
    out_error_last = out_error
    prev_point_count = 0
    mass = timeseries.integrate()
    # both start as the unreduced series
    good_result = ReductionResult(flux=timeseries,
                                  mass=mass,
                                  reduced_flux=timeseries,
                                  reduced_mass=mass)
    last_result = ReductionResult(flux=timeseries,
                                  mass=mass,
                                  reduced_flux=timeseries,
                                  reduced_mass=mass)
    epsilon = ythresh
    mult_by = .5
    for ix in range(MAX_ITERATIONS):
        # execute the Ramer-Douglas-Peucker algorithm
        temp = rdp.rdp(np.stack((timeseries.times, timeseries.values),
                                axis=-1),
                       epsilon=epsilon,
                       algo=algo)
        # find the relative error
        reduced_flux = TimeSeries(temp[:, 0], temp[:, 1], None, None)
        reduced_mass = tsmath.integrate(reduced_flux)
        res = ReductionResult(flux=timeseries,
                              mass=mass,
                              reduced_flux=reduced_flux,
                              reduced_mass=reduced_mass)
        out_error = abs(res.relative_total_mass_error)
        # if the relative error is below the threshold, record the result
        if out_error < OUT_ERROR_THRESHOLD:
            # enough points as well: done, exit the loop
            if res.reduced_flux.times.size >= LOWER_N:
                last_result = res
                break
            # fewer points than the lower bound but more than any earlier
            # good attempt: keep it as a fallback candidate
            elif res.reduced_flux.times.size > prev_point_count:
                prev_point_count = res.reduced_flux.times.size
                good_result = res
            # reduce epsilon to increase the number of points found
            if epsilon * mult_by > flux_floor:
                epsilon = epsilon * mult_by
        else:
            # error too large: halve the multiplier and restart from ythresh;
            # give up once epsilon would fall to flux_floor or below
            mult_by = mult_by * .5
            epsilon = ythresh
            if epsilon * mult_by > flux_floor:
                epsilon = epsilon * mult_by
            else:
                break
        last_result = res
    # Prefer the fallback candidate when one was found, the final result is
    # too large (or out_error_last exceeds the threshold), and the candidate
    # is below the upper point bound.
    if prev_point_count > 0:
        if last_result.reduced_flux.times.size > UPPER_N or out_error_last > OUT_ERROR_THRESHOLD:
            if good_result.reduced_flux.times.size < UPPER_N:
                last_result = good_result
    return last_result, ix
def rebalance_extra_points(reduction_result, num_points=10):
    """Spend spare points on the reduced flux to lower its mass error.

    First, while more than 10 points remain, both ends of every run of more
    than 5 consecutive zero-flux samples in the original series are inserted
    (2 points per run).  The remaining points are then used one at a time:
    the first time whose absolute mass difference is at least the mean
    absolute difference is located, and the original sample at (or right
    after) the midpoint of the enclosing reduced interval is inserted.

    Args:
        reduction_result: result exposing ``flux``, ``mass``,
            ``reduced_flux``, ``diff_mass`` and ``total_mass_error``.
        num_points: number of points that may be added.

    Returns:
        A ``ReductionResult`` with the extra points inserted.
    """

    def find_mean_dif_day():
        # time of the first sample whose absolute mass difference reaches the
        # mean absolute difference; -1 when the mean is not positive
        diff = rr.diff_mass
        m_diff = np.mean(abs(diff.values))
        if m_diff > 0:
            ind = np.flatnonzero(abs(diff.values) >= m_diff)[0]
            return diff.times[ind]
        return -1

    def check_zero_fluxes():
        points = num_points
        times = rr.reduced_flux.times
        vals = rr.reduced_flux.values
        zero_inds = np.flatnonzero(rr.flux.values == 0)
        # split the zero indices wherever two neighbours are not adjacent,
        # giving one array per run of consecutive zeros
        breaks = np.flatnonzero(np.diff(zero_inds) != 1) + 1
        for run in np.split(zero_inds, breaks):
            # leave a few points for adding in strategic points
            if points <= 10:
                break
            if len(run) > 5:
                times, vals = insert_point(times, vals,
                                           rr.flux.times[run[0]],
                                           rr.flux.values[run[0]])
                times, vals = insert_point(times, vals,
                                           rr.flux.times[run[-1]],
                                           rr.flux.values[run[-1]])
                points -= 2
        return points, times, vals

    # the nested helpers read ``rr`` from this scope at call time
    rr = reduction_result
    points, times, vals = check_zero_fluxes()
    adjusted = TimeSeries(times, vals, None, None)
    reduced_mass = tsmath.integrate(adjusted)
    rr = ReductionResult(flux=rr.flux,
                         mass=rr.mass,
                         reduced_flux=adjusted,
                         reduced_mass=reduced_mass)

    # loop through and add mid points at strategic places
    for _ in range(points):
        diff_day = find_mean_dif_day()
        # -1 means the mean difference was 0: nothing to correct
        if diff_day == -1:
            return rr
        times = rr.reduced_flux.times
        vals = rr.reduced_flux.values
        # reduced points bracketing diff_day
        start_ind = np.flatnonzero(times < diff_day)[-1]
        end_ind = np.flatnonzero(times >= diff_day)[0]
        start_day = times[start_ind]
        end_day = times[end_ind]
        mid_day = ((end_day - start_day) / 2) + start_day
        # first original sample at or after the midpoint
        mid_point = np.flatnonzero(rr.flux.times >= mid_day)[0]
        if mid_day in times:
            # Nothing can be inserted, so rr stays as it is and every later
            # pass would recompute exactly the same values: stop here.
            break
        times, vals = insert_point(times, vals, rr.flux.times[mid_point],
                                   rr.flux.values[mid_point])
        adjusted = TimeSeries(times, vals, None, None)
        reduced_mass = tsmath.integrate(adjusted)
        rr = ReductionResult(flux=rr.flux,
                             mass=rr.mass,
                             reduced_flux=adjusted,
                             reduced_mass=reduced_mass)
        # stop once the relative mass error is below 0.001 %
        if abs(rr.total_mass_error / rr.mass.values[-1]) * 100 < .001:
            break
    return rr