Ejemplo n.º 1
0
def build_segments(rr, peaks, pneg, inflection_area):
    """Cut the reduced flux into one TimeSeries per long-enough inflection span.

    ``get_inflection_points`` supplies index pairs into ``rr.flux``. For each
    pair whose time span is greater than 4, the original flux points at both
    ends of the span and at their immediate inner neighbours are inserted
    into the reduced series when they are missing; the reduced points that
    fall inside the span then form one segment.

    Returns:
        ``(segments, total_mass)``: the list of segment TimeSeries and the
        sum of the last value of each segment's ``integrate()`` result.
    """
    bounds = get_inflection_points(rr.flux, peaks, pneg, inflection_area)

    full_t = rr.flux.times
    full_v = rr.flux.values
    red_t = rr.reduced_flux.times
    red_v = rr.reduced_flux.values
    pieces = []
    mass_sum = 0

    for pair in bounds:
        first = pair[0]
        last = pair[1]
        t_start = full_t[first]
        t_end = full_t[last]
        # spans of 4 or less are skipped entirely
        if not (t_end - t_start) > 4:
            continue

        # both span ends plus the point just inside each end must be present
        for idx in (first, first + 1, last, last - 1):
            if not np.any(red_t == full_t[idx]):
                red_t, red_v = insert_point(red_t, red_v, full_t[idx],
                                            full_v[idx])

        inside = np.nonzero((red_t >= t_start) & (red_t <= t_end))
        piece = TimeSeries(red_t[inside], red_v[inside], None, None)
        mass_sum += piece.integrate().values[-1]
        pieces.append(piece)

    return pieces, mass_sum
Ejemplo n.º 2
0
def add_zero_markers(o_ts, r_ts, flux_floor):
    """Add the bracketing zero points and both end points to a reduced series.

    When the original series starts (or ends) with a non-positive value, the
    last non-positive point before the first positive value (or the first
    one after the last positive value) is inserted into the reduced series
    if it is not already there. The first and last original points are also
    inserted when the reduced series does not start/end on the same times.

    A series without any positive value has no such bracketing points; in
    that case only the end points are considered (previously this raised
    IndexError).

    Args:
        o_ts: original TimeSeries.
        r_ts: reduced TimeSeries.
        flux_floor: unused; kept so existing callers keep working.

    Returns:
        ReductionResult built from ``o_ts`` and the augmented reduced series.
    """
    r_times = r_ts.times
    r_values = r_ts.values
    num_steps = o_ts.times.size
    # indices of strictly positive flux; may be empty
    positive = np.where(o_ts.values > 0)[0]

    if o_ts.values[0] <= 0 and positive.size > 0:
        # last non-positive point before the flux becomes positive
        zero_ind = positive[0] - 1
        if not np.any(r_times == o_ts.times[zero_ind]):
            r_times, r_values = insert_point(r_times, r_values,
                                             o_ts.times[zero_ind],
                                             o_ts.values[zero_ind])
    if o_ts.times[0] != r_times[0]:
        r_times, r_values = insert_point(r_times, r_values, o_ts.times[0],
                                         o_ts.values[0])

    if o_ts.values[-1] <= 0 and positive.size > 0:
        # first non-positive point after the flux stops being positive
        zero_ind = positive[-1] + 1
        if zero_ind < num_steps and not np.any(
                r_times == o_ts.times[zero_ind]):
            r_times, r_values = insert_point(r_times, r_values,
                                             o_ts.times[zero_ind],
                                             o_ts.values[zero_ind])
    if o_ts.times[-1] != r_times[-1]:
        r_times, r_values = insert_point(r_times, r_values, o_ts.times[-1],
                                         o_ts.values[-1])

    new_ts = TimeSeries(r_times, r_values, None, None)

    return ReductionResult(
        flux=o_ts,
        mass=o_ts.integrate(),
        reduced_flux=new_ts,
        reduced_mass=new_ts.integrate())
Ejemplo n.º 3
0
def adjust_flux(data, error):
    """Compute adjusted flux values for the interior points of each segment.

    The total mass is the sum, over every non-empty segment, of the last
    value of its integrated series. Each segment with more than two points
    gets a relative correction ``(segment_mass / total_mass) *
    (error / total_mass)``, limited to +/-0.1. The first point of such a
    segment is recorded unchanged, every interior point is scaled by the
    correction, and the last point is not recorded at all.

    Returns:
        dict mapping time -> flux value for the recorded points.
    """
    total_mass = float(0.0)
    for segment in data:
        if len(segment) > 0:
            rebuilt = TimeSeries(segment.times, segment.values, None, None)
            total_mass += rebuilt.integrate().values[-1]

    adjusted = {}
    for segment in data:
        # only segments with at least one interior point are adjusted
        if not segment.times.size > 2:
            continue

        times = segment.times
        fluxes = segment.values
        seg_mass = segment.integrate().values[-1]

        # share of the total mass times the error relative to the total mass
        mass_share = seg_mass / total_mass
        error_share = error / total_mass
        ratio = mass_share * error_share
        # keep the sign, cap the magnitude at 10%
        if abs(ratio) > .1:
            ratio = abs(ratio) / ratio * 0.1

        adjusted[times[0]] = fluxes[0]
        ceiling = max(fluxes)

        for idx, when in enumerate(times[1:-1], start=1):
            base = fluxes[idx]
            candidate = base + (base * ratio)
            if candidate > ceiling:
                # never exceed the segment maximum: move 10% of the way to it
                candidate = base + ((ceiling - base) * .1)
            # negative flux is not allowed
            if candidate < 0:
                candidate = float(0.0)
            adjusted[when] = candidate

    return adjusted
Ejemplo n.º 4
0
def adjust_flux(data, error):
    """Spread a mass error over the interior points of the given segments.

    Every segment with more than two points contributes its first point
    unchanged and its interior points scaled by a per-segment rate; the last
    point of a segment is never written. The rate is the segment's fraction
    of the total mass multiplied by ``error / total_mass``, clipped to
    +/-0.1.

    Returns:
        dict keyed by time holding the (possibly adjusted) flux values.
    """

    def _final_mass(series):
        # cumulative mass at the end of the series
        return series.integrate().values[-1]

    def _bounded(value, rate, ceiling):
        # scale by the rate, but stay within [0, ceiling]
        candidate = value + (value * rate)
        if candidate > ceiling:
            candidate = value + ((ceiling - value) * .1)
        if candidate < 0:
            return float(0.0)
        return candidate

    # cumulative mass of all non-empty segments
    total_mass = float(0.0)
    for seg in data:
        if len(seg) > 0:
            total_mass += _final_mass(
                TimeSeries(seg.times, seg.values, None, None))

    adjusted = {}
    for seg in data:
        if seg.times.size <= 2:
            # no interior points to adjust
            continue

        years, fluxes = seg.times, seg.values
        mass = _final_mass(seg)

        mass_fraction = mass / total_mass
        error_fraction = error / total_mass
        rate = mass_fraction * error_fraction
        if abs(rate) > 0.1:
            rate = abs(rate) / rate * 0.1

        adjusted[years[0]] = fluxes[0]
        ceiling = max(fluxes)

        # first and last values never change; only the first is recorded
        adjusted.update({
            years[i]: _bounded(fluxes[i], rate, ceiling)
            for i in range(1, years.size - 1)
        })

    return adjusted
Ejemplo n.º 5
0
 def extract(self, row, col):
     """Build the TimeSeries held in the ``'<row>-<col>'`` column.

     The values come from that column of ``self.df`` and the times from its
     ``YEAR_COL`` column.
     """
     label = '-'.join((row, col))
     fluxes = self.df[label].values
     years = self.df[YEAR_COL].values
     return TimeSeries(years, fluxes, row, col)
Ejemplo n.º 6
0
def rebalance_valleys(reduction_result, peaks, pneg):
    """Shift reduced flux values inside the valley segments to absorb the mass error.

    Segments are built with ``build_segments`` (inflection area 0.5). When
    the absolute total mass error exceeds the mass of those segments a
    warning is printed and the input result is returned unchanged. Otherwise
    ``adjust_flux`` supplies replacement values keyed by time, which are
    written into a copy of the reduced values, and a new ReductionResult is
    built from them.

    The reduced values are copied before being modified so that the
    ``reduction_result`` passed in keeps its own data; callers that compare
    the returned result against the previous one rely on the previous one
    staying intact.

    Args:
        reduction_result: the ReductionResult to rebalance.
        peaks: peak indices, forwarded to ``build_segments``.
        pneg: valley indices, forwarded to ``build_segments``.

    Returns:
        A new ReductionResult, or ``reduction_result`` itself when the error
        cannot be corrected.
    """
    rr = reduction_result
    error = rr.total_mass_error

    segments, t_mass = build_segments(rr, peaks, pneg, .5)

    if abs(error) > t_mass:
        print(
            "*Warning: total_mass_error ({}) exceeds valley mass ({}) for error adjustment; unable to correct mass_error"
            .format(error, t_mass))
        return rr
    adj_dict = adjust_flux(segments, error)

    r_x = rr.reduced_flux.times
    # work on a private copy: writing through rr.reduced_flux.values would
    # silently change the caller's result as well
    r_y = rr.reduced_flux.values.copy()

    for i, year in enumerate(r_x):
        if year in adj_dict:
            r_y[i] = adj_dict[year]

    adjusted = TimeSeries(r_x, r_y, None, None)
    reduced_mass = tsmath.integrate(adjusted)
    return ReductionResult(flux=rr.flux,
                           mass=rr.mass,
                           reduced_flux=adjusted,
                           reduced_mass=reduced_mass)
    def extract(self, copc, site):
        """Extract the time series of one copc at one site.

        Rows of ``self.df`` whose ``SITE_COL`` equals ``site`` are selected;
        times come from ``YEAR_COL`` and values from the ``copc`` column.
        When ``self.zero_below`` is set, every value below it is logged and
        replaced by 0.0.

        The values are copied before being modified, so the write never
        targets an array owned by pandas (which may be read-only).

        Returns:
            TimeSeries(times, values, copc, site)
        """
        sub = self.df[self.df[SITE_COL] == site]
        x = sub[YEAR_COL].values
        # private copy: values below the threshold are overwritten in place
        y = sub[copc].values.copy()

        # if a zero-below threshold is defined, force smaller values to zero
        if self.zero_below is not None:
            idx = y < self.zero_below
            if idx.any():
                msg = "{}--{}: Forcing values less than '{}' to zero; occurs at \ntimesteps: {} \nwith corresponding flux: {} "
                logging.info(
                    msg.format(site, copc, self.zero_below, x[idx], y[idx]))
                y[idx] = 0.0

        return TimeSeries(x, y, copc, site)
Ejemplo n.º 8
0
def find_inflection(x, y, s_ind, e_ind):
    """Find the index that splits the mass between two indices roughly in half.

    The reference mass is the integrated mass of ``x[s_ind:e_ind]``. The
    scan starts at the end with the larger ``y`` value and moves toward the
    other end, returning the first index ``i`` for which the mass between
    ``i`` and the lower-valued end is at most half of the reference mass.

    * ``y[s_ind] > y[e_ind]``: ``i`` runs upward from ``s_ind`` and the mass
      of ``[i:e_ind]`` is tested; ``s_ind`` is returned if nothing matches.
    * otherwise: ``i`` runs downward from ``e_ind`` and the mass of
      ``[s_ind:i]`` is tested; ``e_ind`` is returned if nothing matches.
    """

    def _mass(lo, hi):
        # last value of the integrated slice [lo:hi]
        piece = TimeSeries(x[lo:hi], y[lo:hi], None, None)
        return piece.integrate().values[-1]

    half_mass = _mass(s_ind, e_ind) / 2

    if y[s_ind] > y[e_ind]:
        # s_ind is the higher end: walk forward, measuring from i to e_ind
        for i in range(s_ind, e_ind):
            if _mass(i, e_ind) <= half_mass:
                return i
        # not expected to be reached
        return s_ind

    # e_ind is the higher end: walk backward, measuring from s_ind to i
    for i in range(e_ind, s_ind, -1):
        if _mass(s_ind, i) <= half_mass:
            return i
    # not expected to be reached
    return e_ind
Ejemplo n.º 9
0
def reduce_dataset(years, values,flux_floor=0,max_tm_error=0,min_reduction_steps=200):
    """Reduce a flux time series to a smaller point set and rebalance its mass.

    The values are normalised by their maximum, reduced with ``reduct_iter``,
    passed through ``retain_min_years``, given extra points by
    ``red_flux.rebalance_extra_points`` when fewer than
    ``min_reduction_steps`` points were used, and finally run through
    ``red_flux.rebalance_valleys`` for as long as the total mass error keeps
    decreasing and is still above tolerance (at most 100 passes).

    Args:
        years: time axis of the original series; it is indexed with the
            result of ``remove_begin_end_zero_flux`` (presumably a numpy
            array - TODO confirm).
        values: flux values matching ``years``.
        flux_floor: forwarded to ``remove_begin_end_zero_flux`` and
            ``reduct_iter`` (original note: "flux_floor > flux == 0").
        max_tm_error: absolute tolerance on the de-normalised total mass
            error used by the rebalancing loop (original note:
            "max_tm_error > total mass error").
        min_reduction_steps: forwarded to ``remove_begin_end_zero_flux`` and
            used as the point budget for ``rebalance_extra_points``.

    Returns:
        A 5-tuple: reduced times, reduced values multiplied back by the
        normalisation maximum, the negated total mass error multiplied by
        that maximum, the number of peak indices found, and the number of
        rebalance passes that were accepted.

    NOTE(review): the previous docstring stated that False is returned when
    all elements are zero; no return statement below does that - confirm the
    intended behaviour.
    """
    non_zero_ind, min_retained_zero_years = remove_begin_end_zero_flux(years,values,flux_floor,min_reduction_steps)

    years_mod = years[non_zero_ind]
    values_mod = values[non_zero_ind]

    if years_mod.size <3:
        # too few points left after trimming: fall back to the full series
        years_mod = years
        values_mod = values
        # NOTE(review): this replaces the array assigned on the previous line
        # with the scalar 0, so maxval below becomes 0 and the normalisation
        # divides by zero - looks unintended, confirm.
        values_mod = 0
    else:
        #make sure you have not removed more than 1% of the mass when removing 0 or flux floor rates
        o_mass = TimeSeries(years,values,None,None).integrate().values[-1]
        r_mass = TimeSeries(years_mod, values_mod, None, None).integrate().values[-1]
        if abs((o_mass-r_mass)/o_mass)*100 > 1:
            # trimming changed the mass by more than 1%: use the full series
            years_mod = years
            values_mod = values
            # these two are reassigned unconditionally after normalisation
            timeseries = TimeSeries(years_mod, values_mod, None, None)
            mass = timeseries.integrate()

    #normalize Values
    maxval = np.max(values_mod)
    values_mod = values_mod/maxval
    o_timeseries = TimeSeries(years,values/maxval,None,None)
    o_mass = o_timeseries.integrate()
    timeseries = TimeSeries(years_mod, values_mod, None, None)
    mass = timeseries.integrate()

    # mx, points and x are not used again in this function
    mx = np.argmax(timeseries.values)
    points = [0, mx, len(timeseries)]
    x = timeseries.times

    # starting epsilon handed to reduct_iter: 100 times the mean normalised value
    ythresh = 100*np.mean(timeseries.values)
    out_error = 1
    out_error_last = out_error
    OUT_ERROR_THRESHOLD = 1e-2

    UPPER_N = 200
    LOWER_N = 50
    last_result = None
    MAX_ITERATIONS = 80

    # solve_type and simple_peaks are not used again in this function
    solve_type = SMOOTH
    simple_peaks = False
    last_result,ix = reduct_iter(timeseries,flux_floor,ythresh,out_error,out_error_last,OUT_ERROR_THRESHOLD,UPPER_N,LOWER_N,last_result,MAX_ITERATIONS)
    last_result = retain_min_years(last_result.reduced_flux,o_timeseries,o_mass,min_retained_zero_years)
    #if there are less points than the min_reduction_steps then use the remaining
    #points to rebalance the segments with the largest mass errors.
    play_points = min_reduction_steps - last_result.num_reduced_points
    # bef is not used again in this function
    bef = last_result.reduced_flux.times.size
    if play_points > 0:
        last_result = red_flux.rebalance_extra_points(last_result,play_points)

    rr = last_result

    #find peaks for data rebalance and reporting
    # try progressively narrower widths (3, 2, 1) until at least one peak is found
    peaks, _ = sig.find_peaks(rr.reduced_flux.values,width=3,rel_height=1)
    if peaks.size == 0 :
        peaks, _ = sig.find_peaks(rr.reduced_flux.values,width=2,rel_height=1)
        if peaks.size == 0:
            peaks, _ = sig.find_peaks(rr.reduced_flux.values,width=1,rel_height=1)
    # same search on the negated values to locate the valleys
    pneg, _ = sig.find_peaks(-rr.reduced_flux.values,width=3,rel_height=1)
    if pneg.size == 0:
        pneg, _ = sig.find_peaks(-rr.reduced_flux.values,width=2,rel_height=1)
        if pneg.size == 0:
            pneg, _ = sig.find_peaks(-rr.reduced_flux.values,width=1,rel_height=1)

    # convert indices in the reduced series to times ...
    peaks = rr.reduced_flux.times[peaks]
    pneg = rr.reduced_flux.times[pneg]

    # ... and those times to indices in the original (normalised) series
    peaks = np.isin(o_timeseries.times,peaks)
    pneg = np.isin(o_timeseries.times,pneg)
    peaks = np.where(peaks)
    pneg = np.where(pneg)

    peaks = peaks[0]
    pneg = pneg[0]
    # counts accepted rebalance passes (note: the name shadows the builtin iter)
    iter = 0
    # keep rebalancing while either the absolute (de-normalised) or the
    # relative (percent of total mass) error is above its tolerance
    while iter < 100 and (abs(last_result.total_mass_error*maxval) > max_tm_error or abs(last_result.total_mass_error/last_result.mass.values[-1])*100 > .001) :
        rr = red_flux.rebalance_valleys(rr,peaks,pneg)
        #keep the lowest total_mass_error
        if abs(rr.total_mass_error) < abs(last_result.total_mass_error):
            last_result = rr
        else:
            # no improvement: stop and keep the previous result
            break
        iter += 1

    out_times = last_result.reduced_flux.times
    out_values = last_result.reduced_flux.values
    #return the reduced data, undo normalize of the values (*maxval)
    return out_times, out_values*maxval,-(last_result.total_mass_error * maxval),peaks.size,iter
Ejemplo n.º 10
0
def reduct_iter(timeseries, flux_floor, ythresh, out_error, out_error_last,
                OUT_ERROR_THRESHOLD, UPPER_N, LOWER_N, last_result,
                MAX_ITERATIONS, algo="iter"):
    """Search for an RDP epsilon that gives an acceptable reduced series.

    Starting from ``epsilon = ythresh`` the series is reduced with
    ``rdp.rdp``. A reduction whose absolute relative total mass error is
    below ``OUT_ERROR_THRESHOLD`` ends the search when it keeps at least
    ``LOWER_N`` points; with fewer points it is remembered as a fallback if
    it keeps more points than any earlier fallback. After each try epsilon
    is multiplied by the current factor (initially 0.5); when that would not
    stay above ``flux_floor`` the factor is halved and applied to
    ``ythresh`` again, and the search stops if even that is not above
    ``flux_floor``.

    The incoming ``out_error_last`` and ``last_result`` arguments are
    overwritten before use. ``MAX_ITERATIONS`` must be at least 1: with 0
    the loop index that is returned is never bound.

    Returns:
        ``(result, ix)``: the chosen ReductionResult and the index of the
        last iteration that ran.
    """

    def _as_result(reduced, reduced_mass):
        # every result shares the same original series and mass
        return ReductionResult(
            flux=timeseries,
            mass=mass,
            reduced_flux=reduced,
            reduced_mass=reduced_mass)

    initial_error = out_error
    best_small_count = 0

    mass = timeseries.integrate()
    fallback = _as_result(timeseries, mass)
    last_result = _as_result(timeseries, mass)
    epsilon = ythresh
    shrink = .5

    for ix in range(MAX_ITERATIONS):
        # Ramer-Douglas-Peucker reduction at the current epsilon
        xy = np.stack((timeseries.times, timeseries.values), axis=-1)
        kept = rdp.rdp(xy, epsilon=epsilon, algo=algo)
        candidate_flux = TimeSeries(kept[:, 0], kept[:, 1], None, None)
        candidate = _as_result(candidate_flux,
                               tsmath.integrate(candidate_flux))

        out_error = abs(candidate.relative_total_mass_error)
        if out_error < OUT_ERROR_THRESHOLD:
            n_kept = candidate.reduced_flux.times.size
            if n_kept >= LOWER_N:
                # accurate enough and enough points: done
                last_result = candidate
                break
            if n_kept > best_small_count:
                # accurate but small; best such candidate so far
                best_small_count = n_kept
                fallback = candidate

        # lower epsilon so the next pass keeps more points
        if epsilon * shrink > flux_floor:
            epsilon = epsilon * shrink
        else:
            # restart from ythresh with a smaller factor
            shrink = shrink * .5
            epsilon = ythresh
            if not epsilon * shrink > flux_floor:
                break
            epsilon = epsilon * shrink

        last_result = candidate

    use_fallback = (
        best_small_count > 0
        and (last_result.reduced_flux.times.size > UPPER_N
             or initial_error > OUT_ERROR_THRESHOLD)
        and fallback.reduced_flux.times.size < UPPER_N
    )
    if use_fallback:
        last_result = fallback

    return last_result, ix
Ejemplo n.º 11
0
def rebalance_extra_points(reduction_result, num_points=10):
    """Spend up to ``num_points`` extra points to lower the mass error.

    First, while more than 10 points remain, both ends of every run of more
    than five consecutive zero-flux samples in the original series are
    inserted into the reduced series (two points per run). The remaining
    points are then inserted one at a time: the first time at which the
    absolute mass difference reaches its mean is located, and the original
    sample at (or first after) the midpoint of the surrounding reduced
    interval is added. The loop ends when the relative total mass error
    drops below 0.001 %, when the mass difference is zero everywhere, or
    when the chosen sample is already present (no further progress is
    possible).

    Fixes relative to the previous version:
      * consecutive-zero runs are tracked separately; before, every new run
        re-appended the first run's list and kept extending it;
      * the "already present" check looks at the time that is actually
        inserted rather than at the computed midpoint, so an existing time
        is no longer inserted a second time.

    Args:
        reduction_result: ReductionResult to improve.
        num_points: number of extra points that may be added.

    Returns:
        The updated ReductionResult.
    """

    def rebuild(times, vals):
        # new ReductionResult sharing the original flux/mass of rr
        adjusted = TimeSeries(times, vals, None, None)
        return ReductionResult(flux=rr.flux,
                               mass=rr.mass,
                               reduced_flux=adjusted,
                               reduced_mass=tsmath.integrate(adjusted))

    def find_mean_dif_day():
        # first time whose absolute mass difference is at least the mean
        # absolute difference; -1 when there is no difference at all
        diff = rr.diff_mass
        m_diff = np.mean(abs(diff.values))
        if m_diff > 0:
            ind = np.flatnonzero(abs(diff.values) >= m_diff)[0]
            return diff.times[ind]
        return -1

    def check_zero_fluxes():
        # pin both ends of long zero-flux runs, keeping a reserve of points
        points = num_points
        times = rr.reduced_flux.times
        vals = rr.reduced_flux.values
        zero_inds = np.flatnonzero(rr.flux.values == 0)

        # group the zero indices into runs of consecutive indices
        series = []
        result = [series]
        expect = None
        for v in zero_inds:
            if expect is not None and v != expect:
                # gap found: start a new run
                series = [v]
                result.append(series)
            else:
                series.append(v)
            expect = v + 1

        for r in result:
            # leave a few points for adding in strategic points
            if points <= 10:
                break
            if len(r) > 5:
                times, vals = insert_point(times, vals, rr.flux.times[r[0]],
                                           rr.flux.values[r[0]])
                times, vals = insert_point(times, vals, rr.flux.times[r[-1]],
                                           rr.flux.values[r[-1]])
                points -= 2
        return points, times, vals

    rr = reduction_result
    points, times, vals = check_zero_fluxes()
    rr = rebuild(times, vals)

    # add midpoints where the mass difference is large
    for _ in range(points):
        diff_day = find_mean_dif_day()
        # -1 means the mass difference is zero: nothing left to correct
        if diff_day == -1:
            return rr
        times = rr.reduced_flux.times
        vals = rr.reduced_flux.values
        # reduced points on either side of diff_day
        start_ind = np.flatnonzero(times < diff_day)[-1]
        end_ind = np.flatnonzero(times >= diff_day)[0]
        start_day = times[start_ind]
        end_day = times[end_ind]
        mid_day = ((end_day - start_day) / 2) + start_day
        # first original sample at or after the midpoint
        mid_point = np.flatnonzero(rr.flux.times >= mid_day)[0]
        new_day = rr.flux.times[mid_point]
        if new_day in times:
            # the interval cannot be split further; later passes would pick
            # the same interval again, so stop here
            break
        times, vals = insert_point(times, vals, new_day,
                                   rr.flux.values[mid_point])
        rr = rebuild(times, vals)

        if abs(rr.total_mass_error / rr.mass.values[-1]) * 100 < .001:
            break
    return rr