Example #1
def aglet(src, window, dst=None):
    """straigten the ends of a windowed sequence.

    Replace the window/2 samples at each end of the sequence with
    lines fit to the full window at each end.  This boundary treatment
    for windowed smoothers is better behaved for detrending than
    decreasing window sizes at the ends.

    Parameters
    ----------
    src : ndarray
        list of observed values
    window : int
        odd integer window size (as would be provided to a windowed smoother)
    dst : ndarray
        if provided, write aglets into the boundaries of this array.
        if dst=src, overwrite ends of src in place. If None, allocate result.

    Returns
    -------
    dst : ndarray
        array composed of src's infield values with aglet ends.

    """
    if dst is None:
        dst = np.array(src)
    half = window // 2
    leftslope = stats.theilslopes(src[:window])[0]
    rightslope = stats.theilslopes(src[-window:])[0]
    dst[0:half] = np.arange(-half, 0) * leftslope + src[half]
    dst[-half:] = np.arange(1, half + 1) * rightslope + src[-half - 1]
    return dst
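A brief usage sketch (not part of the original example): it assumes numpy and scipy.stats are imported under the names np and stats used above, and that aglet is in scope.

import numpy as np
from scipy import stats  # aglet relies on stats.theilslopes

# Hypothetical demo data: a linear ramp plus noise.
rng = np.random.default_rng(0)
src = np.linspace(0.0, 10.0, 101) + rng.normal(scale=0.5, size=101)

out = aglet(src, window=11)      # allocate and return a new array
aglet(src, window=11, dst=src)   # or overwrite the ends of src in place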
Example #3
    def trend(value, pval):
        """ Function used to compute the Theil-Sen estimator for a set of points (x, y).
            Theilslopes implements a method for robust linear regression.
            It computes the slope as the median of all slopes between paired values.

            link: https://docs.scipy.org/doc/scipy-0.17.1/reference/generated/scipy.stats.theilslopes.html

            Args:
                value (pandas.Series): the raw data
                pval (float): confidence level in probability (default is 0.95 or 95%)

            Returns:
                float: slope
        """
        try:
            if len(value) <= 1000:
                # res = slope, intercept, lo_slope, up_slope
                res = stats.theilslopes(value, np.arange(len(value)), 1 - pval)
                if res[2] < 0 < res[3]:
                    slope = 0
                else:
                    slope = res[0]
            elif 1000 < len(value) <= 1000000:
                # dimension of the batch
                dim_batch = int(len(value) / 1000)
                # number of batches
                n_batch = int(len(value) / dim_batch)
                # new list of values batched
                value_batch = []
                for i in range(0, n_batch):
                    value_batch.append(
                        np.median(value[dim_batch * i:dim_batch * (i + 1)]))
                res = stats.theilslopes(value_batch,
                                        np.arange(len(value_batch)), 1 - pval)
                if res[2] < 0 < res[3]:
                    slope = 0
                else:
                    slope = res[0]
            else:
                y_list = value.tolist()
                x_list = np.arange(len(y_list))
                # lsq_res = slope, intercept, r_value, p_value, std_err
                lsq_res = stats.linregress(x_list, y_list)
                if pval < lsq_res[3]:
                    slope = 0
                else:
                    slope = lsq_res[0]

            return slope
        except:
            raise
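For reference, the confidence-interval check used above can be exercised on its own; this is an illustrative sketch with made-up, nearly flat data, not part of the original module. As in the function, alpha is passed as 1 - pval.

import numpy as np
from scipy import stats

value = np.array([1.0, 1.1, 0.9, 1.2, 1.0, 1.05, 0.95, 1.1])
pval = 0.95

# res = (slope, intercept, lo_slope, up_slope)
res = stats.theilslopes(value, np.arange(len(value)), 1 - pval)
slope = 0 if res[2] < 0 < res[3] else res[0]
print(slope)  # 0 whenever the confidence interval straddles zero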
Example #4
    def estimate_low_temp_correction(self):
        "Estimate energy value using the slope of the values to the peak of an arrhenius plot"
        if len(self.upslope_x) > 3:
            # Estimate T_H_L
            upslope_diff_x = self.upslope_x[:-2]
            upslope_diff_y = np.diff(self.upslope_y[:-1])

            max_diff_index = np.argmax(upslope_diff_y)
            min_diff_index = np.argmin(upslope_diff_y)
            self.T_H_L = (upslope_diff_x[max_diff_index] +
                          upslope_diff_x[min_diff_index]) / 2
        else:
            self.T_H_L = self.T_pk - 10

        if len(self.upslope_x) > 5:
            #estimate EDL
            x = np.array_split(self.upslope_x, 3)[0]
            y = np.array_split(self.upslope_y, 3)[0]

            ahrr_x = 1 / (self.k * x)
            ahrr_y = np.log(y)

            try:
                slope, *vals = stats.theilslopes(
                    ahrr_x, ahrr_y, 0.9)  #maybe more robust given noisy data?
            except:
                slope, *vals = stats.linregress(ahrr_x, ahrr_y)

            self.E_D_L_init = slope + self.E_init
        else:
            self.E_D_L_init = self.E_init * (-2)  #Default value
Example #5
 def sync(self):
     self._sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
     self._sock.settimeout(self._timeout)
     t = [[], [], [], []]
     reg = re.compile(b"(.*),(.*),(.*)")
     self.logger.info("Syncing")
     i = 0
     while i < self._rounds:
         try:
             t0 = b"%.9f" % self.now()
             self._sock.sendto(t0, (self._host, self._port))
             data, address = self._sock.recvfrom(self._buffer_size)
             t3 = self.now()
             r = reg.findall(data)[0]
             if r[0] == t0:  # make sure we have a matching UDP packet
                 r = np.float64(r)
                 for j in range(3):
                     t[j].append(r[j])
                 t[3].append(np.float64(t3))
                 i += 1
         except socket.timeout:
             continue
         progress = "Progress: %.2f%%" % (i * 100 / self._rounds)
         print(progress, end="\r", flush=True)
     self._sock.close()
     t = np.array(t)
     offset = ((t[1] - t[0]) + (t[2] - t[3])) / 2
     delay = (t[3] - t[0]) - (t[2] - t[1])
     _, offset, _, _ = stats.theilslopes(offset, delay)
     self.offset_remote = offset
     self.logger.info("Offset: %f", offset)
     return offset
Example #6
    def estimate_high_temp_correction(self):
        "Estimate energy value using the slope of the values to the peak of an arrhenius plot"

        if len(self.downslope_x) > 1:
            #Estimate ED
            x = 1 / (self.k * self.downslope_x)
            y = np.log(self.downslope_y)

            try:
                slope, *vals = stats.theilslopes(
                    x, y, 0.9)  #maybe more robust given noisy data?
            except:
                slope, *vals = stats.linregress(x, y)

            self.E_D_init = slope + self.E_init

            #Estimate TH
            downslope_diff_x = self.downslope_x[1:]
            downslope_diff_y = np.diff(self.downslope_y)

            max_change_index = np.argmin(downslope_diff_y)
            self.T_H = self.T_pk + (
                (downslope_diff_x[max_change_index] - self.T_pk) / 2)
        else:
            self.E_D_init = self.E_init * (4)  #Default value
            self.T_H = self.T_pk + 3
Example #7
def EstimateTrend(df, dfout, wname, npoints):
    xm = pd.to_datetime(df['Date'])
    #    print(f'xm={xm}')
    #wname = list(df.columns)
    # wname.remove('Date')
    for id, wn in enumerate(wname):  # id is count, s is wname
        cols = ['Date', wn]
        df_new = df[cols]
        df_new = df_new.dropna(subset=cols)
        print(f'df_new size = {df_new.shape}')
        if df_new.empty:
            print('DataFrame is empty. Skipped. \n')
            # break
        else:
            if df_new.shape[0] > npoints:
                print(f'df_new size={df_new.shape} \n')
                xm = df_new['Date'].dt.year
                ym = df_new[wn]
                modelm = stats.theilslopes(ym, xm, 0.95)
                dfout.loc[id, 'trend'] = modelm[0]
                dfout.loc[id, 'lo'] = modelm[2]
                dfout.loc[id, 'up'] = modelm[3]
                dfout.loc[id, 'start_date'] = xm.min()
                dfout.loc[id, 'end_date'] = xm.max()
                dfout.loc[id, 'nobs'] = df_new.shape[0]
                print(
                    f'{id}: Well: {wn}, nobs=[{len(xm)}, {len(ym)}], trend = { modelm[0]}'
                )
    return dfout
Example #8
def _trend_theilslopes_wrapper(y, x, nmin=3, **kwargs):
    from scipy.stats import theilslopes
    ii = np.isfinite(y)
    if ii.sum() > nmin:
        # slope (k), intercept (d), and the lower/upper bounds when alpha is given
        return np.asarray(theilslopes(y[ii], x[ii], **kwargs))
    return np.array([np.nan] * 4)
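A possible call of the wrapper above (illustrative data only): NaNs are masked out, and four NaNs come back when fewer than nmin finite points remain.

import numpy as np

x = np.arange(10, dtype=float)
y = 2.0 * x + 1.0
y[[3, 7]] = np.nan  # a couple of missing values are tolerated

slope, intercept, lo_slope, up_slope = _trend_theilslopes_wrapper(y, x, nmin=3, alpha=0.95)
print(slope)  # approximately 2.0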
Example #9
    def guess(self, time, density):
        '''
        Guess the steady-state nucleation rate and induction time

        The guess is based on a robust linear regression of the density over
        time data. Guesses are useful for non-linear regressions.

        Parameters
        ----------
        time : array_like
            Elapsed time.

        density : array_like
            Nuclei density.

        Returns
        -------
        guess_rate : float
            Guess for the steady-state nucleation rate for the yielded dataset

        guess_induction_time : float
            Guess for the induction time for the yielded dataset
        '''
        slope, intercept, _, _ = theilslopes(x=time, y=density)
        guess_rate, guess_induction_time = slope, -intercept / slope

        if guess_induction_time < 1e-3:
            guess_induction_time = 1e-3

        return guess_rate, guess_induction_time
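For context, the guess above reduces to a Theil-Sen fit of density against time, with the induction time read off where the fitted line crosses zero density. Here is an illustrative standalone sketch with made-up data (not taken from the original class).

import numpy as np
from scipy.stats import theilslopes

time = np.linspace(0.0, 100.0, 50)
density = np.clip(2.0 * (time - 10.0), 0.0, None)  # nucleation starts near t = 10

slope, intercept, _, _ = theilslopes(x=time, y=density)
guess_rate = slope
guess_induction_time = max(-intercept / slope, 1e-3)
print(guess_rate, guess_induction_time)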
Example #10
def _theil_slopes_ufunc(y):
    """
    Wrapper function for `scipy.stats.theilslopes` to be used in a vectorized 
    way in `theil_slopes_boosted` function.

    Parameters
    ----------
    y : numpy array
        One-dimensional data array.

    Returns
    -------
    results : numpy array
        Array containing the following results: (1) Theil slope, (2) Intercept
        of the Theil line, (3) Lower and (4) upper bounds of the confidence 
        interval on Theil slope.     
    """

    # Dummy index in regression.
    x = np.arange(y.shape[0])

    # A single NaN is enough to spoil the calculation.
    if np.sum(np.isnan(y)) > 0:

        return np.array([np.nan, np.nan, np.nan, np.nan])

    # Output.
    else:

        slope, intercept, low_slope, upp_slope = stats.theilslopes(y=y, x=x)
        results = np.array([slope, intercept, low_slope, upp_slope])

        return results
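The docstring refers to vectorized use inside theil_slopes_boosted, which is not shown in this excerpt. A minimal sketch of one way to apply the wrapper row-wise, assuming the usual numpy/scipy imports used above:

import numpy as np
from scipy import stats  # the wrapper above relies on stats.theilslopes

# Hypothetical stack of five series of length 50, one per row.
rng = np.random.default_rng(1)
data = rng.normal(size=(5, 50)).cumsum(axis=1)

# Each row maps to (slope, intercept, low_slope, upp_slope).
results = np.apply_along_axis(_theil_slopes_ufunc, 1, data)
print(results.shape)  # (5, 4)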
Example #11
def velo(a1, a2, alpha):
    """Measures improvement velocity of workers. Returns index of the one which
    is expected to be better after a given time."""
    n = len(a1) + len(a2) - 2
    medslope1, medintercept1, lo_slope1, up_slope1 = theilslopes(a1, alpha=alpha)
    medslope2, medintercept2, lo_slope2, up_slope2 = theilslopes(a2, alpha=alpha)
    anchor1 = medslope1 * (len(a1)-1)/2 + medintercept1
    anchor2 = medslope2 * (len(a2)-1)/2 + medintercept2
    y1lo = lo_slope1 * (n - (len(a1)-1)/2) + anchor1  # extrapolate out and see
    y1hi = up_slope1 * (n - (len(a1)-1)/2) + anchor1  # which leads to higher score
    y2lo = lo_slope2 * (n - (len(a2)-1)/2) + anchor2  # at present rate of
    y2hi = up_slope2 * (n - (len(a2)-1)/2) + anchor2  # improvement
    if y1lo > y2hi:
        return 0
    if y2lo > y1hi:
        return 1
    return None
Example #12
def plot_theil_sen_trend(pax, xdata, ydata, color, linestyle):
    res = stats.theilslopes(ydata, xdata, 0.90)
    print("Theil-Sen: {0}x + {1}".format(res[0], res[1]))

    # Plot the trend line twice: first a wider black line as an outline
    pax.plot(xdata, res[1] + res[0] * xdata,
             color='k', linewidth=2.5, linestyle=linestyle)
    # then overplot the trend line in the requested color
    pax.plot(xdata, res[1] + res[0] * xdata,
             color=color, linestyle=linestyle)
Example #13
def crossregress(var, a, lag=0):
    """
    Input:-
    var: 1-D array var
    a: 1-D array index

    Output: regression coefficient

    """
    shiftedy = np.roll(a, lag)
    r = stats.theilslopes(var, shiftedy)[0]
    return r
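A quick call sketch with synthetic data (illustrative only): note that np.roll wraps values around the array ends, so a nonzero lag mixes a few samples from the tail of the index series into its head.

import numpy as np
from scipy import stats

rng = np.random.default_rng(7)
index = rng.normal(size=200)
var = 0.8 * np.roll(index, -2) + rng.normal(scale=0.1, size=200)

print(crossregress(var, index, lag=-2))  # recovers a slope of roughly 0.8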
Example #14
def line_filter(data, window):
    """fit a line to the data, after filtering"""
    # knock down seasonal variation with a median filter first
    half = window // 2
    coarse = median_filter(data, window)[half:-half]  # discard crazy ends
    slope, _, lower, upper = stats.theilslopes(coarse)
    if lower <= 0.0 and upper >= 0.0:
        filtered = np.zeros(len(data)) + np.median(data)
    else:
        intercept = np.median(data) - (len(data) - 1) / 2 * slope
        filtered = slope * np.arange(len(data)) + intercept
    return filtered
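A short usage sketch for the filter above (illustrative only). The example does not show its imports, so this assumes median_filter comes from scipy.ndimage and stats from scipy.

import numpy as np
from scipy import stats
from scipy.ndimage import median_filter  # assumed source of median_filter

t = np.arange(365)
data = 0.01 * t + np.sin(2 * np.pi * t / 30)  # slow trend plus a monthly cycle

fitted = line_filter(data, window=31)  # roughly recovers the 0.01 * t trend line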
Example #16
    def estimate_trend(self, time_series_x: np.ndarray,
                       time_series_y: np.ndarray) -> np.ndarray:
        """
        Function used to execute the trend estimation process of the method

        :param time_series_x: time variable of the time series
        :param time_series_y: value of the time series
        :return: array with the trend
        """
        slope, intercept, lo_slope, hi_slope = stats.theilslopes(
            time_series_y, time_series_x, self.confidence)
        trend = intercept + slope * time_series_x
        return trend
Example #17
    def regress(self):
        '''
        Use the scipy linregress function to perform linear regression
        '''
        slope, intercept, r_val, p_val, std_err = stats.linregress(self.AvgTemp_US_py['Year'], \
                                                                       self.AvgTemp_US_py['AverageTemperature'])

        # Create regression line
        self.regressLine = intercept + self.AvgTemp_US_py['Year'] * slope

        # Regression using Theil-Sen with 95% confidence intervals
        self.res = stats.theilslopes(self.AvgTemp_US_py['AverageTemperature'],
                                     self.AvgTemp_US_py['Year'], 0.95)
Example #18
def TheilSen_regression(x, y, confidence_inter):
    '''
    apply Theil-Sen estimator to points
    :param x: x values of points
    :param y: y values of points
    :param confidence_inter: the confidence interval; note that 0.1 and 0.9 give the same output
    :return: intercept b, slope, lower slope, upper slope
    '''

    res = stats.theilslopes(y, x, confidence_inter)

    # intercept b, slope, lower slope, upper slope
    return res[1], res[0], res[2], res[3]
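A brief call sketch with synthetic data (not from the original project); note that the function reorders the theilslopes output so the intercept comes first.

import numpy as np
from scipy import stats

x = np.arange(20, dtype=float)
y = 3.0 * x + 2.0

b, slope, lower_slope, upper_slope = TheilSen_regression(x, y, 0.95)
print(b, slope)  # approximately 2.0 and 3.0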
Example #19
    def estimate_E_init(self):
        "Estimate energy value using the slope of the values to the peak of an arrhenius plot"
        if len(self.upslope_x) > 1:
            x = 1 / (self.k * self.upslope_x)
            y = np.log(self.upslope_y)

            try:
                slope, *vals = stats.theilslopes(
                    x, y, 0.9)  #maybe more robust given noisy data?
            except:
                slope, *vals = stats.linregress(x, y)

            self.E_init = abs(slope)
        else:
            self.E_init = 0.6  #Default value
Example #20
def sentheil_perchan(xvals, yvals, alpha=0.85):

    slope = np.empty((len(xvals)))
    upper_uncert = np.empty((len(xvals)))
    lower_uncert = np.empty((len(xvals)))

    for i, (xval, yval) in enumerate(zip(xvals, yvals)):

        out = theilslopes(yval, x=xval, alpha=alpha)

        slope[i] = out[0]
        upper_uncert[i] = out[3] - out[0]
        lower_uncert[i] = out[0] - out[2]

    return slope, lower_uncert, upper_uncert
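One way the per-channel helper above might be called (illustrative): each channel contributes one x array and one y array, and the asymmetric uncertainties come from the Theil-Sen confidence bounds.

import numpy as np
from scipy.stats import theilslopes  # the helper above calls theilslopes directly

rng = np.random.default_rng(4)
xvals = [np.arange(30.0) for _ in range(3)]             # one x array per channel
yvals = [1.5 * x + rng.normal(size=30) for x in xvals]  # matching y arrays

slope, lower_uncert, upper_uncert = sentheil_perchan(xvals, yvals, alpha=0.85)
print(slope)  # three slopes, all close to 1.5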
Example #21
def linear_fit(tt, xx, yy, eyy, method='ls'):
    #xx = 0.67*xx
    log_x = np.log10(xx)
    log_y = np.log10(yy)
    log_e_y = eyy / yy / np.log(10.)

    if method == 'ls':
        popt, pcov = curve_fit(lin_func, log_x, log_y, sigma=log_e_y)
        a, b = popt
        ea, eb = np.sqrt(np.diag(pcov))

    elif method == 'bces':
        log_e_x = eyy * 1.e-20 / np.log(10.) / yy
        cov = np.zeros_like(log_x)
        a_bces, b_bces, aerr_bces, berr_bces, covab = bces.bces.bces(
            log_x, log_e_x, log_y, log_e_y, cov)
        a = a_bces[3]
        ea = aerr_bces[3]
        b = b_bces[3]
        eb = berr_bces[3]
        #b = 10.**b_bces[3]
        #e_b = berr_bces[3] * 10.**b_bces[3] * np.log(10)

    elif method == 'siegel_h':
        a, b = stats.siegelslopes(log_y, log_x)
        eb, ea = 0, 0
    elif method == 'siegel_s':
        a, b = stats.siegelslopes(log_y, log_x, method='separate')
        eb, ea = 0, 0

    elif method == 'theil_sen':
        a, b, am, ap = stats.theilslopes(log_y, log_x, 0.68)
        eb, ea = a - am, 0

    elif method == 'rlm':
        log_X = sm.add_constant(log_x)
        resrlm = sm.RLM(log_y, log_X).fit()
        b, a = resrlm.params
        eb, ea = resrlm.bse

    #a,b = popt
    #ea ,eb = np.sqrt(np.diag(pcov))
    par = [a, 10**b]
    per = [ea, 10.**b * np.log(10) * eb]
    fit = pow_law_func(tt, par[0], par[1])
    return par, per, fit
Example #22
def fun(path, file, ylabel1, ylabel2):
    """

    :param path:
    :param file:
    :param ylabel1:
    :param ylabel2:
    """
    data = pd.read_excel(path, sheet_name=file, index_col=0)
    a = data.columns
    a = np.array(a)
    s = []
    r = []
    p = []
    s1 = []
    r1 = []
    for i in a[1:]:
        x_data = data[a[0]]
        y_data = data[i]
        OLS = stats.linregress(x_data, y_data)
        Theil = stats.theilslopes(y_data, x_data)
        s.append(OLS[0])
        r.append(OLS[2] ** 2)
        p.append(OLS[3])
        s1.append(Theil[0])
        y_p = Theil[1] + Theil[0] * x_data
        y_p1 = OLS[1] + OLS[0] * x_data
        ssr = ((y_p - y_data.mean()) ** 2).sum()
        sst = ((y_data - y_data.mean()) ** 2).sum()
        r1.append(ssr / sst)
        # fig = plt.figure()
        # plt.plot(x_data, y_data, '.', color='b', ms=5)
        # plt.plot(x_data, y_p, color='r', )
        # plt.title(file + '_' + i + '_Theil-Sen trend plot')
        # if i in a[1:6]:
        #     plt.ylabel(ylabel1)
        # else:
        #     plt.ylabel(ylabel2)
        # plt.xlabel('Year')
        # fig.savefig('Z:/Group/Liuqiang/CEVSA趋势图/' + file + '_8018' + '_' + i + '_Theil-Sen趋势图.png', dpi=750,
        #             bbox_inches='tight')
    data_tavg_s = pd.DataFrame({'Region': a[1:], 'slope': s, 'r_squared': r, 'pvalue': p, 'Theil-Sen_slope': s1,
                                'Theil-Sen_r_squared': r1})
    with pd.ExcelWriter(path, mode='a', engine='openpyxl') as writer:
        data_tavg_s.to_excel(writer, sheet_name=file + '_trend')
Example #23
def check_powerlaw(xs, ys, trunc=None, e_trunc=None, add_to_log=False, color=None):

    if trunc is not None:
        xs = xs[trunc:]
        ys = ys[trunc:]
    if e_trunc is not None:
        xs = xs[:-e_trunc]
        ys = ys[:-e_trunc]

    r = linregress(np.log10(xs), np.log10(ys))

    if add_to_log:
        plt.plot(xs, 10**(r.intercept) * (xs **(r.slope)), color=color)
    else:
        plt.plot(np.log10(xs), r.slope*np.log10(xs) + r.intercept)
        plt.scatter(np.log10(xs), np.log10(ys))
    
    print('linregress slope:', r.slope, 'linregress rvalue:', r.rvalue)
    ts = theilslopes(np.log10(ys), np.log10(xs))
    print('Theil-Sen slope:', ts[0], '95% CI:', ts[2], ts[3])
Example #24
    def plotNS(self, ns, db, wellcombo='XX-YY', ax='None'):
        validres = False
        if (len(db) > 1):
            res = stats.theilslopes(db, ns, 0.90)
            lsq_res = stats.linregress(ns, db)
            validres = True
        else:
            res = []
            lsq_res = []
        # Plot the results. The Theil-Sen regression line is shown in red, with the
        # dashed red lines illustrating the confidence interval of the slope (note
        # that the dashed red lines are not the confidence interval of the regression,
        # as the confidence interval of the intercept is not included). The green line
        # shows the least-squares fit for comparison.
        genFig = False
        if ax == 'None':
            fig = plt.figure()
            ax = fig.add_subplot(111)
            genFig = True

        ax.plot(ns, db, 'bo')
        if (validres):
            ax.plot(ns, res[1] + res[0] * ns, 'r-')
            ax.plot(ns, lsq_res[1] + lsq_res[0] * ns, 'g-')
        ax.set_xlabel('ln(ns)')
        ax.set_ylabel('dB Power')

        # confidence interval of slopes
        # ax.plot(ns, res[1] + res[2] * ns, 'r--')
        #ax.plot(ns, res[1] + res[3] * ns, 'r--')

        props = dict(boxstyle='round', facecolor='wheat', alpha=0.5)
        ax.text(0.55,
                0.9,
                wellcombo,
                transform=ax.transAxes,
                fontsize=14,
                verticalalignment='top',
                bbox=props)
        if (genFig):
            plt.show()
            return res
        else:
            return res, ax
Example #25
def theilsen(s, alpha=0.95):
    """
    Function to compute Theil-Sen slopes.
    :param s: pandas.Series
    :param alpha: Confidence degree between 0 and 1. Default is 95% confidence.
    :return: trd: pandas.Series containing the slope and intercept of the median fit and of the lower and upper confidence bounds.

    """
    x = s.index.to_julian_date()
    x = np.reshape(x, (-1, 1))
    trd = stats.theilslopes(s, x, alpha=alpha)
    interc_low = np.median(s) - np.median(x) * trd[2]
    interc_upp = np.median(s) - np.median(x) * trd[3]
    trd = pd.Series({
        'median_slope': trd[0],
        'median_interc': trd[1],
        'lower_CI_slope': trd[2],
        'lower_CI_interc': interc_low,
        'upper_CI_slope': trd[3],
        'upper_CI_interc': interc_upp
    })
    return trd
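A small usage sketch for theilsen with made-up data (illustrative only): the Series must carry a DatetimeIndex, since the function converts the index to Julian dates before fitting.

import numpy as np
import pandas as pd
from scipy import stats

idx = pd.date_range("2000-01-01", periods=120, freq="MS")
s = pd.Series(np.linspace(0.0, 6.0, 120), index=idx)  # steady upward drift

trd = theilsen(s, alpha=0.95)
print(trd["median_slope"])  # slope per Julian day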
Example #26
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("-i", "--infile", required=True, help="Tabular file.")
    parser.add_argument("-o", "--outfile", required=True, help="Path to the output file.")
    parser.add_argument("--sample_one_cols", help="Input format, like smi, sdf, inchi")
    parser.add_argument("--sample_two_cols", help="Input format, like smi, sdf, inchi")
    parser.add_argument("--sample_cols", help="Input format, like smi, sdf, inchi,separate arrays using ;")
    parser.add_argument("--test_id", help="statistical test method")
    parser.add_argument(
        "--mwu_use_continuity",
        action="store_true",
        default=False,
        help="Whether a continuity correction (1/2.) should be taken into account.",
    )
    parser.add_argument(
        "--equal_var",
        action="store_true",
        default=False,
        help="If set perform a standard independent 2 sample test that assumes equal population variances. If not set, perform Welch's t-test, which does not assume equal population variance.",
    )
    parser.add_argument(
        "--reta", action="store_true", default=False, help="Whether or not to return the internally computed a values."
    )
    parser.add_argument("--fisher", action="store_true", default=False, help="if true then Fisher definition is used")
    parser.add_argument(
        "--bias",
        action="store_true",
        default=False,
        help="if false,then the calculations are corrected for statistical bias",
    )
    parser.add_argument("--inclusive1", action="store_true", default=False, help="if false,lower_limit will be ignored")
    parser.add_argument(
        "--inclusive2", action="store_true", default=False, help="if false,higher_limit will be ignored"
    )
    parser.add_argument("--inclusive", action="store_true", default=False, help="if false,limit will be ignored")
    parser.add_argument(
        "--printextras",
        action="store_true",
        default=False,
        help="If True, if there are extra points a warning is raised saying how many of those points there are",
    )
    parser.add_argument(
        "--initial_lexsort",
        action="store_true",
        default="False",
        help="Whether to use lexsort or quicksort as the sorting method for the initial sort of the inputs.",
    )
    parser.add_argument("--correction", action="store_true", default=False, help="continuity correction ")
    parser.add_argument(
        "--axis",
        type=int,
        default=0,
        help="Axis can equal None (ravel array first), or an integer (the axis over which to operate on a and b)",
    )
    parser.add_argument(
        "--n",
        type=int,
        default=0,
        help="the number of trials. This is ignored if x gives both the number of successes and failures",
    )
    parser.add_argument("--b", type=int, default=0, help="The number of bins to use for the histogram")
    parser.add_argument("--N", type=int, default=0, help="Score that is compared to the elements in a.")
    parser.add_argument("--ddof", type=int, default=0, help="Degrees of freedom correction")
    parser.add_argument("--score", type=int, default=0, help="Score that is compared to the elements in a.")
    parser.add_argument("--m", type=float, default=0.0, help="limits")
    parser.add_argument("--mf", type=float, default=2.0, help="lower limit")
    parser.add_argument("--nf", type=float, default=99.9, help="higher_limit")
    parser.add_argument(
        "--p",
        type=float,
        default=0.5,
        help="The hypothesized probability of success. 0 <= p <= 1. The default value is p = 0.5",
    )
    parser.add_argument("--alpha", type=float, default=0.9, help="probability")
    parser.add_argument("--new", type=float, default=0.0, help="Value to put in place of values in a outside of bounds")
    parser.add_argument(
        "--proportiontocut",
        type=float,
        default=0.0,
        help="Proportion (in range 0-1) of total data set to trim of each end.",
    )
    parser.add_argument(
        "--lambda_",
        type=float,
        default=1.0,
        help="lambda_ gives the power in the Cressie-Read power divergence statistic",
    )
    parser.add_argument(
        "--imbda",
        type=float,
        default=0,
        help="If lmbda is not None, do the transformation for that value.If lmbda is None, find the lambda that maximizes the log-likelihood function and return it as the second output argument.",
    )
    parser.add_argument("--base", type=float, default=1.6, help="The logarithmic base to use, defaults to e")
    parser.add_argument("--dtype", help="dtype")
    parser.add_argument("--med", help="med")
    parser.add_argument("--cdf", help="cdf")
    parser.add_argument("--zero_method", help="zero_method options")
    parser.add_argument("--dist", help="dist options")
    parser.add_argument("--ties", help="ties options")
    parser.add_argument("--alternative", help="alternative options")
    parser.add_argument("--mode", help="mode options")
    parser.add_argument("--method", help="method options")
    parser.add_argument("--md", help="md options")
    parser.add_argument("--center", help="center options")
    parser.add_argument("--kind", help="kind options")
    parser.add_argument("--tail", help="tail options")
    parser.add_argument("--interpolation", help="interpolation options")
    parser.add_argument("--statistic", help="statistic options")

    args = parser.parse_args()
    infile = args.infile
    outfile = open(args.outfile, "w+")
    test_id = args.test_id
    nf = args.nf
    mf = args.mf
    imbda = args.imbda
    inclusive1 = args.inclusive1
    inclusive2 = args.inclusive2
    sample0 = 0
    sample1 = 0
    sample2 = 0
    if args.sample_cols != None:
        sample0 = 1
        barlett_samples = []
        for sample in args.sample_cols.split(";"):
            barlett_samples.append(map(int, sample.split(",")))
    if args.sample_one_cols != None:
        sample1 = 1
        sample_one_cols = args.sample_one_cols.split(",")
    if args.sample_two_cols != None:
        sample_two_cols = args.sample_two_cols.split(",")
        sample2 = 1
    for line in open(infile):
        sample_one = []
        sample_two = []
        cols = line.strip().split("\t")
        if sample0 == 1:
            b_samples = columns_to_values(barlett_samples, line)
        if sample1 == 1:
            for index in sample_one_cols:
                sample_one.append(cols[int(index) - 1])
        if sample2 == 1:
            for index in sample_two_cols:
                sample_two.append(cols[int(index) - 1])
        if test_id.strip() == "describe":
            size, min_max, mean, uv, bs, bk = stats.describe(map(float, sample_one))
            cols.append(size)
            cols.append(min_max)
            cols.append(mean)
            cols.append(uv)
            cols.append(bs)
            cols.append(bk)
        elif test_id.strip() == "mode":
            vals, counts = stats.mode(map(float, sample_one))
            cols.append(vals)
            cols.append(counts)
        elif test_id.strip() == "nanmean":
            m = stats.nanmean(map(float, sample_one))
            cols.append(m)
        elif test_id.strip() == "nanmedian":
            m = stats.nanmedian(map(float, sample_one))
            cols.append(m)
        elif test_id.strip() == "kurtosistest":
            z_value, p_value = stats.kurtosistest(map(float, sample_one))
            cols.append(z_value)
            cols.append(p_value)
        elif test_id.strip() == "variation":
            ra = stats.variation(map(float, sample_one))
            cols.append(ra)
        elif test_id.strip() == "itemfreq":
            freq = stats.itemfreq(map(float, sample_one))
            for list in freq:
                elements = ",".join(map(str, list))
                cols.append(elements)
        elif test_id.strip() == "nanmedian":
            m = stats.nanmedian(map(float, sample_one))
            cols.append(m)
        elif test_id.strip() == "variation":
            ra = stats.variation(map(float, sample_one))
            cols.append(ra)
        elif test_id.strip() == "boxcox_llf":
            IIf = stats.boxcox_llf(imbda, map(float, sample_one))
            cols.append(IIf)
        elif test_id.strip() == "tiecorrect":
            fa = stats.tiecorrect(map(float, sample_one))
            cols.append(fa)
        elif test_id.strip() == "rankdata":
            r = stats.rankdata(map(float, sample_one), method=args.md)
            cols.append(r)
        elif test_id.strip() == "nanstd":
            s = stats.nanstd(map(float, sample_one), bias=args.bias)
            cols.append(s)
        elif test_id.strip() == "anderson":
            A2, critical, sig = stats.anderson(map(float, sample_one), dist=args.dist)
            cols.append(A2)
            for list in critical:
                cols.append(list)
            cols.append(",")
            for list in sig:
                cols.append(list)
        elif test_id.strip() == "binom_test":
            p_value = stats.binom_test(map(float, sample_one), n=args.n, p=args.p)
            cols.append(p_value)
        elif test_id.strip() == "gmean":
            gm = stats.gmean(map(float, sample_one), dtype=args.dtype)
            cols.append(gm)
        elif test_id.strip() == "hmean":
            hm = stats.hmean(map(float, sample_one), dtype=args.dtype)
            cols.append(hm)
        elif test_id.strip() == "kurtosis":
            k = stats.kurtosis(map(float, sample_one), axis=args.axis, fisher=args.fisher, bias=args.bias)
            cols.append(k)
        elif test_id.strip() == "moment":
            n_moment = stats.moment(map(float, sample_one), n=args.n)
            cols.append(n_moment)
        elif test_id.strip() == "normaltest":
            k2, p_value = stats.normaltest(map(float, sample_one))
            cols.append(k2)
            cols.append(p_value)
        elif test_id.strip() == "skew":
            skewness = stats.skew(map(float, sample_one), bias=args.bias)
            cols.append(skewness)
        elif test_id.strip() == "skewtest":
            z_value, p_value = stats.skewtest(map(float, sample_one))
            cols.append(z_value)
            cols.append(p_value)
        elif test_id.strip() == "sem":
            s = stats.sem(map(float, sample_one), ddof=args.ddof)
            cols.append(s)
        elif test_id.strip() == "zscore":
            z = stats.zscore(map(float, sample_one), ddof=args.ddof)
            for list in z:
                cols.append(list)
        elif test_id.strip() == "signaltonoise":
            s2n = stats.signaltonoise(map(float, sample_one), ddof=args.ddof)
            cols.append(s2n)
        elif test_id.strip() == "percentileofscore":
            p = stats.percentileofscore(map(float, sample_one), score=args.score, kind=args.kind)
            cols.append(p)
        elif test_id.strip() == "bayes_mvs":
            c_mean, c_var, c_std = stats.bayes_mvs(map(float, sample_one), alpha=args.alpha)
            cols.append(c_mean)
            cols.append(c_var)
            cols.append(c_std)
        elif test_id.strip() == "sigmaclip":
            c, c_low, c_up = stats.sigmaclip(map(float, sample_one), low=args.m, high=args.n)
            cols.append(c)
            cols.append(c_low)
            cols.append(c_up)
        elif test_id.strip() == "kstest":
            d, p_value = stats.kstest(
                map(float, sample_one), cdf=args.cdf, N=args.N, alternative=args.alternative, mode=args.mode
            )
            cols.append(d)
            cols.append(p_value)
        elif test_id.strip() == "chi2_contingency":
            chi2, p, dof, ex = stats.chi2_contingency(
                map(float, sample_one), correction=args.correction, lambda_=args.lambda_
            )
            cols.append(chi2)
            cols.append(p)
            cols.append(dof)
            cols.append(ex)
        elif test_id.strip() == "tmean":
            if nf == 0 and mf == 0:
                mean = stats.tmean(map(float, sample_one))
            else:
                mean = stats.tmean(map(float, sample_one), (mf, nf), (inclusive1, inclusive2))
            cols.append(mean)
        elif test_id.strip() == "tmin":
            if mf == 0:
                min = stats.tmin(map(float, sample_one))
            else:
                min = stats.tmin(map(float, sample_one), lowerlimit=mf, inclusive=args.inclusive)
            cols.append(min)
        elif test_id.strip() == "tmax":
            if nf == 0:
                max = stats.tmax(map(float, sample_one))
            else:
                max = stats.tmax(map(float, sample_one), upperlimit=nf, inclusive=args.inclusive)
            cols.append(max)
        elif test_id.strip() == "tvar":
            if nf == 0 and mf == 0:
                var = stats.tvar(map(float, sample_one))
            else:
                var = stats.tvar(map(float, sample_one), (mf, nf), (inclusive1, inclusive2))
            cols.append(var)
        elif test_id.strip() == "tstd":
            if nf == 0 and mf == 0:
                std = stats.tstd(map(float, sample_one))
            else:
                std = stats.tstd(map(float, sample_one), (mf, nf), (inclusive1, inclusive2))
            cols.append(std)
        elif test_id.strip() == "tsem":
            if nf == 0 and mf == 0:
                s = stats.tsem(map(float, sample_one))
            else:
                s = stats.tsem(map(float, sample_one), (mf, nf), (inclusive1, inclusive2))
            cols.append(s)
        elif test_id.strip() == "scoreatpercentile":
            if nf == 0 and mf == 0:
                s = stats.scoreatpercentile(
                    map(float, sample_one), map(float, sample_two), interpolation_method=args.interpolation
                )
            else:
                s = stats.scoreatpercentile(
                    map(float, sample_one), map(float, sample_two), (mf, nf), interpolation_method=args.interpolation
                )
            for list in s:
                cols.append(list)
        elif test_id.strip() == "relfreq":
            if nf == 0 and mf == 0:
                rel, low_range, binsize, ex = stats.relfreq(map(float, sample_one), args.b)
            else:
                rel, low_range, binsize, ex = stats.relfreq(map(float, sample_one), args.b, (mf, nf))
            for list in rel:
                cols.append(list)
            cols.append(low_range)
            cols.append(binsize)
            cols.append(ex)
        elif test_id.strip() == "binned_statistic":
            if nf == 0 and mf == 0:
                st, b_edge, b_n = stats.binned_statistic(
                    map(float, sample_one), map(float, sample_two), statistic=args.statistic, bins=args.b
                )
            else:
                st, b_edge, b_n = stats.binned_statistic(
                    map(float, sample_one),
                    map(float, sample_two),
                    statistic=args.statistic,
                    bins=args.b,
                    range=(mf, nf),
                )
            cols.append(st)
            cols.append(b_edge)
            cols.append(b_n)
        elif test_id.strip() == "threshold":
            if nf == 0 and mf == 0:
                o = stats.threshold(map(float, sample_one), newval=args.new)
            else:
                o = stats.threshold(map(float, sample_one), mf, nf, newval=args.new)
            for list in o:
                cols.append(list)
        elif test_id.strip() == "trimboth":
            o = stats.trimboth(map(float, sample_one), proportiontocut=args.proportiontocut)
            for list in o:
                cols.append(list)
        elif test_id.strip() == "trim1":
            t1 = stats.trim1(map(float, sample_one), proportiontocut=args.proportiontocut, tail=args.tail)
            for list in t1:
                cols.append(list)
        elif test_id.strip() == "histogram":
            if nf == 0 and mf == 0:
                hi, low_range, binsize, ex = stats.histogram(map(float, sample_one), args.b)
            else:
                hi, low_range, binsize, ex = stats.histogram(map(float, sample_one), args.b, (mf, nf))
            cols.append(hi)
            cols.append(low_range)
            cols.append(binsize)
            cols.append(ex)
        elif test_id.strip() == "cumfreq":
            if nf == 0 and mf == 0:
                cum, low_range, binsize, ex = stats.cumfreq(map(float, sample_one), args.b)
            else:
                cum, low_range, binsize, ex = stats.cumfreq(map(float, sample_one), args.b, (mf, nf))
            cols.append(cum)
            cols.append(low_range)
            cols.append(binsize)
            cols.append(ex)
        elif test_id.strip() == "boxcox_normmax":
            if nf == 0 and mf == 0:
                ma = stats.boxcox_normmax(map(float, sample_one))
            else:
                ma = stats.boxcox_normmax(map(float, sample_one), (mf, nf), method=args.method)
            cols.append(ma)
        elif test_id.strip() == "boxcox":
            if imbda == 0:
                box, ma, ci = stats.boxcox(map(float, sample_one), alpha=args.alpha)
                cols.append(box)
                cols.append(ma)
                cols.append(ci)
            else:
                box = stats.boxcox(map(float, sample_one), imbda, alpha=args.alpha)
                cols.append(box)
        elif test_id.strip() == "histogram2":
            h2 = stats.histogram2(map(float, sample_one), map(float, sample_two))
            for list in h2:
                cols.append(list)
        elif test_id.strip() == "ranksums":
            z_statistic, p_value = stats.ranksums(map(float, sample_one), map(float, sample_two))
            cols.append(z_statistic)
            cols.append(p_value)
        elif test_id.strip() == "ttest_1samp":
            t, prob = stats.ttest_1samp(map(float, sample_one), map(float, sample_two))
            for list in t:
                cols.append(list)
            for list in prob:
                cols.append(list)
        elif test_id.strip() == "ansari":
            AB, p_value = stats.ansari(map(float, sample_one), map(float, sample_two))
            cols.append(AB)
            cols.append(p_value)
        elif test_id.strip() == "linregress":
            slope, intercept, r_value, p_value, stderr = stats.linregress(
                map(float, sample_one), map(float, sample_two)
            )
            cols.append(slope)
            cols.append(intercept)
            cols.append(r_value)
            cols.append(p_value)
            cols.append(stderr)
        elif test_id.strip() == "pearsonr":
            cor, p_value = stats.pearsonr(map(float, sample_one), map(float, sample_two))
            cols.append(cor)
            cols.append(p_value)
        elif test_id.strip() == "pointbiserialr":
            r, p_value = stats.pointbiserialr(map(float, sample_one), map(float, sample_two))
            cols.append(r)
            cols.append(p_value)
        elif test_id.strip() == "ks_2samp":
            d, p_value = stats.ks_2samp(map(float, sample_one), map(float, sample_two))
            cols.append(d)
            cols.append(p_value)
        elif test_id.strip() == "mannwhitneyu":
            mw_stats_u, p_value = stats.mannwhitneyu(
                map(float, sample_one), map(float, sample_two), use_continuity=args.mwu_use_continuity
            )
            cols.append(mw_stats_u)
            cols.append(p_value)
        elif test_id.strip() == "zmap":
            z = stats.zmap(map(float, sample_one), map(float, sample_two), ddof=args.ddof)
            for list in z:
                cols.append(list)
        elif test_id.strip() == "ttest_ind":
            mw_stats_u, p_value = stats.ttest_ind(
                map(float, sample_one), map(float, sample_two), equal_var=args.equal_var
            )
            cols.append(mw_stats_u)
            cols.append(p_value)
        elif test_id.strip() == "ttest_rel":
            t, prob = stats.ttest_rel(map(float, sample_one), map(float, sample_two), axis=args.axis)
            cols.append(t)
            cols.append(prob)
        elif test_id.strip() == "mood":
            z, p_value = stats.mood(map(float, sample_one), map(float, sample_two), axis=args.axis)
            cols.append(z)
            cols.append(p_value)
        elif test_id.strip() == "shapiro":
            W, p_value, a = stats.shapiro(map(float, sample_one), map(float, sample_two), args.reta)
            cols.append(W)
            cols.append(p_value)
            for list in a:
                cols.append(list)
        elif test_id.strip() == "kendalltau":
            k, p_value = stats.kendalltau(
                map(float, sample_one), map(float, sample_two), initial_lexsort=args.initial_lexsort
            )
            cols.append(k)
            cols.append(p_value)
        elif test_id.strip() == "entropy":
            s = stats.entropy(map(float, sample_one), map(float, sample_two), base=args.base)
            cols.append(s)
        elif test_id.strip() == "spearmanr":
            if sample2 == 1:
                rho, p_value = stats.spearmanr(map(float, sample_one), map(float, sample_two))
            else:
                rho, p_value = stats.spearmanr(map(float, sample_one))
            cols.append(rho)
            cols.append(p_value)
        elif test_id.strip() == "wilcoxon":
            if sample2 == 1:
                T, p_value = stats.wilcoxon(
                    map(float, sample_one),
                    map(float, sample_two),
                    zero_method=args.zero_method,
                    correction=args.correction,
                )
            else:
                T, p_value = stats.wilcoxon(
                    map(float, sample_one), zero_method=args.zero_method, correction=args.correction
                )
            cols.append(T)
            cols.append(p_value)
        elif test_id.strip() == "chisquare":
            if sample2 == 1:
                rho, p_value = stats.chisquare(map(float, sample_one), map(float, sample_two), ddof=args.ddof)
            else:
                rho, p_value = stats.chisquare(map(float, sample_one), ddof=args.ddof)
            cols.append(rho)
            cols.append(p_value)
        elif test_id.strip() == "power_divergence":
            if sample2 == 1:
                stat, p_value = stats.power_divergence(
                    map(float, sample_one), map(float, sample_two), ddof=args.ddof, lambda_=args.lambda_
                )
            else:
                stat, p_value = stats.power_divergence(map(float, sample_one), ddof=args.ddof, lambda_=args.lambda_)
            cols.append(stat)
            cols.append(p_value)
        elif test_id.strip() == "theilslopes":
            if sample2 == 1:
                mpe, met, lo, up = stats.theilslopes(map(float, sample_one), map(float, sample_two), alpha=args.alpha)
            else:
                mpe, met, lo, up = stats.theilslopes(map(float, sample_one), alpha=args.alpha)
            cols.append(mpe)
            cols.append(met)
            cols.append(lo)
            cols.append(up)
        elif test_id.strip() == "combine_pvalues":
            if sample2 == 1:
                stat, p_value = stats.combine_pvalues(
                    map(float, sample_one), method=args.med, weights=map(float, sample_two)
                )
            else:
                stat, p_value = stats.combine_pvalues(map(float, sample_one), method=args.med)
            cols.append(stat)
            cols.append(p_value)
        elif test_id.strip() == "obrientransform":
            ob = stats.obrientransform(*b_samples)
            for list in ob:
                elements = ",".join(map(str, list))
                cols.append(elements)
        elif test_id.strip() == "f_oneway":
            f_value, p_value = stats.f_oneway(*b_samples)
            cols.append(f_value)
            cols.append(p_value)
        elif test_id.strip() == "kruskal":
            h, p_value = stats.kruskal(*b_samples)
            cols.append(h)
            cols.append(p_value)
        elif test_id.strip() == "friedmanchisquare":
            fr, p_value = stats.friedmanchisquare(*b_samples)
            cols.append(fr)
            cols.append(p_value)
        elif test_id.strip() == "fligner":
            xsq, p_value = stats.fligner(center=args.center, proportiontocut=args.proportiontocut, *b_samples)
            cols.append(xsq)
            cols.append(p_value)
        elif test_id.strip() == "bartlett":
            T, p_value = stats.bartlett(*b_samples)
            cols.append(T)
            cols.append(p_value)
        elif test_id.strip() == "levene":
            w, p_value = stats.levene(center=args.center, proportiontocut=args.proportiontocut, *b_samples)
            cols.append(w)
            cols.append(p_value)
        elif test_id.strip() == "median_test":
            stat, p_value, m, table = stats.median_test(
                ties=args.ties, correction=args.correction, lambda_=args.lambda_, *b_samples
            )
            cols.append(stat)
            cols.append(p_value)
            cols.append(m)
            cols.append(table)
            for list in table:
                elements = ",".join(map(str, list))
                cols.append(elements)
        outfile.write("%s\n" % "\t".join(map(str, cols)))
    outfile.close()
Example #27
def _resistant_fit_linear(
        source: np.ndarray,
        target: np.ndarray,
        p: np.float32 = 0.75,
        verbose: bool = False,
        init_step: str = "ransac"
) -> Tuple[np.ndarray, np.float32, np.float32]:
    """
    Use resistant fit to normalize cell x (source) to the reference cell y (target).

    :param source: the gene expression of the cell x
    :param target: the gene expression of the cell y, reference cell
    :param p: the size of biological feature set
    :param verbose: verbose flag for debug
    :param init_step: initial robust fit, one of "ransac", "siegel", or "theil"
    :return:
        y_regression: the normalized 1d array
        slope: the final slope from EM Regression
        intercept: the final intercept from EM Regression
    """
    ########################################
    # Select valid genes for regression
    ########################################
    iter_limit = 20
    np.seterr(all='raise')
    select_mask = _preprocess(source, target)
    n_select = np.sum(select_mask)

    x_select = source[select_mask].copy()  # Note that len(x_select) <= source
    y_select = target[select_mask].copy()

    ############################################################
    # Init EM step: robust regression on all genes
    ############################################################
    # Robust regression on the whole dataset to ignore outliers
    if init_step == "ransac":
        ransac = linear_model.RANSACRegressor(random_state=42)
        ransac.fit(x_select.copy().reshape(-1, 1),
                   y_select.copy().reshape(-1, 1))
        slope, intercept = float(ransac.estimator_.coef_), float(
            ransac.estimator_.intercept_)
    elif init_step == "siegel":
        slope, intercept = stats.siegelslopes(y_select, x_select)
    elif init_step == "theil":
        slope, intercept, _, _ = stats.theilslopes(y_select, x_select)
    else:
        raise NameError(
            "init_step must be chosen from list ['ransac', 'siegel', 'theil']")

    y_regression = np.asarray(
        [slope * x_iter + intercept for x_iter in x_select])
    square_list = np.square(y_select - y_regression)
    square_list_index_sort = np.argsort(square_list)
    sub_index = square_list_index_sort[0:int(n_select * p) + 1]

    # Set Biological Feature Set (BFS)
    x_bfs = x_select[sub_index]
    y_bfs = y_select[sub_index]

    if verbose:
        logger.info(f'[Init EM step] slope:{slope},  intercept:{intercept}')

    ############################################################
    # Resistant Fit Regression on BFS
    ############################################################
    loss_pre = np.Inf
    for i in range(iter_limit):
        # E step: Linear regression on BFS
        slope, intercept, r_value, p_value, std_err = stats.linregress(
            x_bfs, y_bfs)

        y_regression = np.asarray(
            [slope * x_iter + intercept for x_iter in x_select])
        square_list = np.square(y_select - y_regression)
        square_list_index_sort = np.argsort(square_list)
        sub_index = square_list_index_sort[1:int(n_select * p) + 1]

        loss = np.sum(square_list[sub_index])

        delta_loss = abs(loss_pre - loss)
        if delta_loss < 0.00001:
            if verbose:
                logger.info('convergence')
            break
        else:
            loss_pre = loss

        # M step: Update x and y for next iteration of linear regression
        x_bfs = x_select[sub_index]
        y_bfs = y_select[sub_index]

        if i == iter_limit - 1:
            if not verbose:
                logger.info('[Resist Fit] Reach iteration limit')

    ############################################################
    # Normalize cell y based on regression model
    ############################################################
    y_regression = slope * source + intercept
    # If x is zero then clip y to 0
    # TODO: x is unlikely to be zero.
    y_regression[source == 0] = 0

    if verbose:
        logger.info(f'[Resist Fit] slope: {slope}, intercept: {intercept}')
        logger.info(
            f'depth(y_select): {np.sum(y_select)}, \n'
            f'depth(x_select): {np.sum(x_select)}, \n'
            f'depth(x): {np.sum(source)},\n'
            f'depth(norm): {np.sum(y_regression)},\n'
            f'depth(y): {np.sum(target)}\n'
            f'y_select/x_select: {np.sum(y_select) / np.sum(x_select)}')

    return np.float32(y_regression), np.float32(slope), np.float32(intercept)
Example #28
def YearlyStats(pdfs):
    
    #print('in yearly stats function')
    
    #where pdfs are for given lat, lon pair (a single grid cell)
    threshvals = [0, 10, 50]
    years = range(1948,2018)
    years3 = range(1948,2018,3)#for 3-year windows
    
    yearly_lag=   np.empty((len(threshvals),len(years3)))
    yearly_Itot=  np.empty((len(threshvals),len(years3)))
    yearly_Ifrac= np.empty((len(threshvals),len(years3)))
    yearly_prain= np.empty((len(threshvals),len(years3)))
    yearly_Icrit= np.empty((len(threshvals),len(years3)))
    yearly_Etanorm =  np.empty((len(threshvals),len(years3)))
    yearly_pdf =  np.empty((len(threshvals),len(years3),2,2,2))
    
    yearly_lag.fill(np.nan)
    yearly_Itot.fill(np.nan)
    yearly_Ifrac.fill(np.nan)
    yearly_prain.fill(np.nan)
    yearly_Icrit.fill(np.nan)
    yearly_Etanorm.fill(np.nan)
    
    Trend_Eta=   np.empty((len(threshvals)))
    Trend_Lag=   np.empty((len(threshvals)))
    Trend_Ifrac=   np.empty((len(threshvals)))
    Trend_Itot=   np.empty((len(threshvals)))
    Trend_Prain=   np.empty((len(threshvals)))
    trendlist=   np.empty((len(threshvals)))
    
    Trend_Eta.fill(np.nan)
    Trend_Lag.fill(np.nan)
    Trend_Ifrac.fill(np.nan)
    Trend_Itot.fill(np.nan)
    Trend_Prain.fill(np.nan)
    trendlist.fill(np.nan)
    
    if np.size(pdfs)<2: #if no value for that lat,lon index (e.g. over water)
        return 0

    for p_ind,p in enumerate(threshvals):
          
        #print(s)
        
        pdfs_years = [pdfs[y][p_ind][:] for y,i in enumerate(years)]
     
        # also information measures for 3-year moving windows
        for y_ind,y in enumerate(years3):
            
            start_ind = y - 1948
            end_ind = np.min([start_ind+3,len(years)])
                
            pdfs_3years = pdfs_years[start_ind:end_ind][:]
            avgs = np.average(pdfs_3years,axis=0)
            

            info_list=[]
            for a in avgs:
                info_list.append(compute_info_measures(a))

            Ivect = [float(info_list[i]['Itotal']) for i in range(0,18)]
            Ivect = np.asfarray(Ivect)
                       
            p_rain = np.sum(avgs[0][:][:][1]) 
            npts = 3*365
            n_ones = int(npts * p_rain)
            
            Icrit = find_Icrit_binary(n_ones,npts,100)
            Ivect[Ivect<Icrit]=0

            maxindex = np.argmax(Ivect)
            maxIval = np.max(Ivect)
                
            pdfmax = avgs[maxindex] 
            infomax = info_list[maxindex]
                
            if maxIval>0:
                Itot = float(infomax['Itotal'])
                Ifrac = float(infomax['Ifrac'])
                LagMax_all = maxindex
                Eta = float(infomax['Eta'])
            else:
                Itot = 0
                Ifrac=0
                LagMax_all = float('NaN')
                Eta = float('NaN')
          
            yearly_lag[p_ind,y_ind]=LagMax_all
            yearly_Itot[p_ind,y_ind] = Itot
            yearly_Ifrac[p_ind,y_ind] = Ifrac
            yearly_prain[p_ind,y_ind] =p_rain
            yearly_Icrit[p_ind,y_ind] =Icrit
            yearly_Etanorm[p_ind,y_ind] =Eta
            yearly_pdf[p_ind,y_ind,:,:,:]=pdfmax

        #determine annual trends
        Ifrac_avg = yearly_Ifrac[p_ind,:]
        Prain_avg = yearly_prain[p_ind,:]
        Eta_avg = yearly_Etanorm[p_ind,:]
        Lag_avg = yearly_lag[p_ind,:]
        
        #update: only compute trend on stat sig values (omit zeros and nan values) 
        Prain_avg = Prain_avg[Ifrac_avg>0]
        Lag_avg = Lag_avg[Ifrac_avg>0]
        Eta_avg = Eta_avg[Ifrac_avg>0]
                                        
        YearVect = np.asarray(years3)
        YearVect= YearVect[Ifrac_avg>0]
        
        Ifrac_avg = Ifrac_avg[Ifrac_avg>0]
        
        if len(YearVect)>15:
        
            SenIfrac = stats.theilslopes(Ifrac_avg,YearVect, 0.9)
            SenPrain = stats.theilslopes(Prain_avg,YearVect, 0.9)
            SenEta = stats.theilslopes(Eta_avg,YearVect, 0.9)
            SenLag = stats.theilslopes(Lag_avg,YearVect, 0.9)
        
            #print SenItot[0], SenIfrac[0], SenPrain[0]
    
            sign_Ifrac_slope = np.sign(SenIfrac[2])*np.sign(SenIfrac[3])
            sign_Prain_slope = np.sign(SenPrain[2])*np.sign(SenPrain[3])
            sign_Eta_slope = np.sign(SenEta[2])*np.sign(SenEta[3])
            sign_Lag_slope = np.sign(SenLag[2])*np.sign(SenLag[3])
        
            # Encode the sign of each trend in its own decimal digit so the
            # four indicators can be summed into a single code (trendlist).
            if SenIfrac[0] > 0:
                SenIfrac_ind = 1
            else:
                SenIfrac_ind = -1

            if SenPrain[0] > 0:
                SenPrain_ind = 10
            else:
                SenPrain_ind = -10

            if SenEta[0] > 0:
                SenEta_ind = 100
            else:
                SenEta_ind = -100

            if SenLag[0] > 0:
                SenLag_ind = 1000
            else:
                SenLag_ind = -1000
                
            SenIfrac=SenIfrac[0]
            SenLag=SenLag[0]
            SenPrain=SenPrain[0]
            SenEta=SenEta[0]
            
            
            # Discard a trend when its confidence interval spans zero
            # (i.e. the lower and upper slope bounds have opposite signs).
            if sign_Ifrac_slope < 0:
                SenIfrac = 0
                SenIfrac_ind = 0
            if sign_Prain_slope < 0:
                SenPrain = 0
                SenPrain_ind = 0
            if sign_Eta_slope < 0:
                SenEta = 0
                SenEta_ind = 0
            if sign_Lag_slope < 0:
                SenLag = 0
                SenLag_ind = 0
                
            
            trendlist[p_ind] = SenIfrac_ind+SenPrain_ind+SenEta_ind+SenLag_ind
            Trend_Ifrac[p_ind]=SenIfrac
            Trend_Prain[p_ind]=SenPrain
            Trend_Eta[p_ind]=SenEta
            Trend_Lag[p_ind]=SenLag
        else:
            trendlist[p_ind] =  float('NaN')
            Trend_Ifrac[p_ind]= float('NaN')
            Trend_Prain[p_ind]= float('NaN')
            Trend_Eta[p_ind]= float('NaN')
            Trend_Lag[p_ind]= float('NaN')
            
    results_dict = {'yearly_lag':yearly_lag, 'yearly_eta':yearly_Etanorm,'yearly_Ifrac':yearly_Ifrac,
                    'yearly_Itot':yearly_Itot, 'yearly_pdf':yearly_pdf,'yearly_prain':yearly_prain,
                    'trendlist':trendlist, 'Trend_Ifrac':Trend_Ifrac,'Trend_Prain':Trend_Prain,
                    'Trend_Eta':Trend_Eta,'Trend_Lag':Trend_Lag}
    
    return results_dict
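The trend logic above fits a Theil-Sen slope only to the years flagged as significant and then discards any slope whose confidence interval brackets zero. A minimal, self-contained sketch of that check on synthetic data (names and values below are illustrative, not taken from the function above):

import numpy as np
from scipy import stats

rng = np.random.default_rng(0)
years = np.arange(1950, 2000)
vals = 0.02 * (years - years[0]) + rng.normal(0, 0.1, years.size)

# Keep only the "significant" samples, mirroring the Ifrac_avg > 0 filter above.
keep = vals > 0
slope, intercept, lo, hi = stats.theilslopes(vals[keep], years[keep], 0.90)

# Discard the trend when the 90% interval brackets zero.
trend = slope if np.sign(lo) * np.sign(hi) > 0 else 0.0
print(trend)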
def calc_gradients(dates, np_days, values, windows, timeseries_id,
                   csv_writer_verbose, csv_writer_simple):
    """
    Calculate theilslopes for each window and indicate significance.

    Theilslopes are robust trendlines. Result is directly plottable (i.e.
    contains two points for each slope), and indicates with a boolean whether
    significant changes occur:
     - up: significant positive change*
     - down: significant negative change*
     * this change is determined by comparing the confidence intervals of each
       theilslope. When these confidence intervals do not overlap, a
       significant change is found (a standalone sketch of this check follows
       the function). The confidence interval can be set via
       THEILSLOPE_CONFIDENCE_INTERVAL.

    :param dates: dates of the series in the numpy datetime64 format
    :param np_days: day numbers corresponding to `dates` (x-axis of the fits)
    :param values: one-dimensional numpy array with values
    :param windows: list of windows (ranges) for each set of hydrologic year
    :param timeseries_id: identifier of the series, written to the csv output
    :param csv_writer_verbose: csv writer receiving one row per significant
                               comparison
    :param csv_writer_simple: csv writer receiving one row per window
    :return: list with, per window, (datewindow, valuewindow, up, down), where
             up and down indicate whether a significant difference occurs
    """

    def calc_y(slopes, index, day):
        """calculate slope of ."""
        return slopes[1] + slopes[index] * np_days[day]

    # Calculate theilslopes
    slopes = [theilslopes(values[w[0]:w[1]], np_days[w[0]:w[1]],
                          THEILSLOPE_CONFIDENCE_INTERVAL)
              for w in windows]
    # the returned slopes contains, for each window: a [0](median slope),
    # [1](intercept), [2](lower slope), and a [3](upper slope)
    up = False
    down = False
    n_slopes = len(slopes) - 1
    years_significance = NR_YEARS_SIGNIFICANCE_IS_DETERMINED_FOR
    n_comparisons = NR_SIGNIFICANCE_COMPARISONS
    result = []
    for i, slope in enumerate(slopes):
        # Calculate each start and endpoint to be used by matplotlib.
        first = windows[i][0]
        last = windows[i][1]
        datewindow = [dates[first].astype(dt), dates[last].astype(dt)]
        valuewindow = [calc_y(slope, 0, first), calc_y(slope, 0, last)]
        # Determine whether significant differences occur with already
        # iterated slopes.
        if i > n_slopes - years_significance:
            for k in range(n_comparisons):
                j = n_slopes - k - 1
                if j < 0 or j >= i:
                    if n_slopes < years_significance:
                        years_significance = n_slopes
                        logger.debug("NR_YEARS_SIGNIFICANCE_IS_DETERMINED_FOR "
                                     "too large, set to {}".format(n_slopes))
                    if n_slopes < n_comparisons:
                        n_comparisons = n_slopes
                        logger.debug("NR_SIGNIFICANCE_COMPARISONS "
                                     "too large, set to {}".format(n_slopes))
                    continue
                this_down = slope[3] < slopes[j][2]
                this_up = slope[2] > slopes[j][3]
                # Store significant changes to csv when set
                if (this_up or this_down) and WRITE_SIGNIFICANT_CHANGES:
                    significant_changes = (
                        timeseries_id,  # measurementpoint_id
                        this_up,  # change_up?
                        this_down,  # change_down?
                        result[j][0][0],  # first_datewindow_start
                        result[j][0][1],  # first_datewindow_end
                        datewindow[0],  # last_datewindow_start
                        datewindow[1],  # last_datewindow_end
                        slopes[j][2],  # first_lower_bound
                        slopes[j][0],  # first_median_slope
                        slopes[j][3],  # first_upper_bound
                        slope[2],  # last_lower_bound
                        slope[0],  # last_median_slope
                        slope[3]  # last_upper_bound
                    )
                    new_line = [str(x) for x in significant_changes]
                    csv_writer_verbose.writerow(new_line)
                up = this_up or up
                down = this_down or down
        # Indented one level out of the if block, because we want to write all
        # changes for all date windows.
        if WRITE_SIGNIFICANT_CHANGES:
            simple_changes = (
                timeseries_id,
                up,
                down,
                up and down,  # both
                datewindow[0],  # datewindow_start
                datewindow[1],  # datewindow_end
                slope[2],  # lower_bound
                slope[0],  # median_slope
                slope[3]  # upper_bound
            )
            simple_changes = [str(x) for x in simple_changes]
            csv_writer_simple.writerow(simple_changes)
        result.append((datewindow, valuewindow, up, down))
    return result
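Below is a minimal standalone sketch (not part of calc_gradients) of the significance test described in the docstring: fit theilslopes to two windows and flag a change when their confidence intervals do not overlap. The synthetic series and names are illustrative assumptions:

import numpy as np
from scipy.stats import theilslopes

rng = np.random.default_rng(1)
days = np.arange(730)
values = np.where(days < 365, 0.001 * days, 0.02 * days) + rng.normal(0, 0.05, days.size)

# Each fit returns (median slope, intercept, lower slope, upper slope).
first = theilslopes(values[:365], days[:365], 0.95)
last = theilslopes(values[365:], days[365:], 0.95)

up = last[2] > first[3]    # later lower bound above the earlier upper bound
down = last[3] < first[2]  # later upper bound below the earlier lower bound
print(up, down)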
Example #30
0
def theil_slopes(data_set, var_code, verbose=False):
    """
    Pixel-wise trends using Theil-Sen slope estimator.

    Parameters
    ----------
    data_set : xarray Dataset object
        Input data containing a `time` dimension.
    var_code : str
        Name of the variable inside `data_set` object.       
    verbose : bool, optional, default is False
        If True, then prints a progress bar for loop over spatial grid points.

    Returns
    -------
    results : xarray Dataset object
        Results of Theil-Sen estimator. This object contains variables for 
        slope and intercept for each grid point.
    """

    # Extract xarray DataArray object.
    data_array = getattr(data_set, var_code)

    # Only land pixels.
    if "land_mask" in data_set.coords:
        data_array = data_array.where(data_array.land_mask == True, drop=True)

    # Prepare data for the analysis.
    data_array = cdlearn.utils.organize_data(data_array)

    # Time, latitude, and longitude (strings).
    dim0, dim1, dim2 = cdlearn.utils.normalize_names(data_array)

    # Extract data as numpy arrays.
    Y = data_array.values  # Dependent variable in regression.
    X = np.arange(Y.shape[0])  # Independent variable in regression.
    Ycol = Y.reshape((Y.shape[0], -1))  # Collapse latitude and longitude.

    # Element-wise mask for not a numbers.
    mask_nan = np.isnan(Y)

    # Collapse latitude and longitude.
    mask_nan = mask_nan.reshape((mask_nan.shape[0], -1))

    # This mask flags grid points where there are no data at all.
    mask_nan = np.all(mask_nan, axis=0)

    # Statistical results.
    r = np.nan * np.zeros((2, Ycol.shape[1]))

    if verbose:
        print(">>> Loop over grid points ...")
        time.sleep(1)
        bar = tqdm(total=Ycol.shape[1])

    # Loop over locations.
    for i in range(Ycol.shape[1]):

        # Good to go!
        if not mask_nan[i]:

            # Aggregate results.
            slope, intercept, _, _ = stats.theilslopes(Ycol[:, i], x=X)
            r[0, i] = slope
            r[1, i] = intercept

        if verbose:
            bar.update(1)

    # Close progress bar.
    if verbose:
        bar.close()

    # New shape: (results, latitude, longitude).
    r = r.reshape((2, Y.shape[1], Y.shape[2]))

    # Put results as an xarray DataArray object.
    results = xr.Dataset(
        data_vars={
            "slopes": ((dim1, dim2), r[0, :, :]),
            "intercepts": ((dim1, dim2), r[1, :, :]),
        },
        coords={
            dim1: getattr(data_array, dim1),
            dim2: getattr(data_array, dim2),
        },
    )

    # Maintain land mask coordinate into results.
    if "land_mask" in data_array.coords:
        results.coords["land_mask"] = data_array.land_mask

    return results
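For reference, a condensed, dependency-free sketch of the same pixel-wise approach on a plain (time, lat, lon) numpy array; the random cube and variable names are illustrative and the cdlearn helpers are not required:

import numpy as np
from scipy import stats

cube = np.random.default_rng(2).normal(size=(24, 4, 5))  # (time, lat, lon)
flat = cube.reshape(cube.shape[0], -1)                   # collapse lat/lon
x = np.arange(cube.shape[0])

out = np.full((2, flat.shape[1]), np.nan)
for i in range(flat.shape[1]):
    if not np.all(np.isnan(flat[:, i])):                 # skip all-NaN pixels
        slope, intercept, _, _ = stats.theilslopes(flat[:, i], x)
        out[:, i] = slope, intercept

slope_map = out[0].reshape(cube.shape[1:])               # (lat, lon)
intercept_map = out[1].reshape(cube.shape[1:])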
    def calc_soiling_rate(self, mode):
        # check which mode of operation
        if self.mode == 'eds_data.csv':
            # get the file to find the soiling rate
            output = self.output_loc+"/eds_sorted.csv"
            df = self.load_sorted(output)
            labels = ['EDS1_PRE', 'EDS2_PRE', 'EDS3_PRE', 'EDS4_PRE', 'EDS5_PRE', 'CTRL1_PRE', 'CTRL2_PRE',
                      'EDS1_POST','EDS2_POST','EDS3_POST','EDS4_POST','EDS5_POST','CTRL1_POST','CTRL2_POST']
            # declare soiling ratio dictionary (one empty list per label)
            soiling_ratios = {label: [] for label in labels}
            # get the soiling ratio values
            data_SR = df[['EDS/CTRL(#)', 'SR_Before','SR_After']]
            data_SR.set_index('EDS/CTRL(#)', inplace=True)
            

            # Collect the pre- and post-cleaning soiling ratios for each panel.
            for panel in ['EDS1', 'EDS2', 'EDS3', 'EDS4', 'EDS5', 'CTRL1', 'CTRL2']:
                panel_rows = data_SR.loc[[panel]]
                # SR Before
                soiling_ratios[panel + '_PRE'].extend(panel_rows[['SR_Before']].values.flatten())
                # SR After
                soiling_ratios[panel + '_POST'].extend(panel_rows[['SR_After']].values.flatten())

            # declare soiling rate dictionary (one value per label)
            soiling_rates = {label: 0 for label in labels}
            # error check: theilslopes needs at least two soiling ratio values
            if len(soiling_ratios['EDS1_PRE']) < 2:
                return "error"
            # calculate the soiling rate values
            for y in labels:
                soiling_rates[y] = stats.theilslopes(soiling_ratios[y], range(len(soiling_ratios[y])), 0.90)[0].round(2)
            # return the dictionary
            return soiling_rates
        elif self.mode == 'testing_data.csv':
            # update the soiling rate value
            self.sr_label.config(text= "Soiling Rate: N/A (This mode does not measure SR)")
        else:
            # return error
            self.error_label.config(text="Select Valid CSV File For Analysis")
def rel_time_attr_MK(dataFrame71):
    """
    Theil-Sen-Slope estimation and Mann-Kendall-Test to estimate the
    contribution of each driver!

    Parameters
    ----------
    dataFrame71 : time series
        Time series

    Returns
    -------
    regH : list, MK-output
        Sen slope and MK-test result with uncertainty range of hazard
        (with 1980 fixed exposure) (TS_Haz), 1980-2010
    regHE : list, MK-output
        Sen slope and MK-test result with uncertainty range of TS_HazExp,
        1980-2010
    regH7 : list, MK-output
        Sen slope and MK-test result with uncertainty range of hazard
        (with 1980 fixed exposure) (TS_Haz), 1971-2010
    regH107 : list, MK-output
        Sen slope and MK-test result with uncertainty range of hazard
        (with 2010 fixed exposure) (TS_Haz), 1971-2010
    regH10 : list, MK-output
        Sen slope and MK-test result with uncertainty range of hazard
        (with 2010 fixed exposure) (TS_Haz), 1980-2010
    regF : list, MK-output
        Sen slope and MK-test result with uncertainty range of TS_Full,
        1980-2010
    regN : list, MK-output
        Sen slope and MK-test result with uncertainty range of observed
        damages

    """

    dataFrame = dataFrame71[dataFrame71['Year'] > 1979]

    regLHazExp = mk.original_test(dataFrame['Norm_Impact_2y_trend'], alpha=0.1)

    slopeLHazExp = stats.theilslopes(dataFrame['Norm_Impact_2y_trend'],
                                     alpha=0.1)

    regHE = [regLHazExp.slope, regLHazExp.p, slopeLHazExp[2], slopeLHazExp[3]]

    regLFull = mk.original_test(dataFrame['Norm_Impact_Pred'], alpha=0.1)

    slopeLFull = stats.theilslopes(dataFrame['Norm_Impact_Pred'], alpha=0.1)

    regF = [regLFull.slope, regLFull.p, slopeLFull[2], slopeLFull[3]]

    regHaz = mk.original_test(dataFrame['Norm_ImpFix_2y_trend'], alpha=0.1)

    slopeHaz = stats.theilslopes(dataFrame['Norm_ImpFix_2y_trend'], alpha=0.1)

    regH = [regHaz.slope, regHaz.p, slopeHaz[2], slopeHaz[3]]

    regHaz7 = mk.original_test(dataFrame71['Norm_ImpFix_2y_trend'], alpha=0.1)

    slopeHaz7 = stats.theilslopes(dataFrame71['Norm_ImpFix_2y_trend'],
                                  alpha=0.1)

    regH7 = [regHaz7.slope, regHaz7.p, slopeHaz7[2], slopeHaz7[3]]

    regHaz107 = mk.original_test(dataFrame71['Norm_Imp2010_2y_trend'],
                                 alpha=0.1)

    slopeHaz107 = stats.theilslopes(dataFrame71['Norm_Imp2010_2y_trend'],
                                    alpha=0.1)

    regH107 = [regHaz107.slope, regHaz107.p, slopeHaz107[2], slopeHaz107[3]]

    regHaz10 = mk.original_test(dataFrame['Norm_Imp2010_2y_trend'], alpha=0.1)

    slopeHaz10 = stats.theilslopes(dataFrame['Norm_Imp2010_2y_trend'],
                                   alpha=0.1)

    regH10 = [regHaz10.slope, regHaz10.p, slopeHaz10[2], slopeHaz10[3]]

    regNat = mk.original_test(dataFrame['natcat_flood_damages_2005_CPI'],
                              alpha=0.1)

    slopeNat = stats.theilslopes(dataFrame['natcat_flood_damages_2005_CPI'],
                                 alpha=0.1)

    regN = [regNat.slope, regNat.p, slopeNat[2], slopeNat[3]]

    return regH, regHE, regH7, regH107, regH10, regF, regN
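A minimal sketch of the pattern used in rel_time_attr_MK, applied to a synthetic series: pymannkendall supplies the Sen slope and p-value, theilslopes the confidence bounds (the series below is an illustrative assumption):

import numpy as np
import pymannkendall as mk
from scipy import stats

series = 0.5 * np.arange(40) + np.random.default_rng(3).normal(0, 1, 40)

mk_res = mk.original_test(series, alpha=0.1)    # Mann-Kendall test + Sen slope
sen = stats.theilslopes(series, alpha=0.1)      # slope, intercept, lower, upper
reg = [mk_res.slope, mk_res.p, sen[2], sen[3]]  # same layout as regH etc. above
print(reg)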
Example #33
0
        slope[i] = out[0]
        upper_uncert[i] = out[3] - out[0]
        lower_uncert[i] = out[0] - out[2]

    return slope, lower_uncert, upper_uncert


ratios = [highres / lowres for highres, lowres in zip(highres_pts, lowres_pts)]

slopes, low_slope, high_slope = \
    sentheil_perchan(radii, ratios)

# Fit for all overlap points
all_slope, inter, all_low_slope, all_high_slope = \
    theilslopes(np.hstack(ratios), x=np.hstack(radii).value)

chans = range(11, 26)

plt.errorbar(chans, slopes, yerr=[low_slope, high_slope], alpha=0.5)
# plt.plot(chans, slope_lowess_85)
plt.axhline(0, linestyle='--')
plt.axhline(all_slope)
plt.fill_between(chans, all_low_slope, all_high_slope, alpha=0.5)
plt.ylabel("Slope")
plt.xlabel("Channel")
plt.grid()
plt.tight_layout()
plt.savefig(os.path.join(figure_folder, "NOEMA_30m_overlap_ratio_slope.png"))
plt.savefig(os.path.join(figure_folder, "NOEMA_30m_overlap_ratio_slope.pdf"))
plt.close()
Example #34
0
fin = "/home/lunet/gytm3/Everest2019/Research/OneEarth/Data/HadCRU.txt"
nt = 2020 - 1850 + 1
count = 1
row = 0
t = np.zeros((nt, 14))
with open(fin) as f:
    for l in f:
        if count % 2. == 1:
            t[row] = l.split()
            row += 1
        count += 1
t = t[:-1, :]
yr = t[:, 0]
tann = t[:, -1]
fig, ax = plt.subplots(1, 1)
ax.plot(yr, tann)
ax.grid()

# Cut out the 1981-2010 mean and see how much warmer than PI
ctrl = tann[np.logical_and(yr > 1980, yr < 2011)]
pi = tann[np.logical_and(yr > 1849, yr < 1880)]
dpi = np.mean(ctrl) - np.mean(pi)
print("Warming so far = %.2fC" % dpi)

# Trend over 1979-2019
years = np.arange(1979, 2020)
trend, intercept, lower, upper = stats.theilslopes(
    tann[np.logical_and(yr > 1978, yr < 2020)], years, 0.95)
print("Trend = %.3fC/decade (%.3f,%.3f)" %
      (trend * 10, lower * 10, upper * 10))
Example #35
0
human_evolution = [descendent(human(),human())]
time = [0]
conjunto_puntos = []

for t in range(1, 1000):
  human_evolution.append(descendent(human_evolution[-1],human()))
  time.append(t)
  conjunto_puntos.append([time[-1], human_evolution[-1]])

x = [v[0] for v in conjunto_puntos]
y = [v[1] for v in conjunto_puntos]

x = np.array(x)
y = np.array(y)

res = stats.theilslopes(y, x, 0.999)
lsq_res = stats.linregress(x, y)

print(res)
print(lsq_res)

fig = plt.figure()
ax = fig.add_subplot(111)
ax.plot(x, y, 'b.')
ax.plot(x, res[1] + res[0] * x, 'r-')
ax.plot(x, res[1] + res[2] * x, 'r--')
ax.plot(x, res[1] + res[3] * x, 'r--')
ax.plot(x, lsq_res[1] + lsq_res[0] * x, 'g-')
plt.show()