def test_regress_simple():
    # Regression test for #1273: fit a straight line that carries
    # sinusoidal noise.
    x = np.linspace(0, 100, 100)
    noise = np.sin(np.linspace(0, 20, 100))
    y = 0.2 * x + 10 + noise

    # The result must unpack into exactly five values; only the first two
    # are compared against reference numbers.
    slope, intercept, _rvalue, _pvalue, _stderr = mstats.linregress(x, y)
    assert_almost_equal(slope, 0.19644990055858422)
    assert_almost_equal(intercept, 10.211269918932341)

    # The same fit must also expose its fields as namedtuple attributes.
    fields = ('slope', 'intercept', 'rvalue', 'pvalue', 'stderr')
    check_named_results(mstats.linregress(x, y), fields, ma=True)
    def calc_rwr(self, days):
        """Calculate RWR stats over some sub-period of the composite.

        RWR stats are calculated if there are at least two days with data.

        Parameters
        ----------
        days : slice
            Subperiod over which RWR is calculated.

        Notes
        -----
        Nothing is returned.  ``self.stats`` is always set; when
        ``self.bounds`` is not None, ``self.regress_day_start`` and
        ``self.regress_day_end`` are set to the first and last day of the
        sub-period as well.
        """
        # Five placeholders, one per field of a ``linregress`` result, used
        # when no fit is made.
        # NOTE(review): the placeholder element was missing in the source
        # (``[] * 5`` is just an empty list); NaN is assumed here - confirm
        # against the consumers of ``self.stats``.
        self.stats = [np.nan] * 5

        if self.bounds is not None:
            days_sub = self.get_days()[days]

            x, y = days_sub, self.td[days]
            # A regression needs at least two unmasked samples.
            if np.ma.count(y) > 1:
                self.stats = linregress(x, y)

            self.regress_day_start = days_sub[0]
            self.regress_day_end = days_sub[-1]

def plot_regression(data_dict, type):
    """Fit ``rx1day`` against ``mean`` by least squares and plot the fitted line.

    Parameters
    ----------
    data_dict : mapping
        Must provide ``"mean"`` (used as x) and ``"rx1day"`` (used as y).
    type : str
        Prefix for the legend label of the fitted line.

    Returns
    -------
    The ``linregress`` result for the fit.
    """
    fit = linregress(data_dict["mean"], data_dict["rx1day"])
    # NOTE(review): the opening line of this call was missing in the source;
    # the alignment of the surviving continuation lines matches
    # ``plt.plot(`` and the x argument is assumed to be the regressor -
    # confirm against the original.
    plt.plot(data_dict["mean"],
             fit.intercept + fit.slope * data_dict["mean"],
             label=f'{type} least squares fit')

    return fit
def RMSEu(obs, mod, axis=None):
    """Root Mean Squared Error unsystematic (mod_hat, mod).

    ``mod_hat`` is the least-squares prediction ``b + m * obs`` obtained by
    regressing ``mod`` on ``obs``; the result is ``RMSE(mod_hat, mod)``.

    Parameters
    ----------
    obs, mod : array-like
        Paired values passed to ``linregress`` as x and y respectively.
    axis : int or None
        When None a single regression over all values is made; otherwise
        the regression is applied along ``axis``.

    Returns
    -------
    The value of ``RMSE``; None when ``axis`` is None and ``linregress``
    raises ValueError.
    """
    from scipy.stats.mstats import linregress
    if axis is None:
        try:
            m, b, rval, pval, stderr = linregress(obs, mod)
            mod_hat = b + m * obs
            return RMSE(mod_hat, mod)
        except ValueError:
            return None
    else:
        # NOTE(review): the tail of this call was truncated in the source;
        # ``mod`` is restored as the second data argument - confirm.
        myvals = apply_along_axis_2v(lambda x, y: linregress(x, y), axis, obs,
                                     mod)
        # Move the trailing axis (the five regression outputs) to the front
        # so that it can be unpacked.
        myvals = np.rollaxis(myvals, myvals.ndim - 1, 0).astype(obs.dtype)
        m, b, rval, pval, stderr = myvals
        mod_hat = b + m * obs
        result = RMSE(mod_hat, mod, axis=axis)
        return result
def test(model, test_inputs, test_labels):
    """
    Runs through one epoch - all testing examples

    :param model: the trained model to use for prediction

    :param test_inputs: test inputs (all inputs for testing) of shape (num_images, 150, 130, 3)

    :param test_labels: test labels (all labels for testing) of shape (num_images,)

    :returns: tuple of (average R^2 across all batches, predicted scores of
        all batches concatenated along axis 0)

    :raises ZeroDivisionError: when there are fewer examples than one batch
    """
    batch_size = model.batch_size
    # Only full batches are evaluated; a trailing partial batch is dropped.
    num_iterations = int(np.size(test_inputs, 0) / batch_size)

    R_squared = 0
    all_predicted_scores = []

    for i in range(num_iterations):
        start = i * batch_size

        # Slice the *test* data passed in by the caller (the previous code
        # read undefined ``train_inputs`` / ``train_labels`` names here).
        batch_inputs = tf.convert_to_tensor(
            test_inputs[start:start + batch_size, :, :, :])
        batch_scores = tf.convert_to_tensor(
            test_labels[start:start + batch_size, ])

        predicted_scores = model.call(batch_inputs, False)

        # NOTE(review): both operands are reshaped to column tensors
        # (-1, 1); confirm that the ``linregress`` in scope accepts 2-D
        # input.
        _, _, r, _, _ = linregress(y=tf.reshape(batch_scores, [-1, 1]),
                                   x=tf.reshape(predicted_scores, [-1, 1]))
        R_squared += r**2

        if i == 0:
            all_predicted_scores = predicted_scores
        else:
            all_predicted_scores = tf.concat(
                [all_predicted_scores, predicted_scores], axis=0)

    return R_squared / num_iterations, all_predicted_scores
def get_linear_trend(x, y, time_dim):
    """Fit a linear trend of ``y`` against ``x`` and print a short report.

    Parameters
    ----------
    x, y : array-like
        Passed to ``mstats.linregress``; ``y`` must expose a ``.data``
        attribute, which is used for the printed mean.
    time_dim : number
        Divisor used to express the total trend per year in the report.

    Returns
    -------
    tuple
        ``(slope, intercept)`` of the fit.
    """
    slope, intercept, r_value, p_value, std_err = mstats.linregress(x,y)

    # Exactly one of the two significance messages is printed (5 % level).
    if p_value > 0.05:
        print('    Not significant: {:.3f} with mean {:.2f}'.format(p_value, np.mean(y.data)))
    else:
        print('    Is  significant: {:.3f} with mean {:.2f}'.format(p_value, np.mean(y.data)))

    print('        Trend: total {:.3f} or {:.3f}/yr'.format(slope*len(y), slope*len(y)/time_dim))

    return slope, intercept
    def calc_rwr(td):
        """Regress ``td`` against ``days`` over the regression window.

        The window is the slice from ``ndays_ante + regress_day_start`` to
        ``ndays_ante + regress_day_end`` inclusive; ``days``, ``ndays_ante``
        and both window bounds come from the enclosing scope.

        Returns the ``linregress`` result when the window holds at least
        two unmasked values of ``td``, otherwise a list of five
        placeholders.
        """
        s = slice(ndays_ante + regress_day_start,
                  ndays_ante + regress_day_end + 1)
        x, y = days[s], td[s]
        if np.ma.count(y) > 1:
            stats = linregress(x, y)
        else:
            # NOTE(review): both the ``else`` and the placeholder element
            # were missing in the source, so a successful fit was always
            # overwritten by an empty list; NaN is assumed as the
            # placeholder - confirm against callers.
            stats = [np.nan] * 5

        return stats
def logisticregr(ax, x, y, epsilon=0.00001):
    """Regress a transformed ``y`` on ``x`` and draw the result on ``ax``.

    NOTE(review): this block looks incomplete in this view - ``invy`` and
    ``regression`` are not defined inside it and the ``set_title`` call
    below is never closed.  Presumably the lines that computed ``invy`` and
    the tail of the title call were lost; restore them from the original
    before relying on this function.
    """
    # Range of y, widened by ``epsilon`` on both sides.
    mmin = min(y)-epsilon
    mmax = max(y)+epsilon
    # Linear fit against ``invy``, which is not assigned in this block.
    slope, intercept, r_value, p_value, std_err = stats.linregress(x,invy)
    # The title formats the negated slope and a sign chosen from the
    # intercept; the plotted curve rescales ``regression(...)`` by the
    # widened range of y (``mmax - mmin``) and shifts it by ``mmin``.
    ax.set_title('f(x)=1/(2+2exp( %.2fx %s %.2f ))+(1/2)'%(-slope, ['+','-'][intercept>0],
    ax.plot(x, regression(x, intercept, slope)*(mmax-mmin)+mmin, label='approximation')
def linear_regression(array1, array2):
    """Least-squares fit of ``array2`` on ``array1``, ignoring NaN pairs.

    Both inputs are flattened and paired element by element.  A pair is
    dropped when *either* member is NaN, so the remaining values stay
    aligned (filtering each array on its own would shift the pairs and
    could leave arrays of different lengths).

    Parameters
    ----------
    array1, array2 : array-like
        x and y values; must hold the same number of elements.

    Returns
    -------
    tuple
        ``(m, b, rval, r2, pval, stderr)`` where ``m`` is the slope rounded
        to two decimals, ``b`` the intercept, ``rval`` the correlation
        coefficient, ``r2`` its square rounded to two decimals, ``pval``
        the p-value (a string in scientific notation when <= 0.01,
        otherwise a number rounded to two decimals) and ``stderr`` the
        standard error reported by ``linregress``.
    """
    # ensure they are flat arrays so that elements pair up one-to-one
    array1 = np.ravel(np.asanyarray(array1))
    array2 = np.ravel(np.asanyarray(array2))
    # remove every pair that contains a NaN on either side
    keep = ~(np.isnan(array1) | np.isnan(array2))
    m, b, rval, pval, stderr = linregress(array1[keep], array2[keep])
    r2 = round(rval**2, 2)  # round to two decimal places
    if pval <= 0.01:  # if smaller, switch to scientific notation
        pval = '{:.1e}'.format(pval)
    else:  # if larger, round to two decimal places
        pval = round(pval, 2)
    m = round(m, 2)  # round to two decimal places
    return m, b, rval, r2, pval, stderr
    def calcCorCoef( self, var1, var2):
        """Regress ``var2`` on ``var1`` and keep slope and intercept on ``self``.

        When ``self.verbose`` is true the fit statistics are printed as
        well, together with the critical t value looked up in
        ``self.tTable``.
        """
        fit = stats.linregress( var1, var2)
        self.slope = fit[0]
        self.intercept = fit[1]
        # Quiet mode: nothing more to do once the coefficients are stored.
        if not self.verbose:
            return
        print( "slope = ", self.slope )
        print( "intercept = ", self.intercept )
        print( "r_value = ", fit[2])
        print( "std_err = ", fit[4] )
        # t statistic of the slope: |slope| / standard error.
        print( "t value = ", np.abs( fit[0] ) / fit[4] )
        # The table row is selected by the point count, capped at 101.
        print( "tCrit = ",
               self.tTable[ min( self.regPoints, 101)][ self.pIndex])
    def clean_gaps_w_lin_regress(self, start_idx):
        """
        Function to clean gaps in the data with a linear regression.

        For every row of ``self.xs`` a line is fitted through the entries
        reported by ``self.xs.nonzero()`` (x taken from ``self.yrs``), and
        masked entries between ``start_idx[i]`` and the last
        ``self.keep_n_values`` columns are replaced by the fitted value at
        ``self.min_year + column``.  ``self.xs`` is modified in place.

        Parameters
        ----------
        start_idx : sequence of integer
            First non-masked value of array, one entry per row.
        """
        non_zero_idx = np.transpose(self.xs.nonzero())
        # ``range`` instead of the Python-2-only ``xrange``.
        for i in range(self.rows_N):
            # Column indices of the usable entries in row i.
            idx = non_zero_idx[np.where(non_zero_idx[:, 0] == i)][:, 1]
            # NOTE(review): ``idx.any()`` is False both for an empty ``idx``
            # and for ``idx == [0]``; kept as is - confirm that this is the
            # intended guard.
            if idx.any():
                slope, intercept, r, p, se = mstats.linregress(self.yrs[idx], self.xs[i,idx])
                # Masked positions inside the window, as absolute columns.
                missing_xs = ma.where(self.xs[i, start_idx[i]:-self.keep_n_values].mask)[0] + start_idx[i]
                if np.any(missing_xs):
                    self.xs[i, missing_xs] = (self.min_year + missing_xs) * slope + intercept
def lineartrend1d(y, x=None, alpha=0.05):
    """Slope and p-value of a least-squares line through 1-d data.

    Parameters
    ----------
    y : array-like
        Dependent values; flattened, non-finite entries are ignored.
    x : array-like, optional
        Independent values with the same number of elements as ``y``;
        flattened and converted to float.  Defaults to ``0, 1, 2, ...``.
    alpha : float
        Currently unused by this function.

    Returns
    -------
    tuple
        ``(slope, pvalue)``; ``(nan, nan)`` when fewer than two finite
        ``y`` values remain.
    """
    y = np.array(y).flatten()
    if x is None:
        x = np.arange(len(y), dtype=float)
    else:
        # A caller-supplied x gets the same flattening as y so that the
        # mask below can be applied to both.
        x = np.array(x, dtype=float).flatten()

    # Do own masking of missing values
    isfinite_mask = np.isfinite(y)
    y = y[isfinite_mask]
    x = x[isfinite_mask]
    # Catching the case of less than two valid points
    if y.size > 1:
        linoutput = linregress(x, y)
        return linoutput.slope, linoutput.pvalue
    else:
        return np.nan, np.nan
def plot_3panel(x, y, model):
    """Draw two maps and a scatter plot comparing ``x`` and ``y``.

    Panels 1 and 2 show ``x`` and ``y`` with ``iplt.pcolormesh`` on a
    shared diverging colour scale centred on zero; panel 3 scatters
    ``y.data`` against ``x.data`` and reports the correlation coefficient
    and slope of a simple linear regression in its title.

    Parameters
    ----------
    x, y : objects exposing ``.data`` that ``iplt.pcolormesh`` can draw
    model : str
        Name used in the figure title.
    """
    plt.figure(figsize=[19.2, 4.85])
    # for plotting a colorbar
    vmn = np.ma.min([x.data, y.data])
    vmx = np.ma.max([x.data, y.data])
    # TwoSlopeNorm needs vmin < vcenter < vmax strictly, so an extreme that
    # sits exactly on the centre (0) must be nudged as well as one that
    # lies on the wrong side of it.
    if vmx <= 0:
        vmx = 0.001
    if vmn >= 0:
        vmn = -0.001
    divnorm = mcolors.TwoSlopeNorm(vmin=vmn, vcenter=0, vmax=vmx)

    # x
    plt.subplot(1, 3, 1)
    pmesh = iplt.pcolormesh(x, norm=divnorm, cmap='RdBu')
    plt.title('Mean pr')

    # y
    plt.subplot(1, 3, 2)
    iplt.pcolormesh(y, norm=divnorm, cmap='RdBu')

    plt.subplot(1, 3, 3)
    plt.scatter(x.data, y.data, s=1)
    plt.xlabel('Mean pr')
    # compute simple linear regression
    fit = linregress(x.data.flatten(), y.data.flatten())
    plt.title(f"R: {fit.rvalue:.2f}. Slope: {fit.slope:.2f}")

    plt.suptitle(f"{model} %")

    # add colorbar for map plots
    fig = plt.gcf()
    cbax = fig.add_axes([0.125, 0.1, 0.5, 0.075])
    fig.colorbar(pmesh, cax=cbax, orientation="horizontal")

    # save plot
    # NOTE(review): no save call follows this comment in this view.
 def trend_linear(self, alpha=0.05):
     """Fit a linear trend to ``self.data_ts`` and summarise it.

     The regression uses the Julian date of the index as x, so the raw
     slope is per day; it is reported per decade.  The fitted line is
     stored in ``self.fitted['linear']`` and a logbook entry is added.

     Parameters
     ----------
     alpha : float
         Significance level; the trend sign is reported only when the
         p-value is at or below it, otherwise the sign is 0.

     Returns
     -------
     dict
         Keys ``slope``, ``sign``, ``pvalue``, ``stderr``, ``slope_low``,
         ``slope_up`` and ``method``.
     """
     xaxis = self.data_ts.index.to_julian_date()
     yaxis = self.data_ts.values
     results = linregress(xaxis, yaxis)
     self.fitted['linear'] = xaxis * results.slope + results.intercept
     per_decade = 365.25 * 10  # From /day to /decade
     results_dict = {}
     results_dict['slope'] = results.slope * per_decade
     # Only a significant trend gets a non-zero sign.
     if results.pvalue <= alpha:
         results_dict['sign'] = int(np.sign(results.slope))
     else:
         results_dict['sign'] = int(0)
     results_dict['pvalue'] = results.pvalue
     results_dict['stderr'] = results.stderr * per_decade
     # Slope plus/minus one standard error.
     results_dict[
         'slope_low'] = results_dict['slope'] - results_dict['stderr']
     results_dict[
         'slope_up'] = results_dict['slope'] + results_dict['stderr']
     self.__add_to_logbook__('Calculated linear trend test')
     results_dict['method'] = 'linear'
     return results_dict
user_score = ma.masked_object(user_score, user_count)

rating = np.asarray(data.Rating.tolist())

# ============================= Global Sales Code =============================
# - Masking Global Sales 3 STD away from mean
global_sales = np.asarray(data.Global_Sales.tolist())  # millions
# np.mean / np.std replace the scipy top-level aliases of the same numpy
# functions, which newer SciPy releases no longer provide.
global_sales_mean = np.mean(global_sales)
global_sales_std = np.std(global_sales)
global_sales = ma.masked_outside(global_sales,
                                 global_sales_mean - global_sales_std * 3,
                                 global_sales_mean + global_sales_std * 3)

# - Regression Critic Score vs Global Sales
slope, intercept, r_value, p_value, std_err = \
    mstats.linregress(critic_score, global_sales)
t_score = slope / std_err

# - Print
with open('results.txt', 'w') as f:
    f.write('Global Sales Regression\nSlope (Million per 1): ' + str(slope) +
            '\nIntercept: ' + str(intercept) + '\nR Value: ' + str(r_value) +
            '\nP Value: ' + str(p_value) + '\nStandard Error: ' +
            str(std_err) + '\nT Stat: ' + str(t_score))
    # Exactly one verdict is written.
    # NOTE(review): the comparison is one-sided (a strongly negative
    # t_score is reported as "Accept") - confirm that this is intended.
    if t_score > 1.96:
        f.write('; Reject Null Hypothesis\n\n')
    else:
        f.write('; Accept Null Hypothesis\n\n')

# - For plotting regression line
x = np.arange(1, 100)
def main():
    #                             Initializations

    region       = 'poleS' 

    #Global Data Directory
    GDataDir     = '/data1/projects/'
    #three letter ID ; ID in the HDF Files
    if region    == 'poleN':
        locs         = ['kir', 'tab', 'spu', 'bre', 'eur', 'nya']
        locID        = ['kiruna', 'thule', 'st.petersburg', 'bremen', '_eureka_', 'ny.alesund'] 
        pltID        = ['Kiruna', 'Thule', 'St Petersburg', 'Bremen', 'Eureka', 'Ny Alesund'] 
    elif region == 'middleN':
        locs         = ['zgp', 'rkb',  'iza']  #'tor',  'jfj'
        locID        = ['zugspitze',  'rikubetsu', 'izana']  #'_toronto_', 'jungfraujoch', 
        pltID        = ['Zugspitze',  'Rikubetsu', 'Izana'] #'Toronto',  'Jungfraujoch',
    elif region == 'tropics':
        locs         = ['mlo', 'alz']#, 'pmb']
        locID        = ['mauna.loa.h', 'altzomoni']#, 'paramaribo'] 
        pltID        = ['Mauna Loa', 'Altzomoni']#, 'Paramaribo']
    elif region == 'middleS':
        locs         = ['std', 'mai', 'wlg', 'ldr']
        locID        = ['stdenis', 'maido', 'wollongong', 'niwa001' ] 
        pltID        = ['St Denis', 'Maido', 'Wollongong', 'Lauder']

    elif region == 'poleS':
        locs         = ['ahs']
        locID        = ['arrival.heights'] 
        pltID        = ['AHTS']
        print 'An error ocurred: region is not defined'
    gasName1      = 'ocs'
    gasName2      = 'n2o'

    AvgType        = 'Monthly'   #'Monthly'  'Daily'
    smthFlg        = False
    period         = 1.0
    fitFlg         = False

    ColFlg         = False

    # Flags
    saveFlg       = True                  # Flag to either save data to pdf file (saveFlg=True) or plot to screen (saveFlg=False)
    errorFlg      = False                  # Flag to process error data
    fltrFlg       = True                   # Flag to filter the data

    dateFlg       = True                  # Flag to filter based on min and max dates
    tcFlg         = True                   # Flag to filter total column amount < 0
    tcMMFlg       = True                   # Flag to filter based on min and max total column amount
    pcFlg         = True                     # Flag to filter profiles with negative partial columns
    szaFlg        = True                   # Flag to filter based on min and max SZA    

    minSZA        = 0.0                    # Min SZA for filtering
    maxSZA        = 90.0                   # Max SZA for filtering
    maxTC         = 1.0e25                 # Max Total column amount for filtering
    minTC         = 0.0                    # Min Total column amount for filtering

    iyear         = 2009   
    imonth        = 1
    iday          = 1
    fyear         = 2016
    fmonth        = 12
    fday          = 31
    sclfct        = 1.0E9                  # Scale factor to apply to vmr plots (ppmv=1.0E6, ppbv=1.0E9, etc)
    sclfctName    = 'ppb'                 # Name of scale factor for labeling plots
    TCsclfct      = 1.0e16
    TCsclfctName  = 'x10$^{16}$'

    pColsFlg      = True                   #Calculate tropospheric and stratospheric columns?

    pltPcol       = False                  #plot the time series in partial columns
    pltWvmr       = True                   #plot the time series in weighted VMR
    Adth          = 16.0                   #Altitude in km of tropopause in case NCEP or DTH is not available
    offH          = 5.0                    #Additional altitude above the tropopause height

    # Flag for Plots

                                    #        --- START ---       #

    #Name of PDF with Figures
    if ColFlg: pltFile = GDataDir+'/ocs/figures/LifeTime_OCS_Column_'+region+'.pdf'
    else: pltFile = GDataDir+'/ocs/figures/LifeTime_OCS_pCol_'+region+'.pdf'

    if saveFlg: pdfsav = PdfPages(pltFile)
    else: pdfsav = ''

    # Check file and directories
    dataDir1    = [GDataDir+gasName1+'/'+l+'/'  for l in locs]
    dataDir2    = [GDataDir+gasName2+'/'+l+'/'  for l in locs]

    for d in dataDir1:  ckDir(d,exit=True)
    for d in dataDir2:  ckDir(d,exit=True)

    # Create instance of output data class   
    statDataCl  = OrderedDict()
    statDataCl2 = OrderedDict()

    Group1 = zip(dataDir1,locID, pltID, locs)
    Group1.sort(key=lambda Group1: Group1[2])

    Group2 = zip(dataDir2,locID, pltID, locs)
    Group2.sort(key=lambda Group2: Group2[2])

    locs = [l for dd, id, pl, l in Group1]

    for dd, id, pl, l in Group1:

        # Some HDF files are in specific folder: change here accordingly
        if pl == 'Wollongong':      dd = dd + 'ocs_hippov2/'
        elif pl == 'Jungfraujoch' : dd = dd + 'OCS.39_1b3144b4fe4a58f29f1f_/'
        elif pl == 'Toronto' :      dd = dd + 'OCS/'
        elif pl == 'Eureka' :       dd = dd + 'OCS/'
        elif pl == 'Rikubetsu':     dd = dd + 'HDF_Fil4/'
        elif pl == 'Tsukuba' :      dd = dd + 'HDFfiles/'
        elif pl == 'Zugspitze':     dd = dd + 'OCS_Zugspitze/'
        elif pl == 'Kiruna':        dd = dd + 'OCS_Kiruna/'
        elif pl == 'Izana':         dd = dd + 'OCS_Izana/'
        elif pl == 'St Petersburg': dd = dd + 'HDF_OCS_SPb_O3_atm16/'
        elif pl == 'Paris':         dd = dd + '2019_Paris/'
        else: dd = dd

        statDataCl[pl]  = dc.ReadHDFData(dd, id, gasName1)
    for dd, id, pl, l in Group2:

        statDataCl2[pl] = dc.ReadHDFData(dd, id, gasName2)

    # Variables from HDF files 
    datesJD2K    = OrderedDict()
    rPrf         = OrderedDict();  rPrf_2         = OrderedDict() #retrieved Prf in mixing ratio
    aPrf         = OrderedDict()   #apriori Prf in mixing ratio
    rPrfMol      = OrderedDict();  rPrfMol_2      = OrderedDict()   #retrieved Prf partial Column (molec/cm2)
    aPrfMol      = OrderedDict()   #apriori Prf partial Column (molec/cm2)
    totClmn      = OrderedDict();  totClmn_2      = OrderedDict() #retrieved total column (molec/cm2)
    atotClmn     = OrderedDict()   #apriori total column (molec/cm2)
    avkVMR       = OrderedDict()   #Averaging kernel (VMR)
    avkTC        = OrderedDict()   #Averaging kernel total column
    alt          = OrderedDict()   #Altitude 
    sza          = OrderedDict()   #Solar Zenith Angle
    TempPrf      = OrderedDict()   #Temperature Profile
    PresPrf      = OrderedDict()   #Pressure Profile

    # Variables calculated 
    #alt_orig     = OrderedDict()
    dates        = OrderedDict()
    dates_2       = OrderedDict()
    avkSCF       = OrderedDict()   #Averaging kernel (scale factor)
    dofs         = OrderedDict()   #degrees of freedom
    AirMPrf      = OrderedDict(); AirMPrf_2    = OrderedDict()   #Airmass
    rPrfMol      = OrderedDict()   #retrieved Prf in molec/cm2
    aPrfMol      = OrderedDict()   #apriori Prf in molec/cm2

    totWvmr      = OrderedDict()    #Weightet VMR A priori
    atotWvmr     = OrderedDict()

    alttpp       = OrderedDict()
    alttpp2      = OrderedDict()

    altbl1       = OrderedDict()
    altbl2       = OrderedDict()

    altft1       = OrderedDict()
    altft2       = OrderedDict()

    altst1       = OrderedDict()
    altst2       = OrderedDict()

    Lat          = []
    Lon          = []

    if errorFlg:
        tot_rnd       = OrderedDict()
        tot_sys       = OrderedDict()
        tot_std       = OrderedDict()
        vmr_rnd_err   = OrderedDict()
        vmr_sys_err   = OrderedDict()
        vmr_tot_err   = OrderedDict()

    if pColsFlg:
        dtp           = OrderedDict()
        datesdtp      = OrderedDict()
        PcolStrat     = OrderedDict()   #partial columns
        PcolTrop1     = OrderedDict()
        PcolTrop2     = OrderedDict()

        PcolStratapr  = OrderedDict()   #partial columns A priori
        PcolTropapr1  = OrderedDict()
        PcolTropapr2  = OrderedDict()

        WvmrStrat     = OrderedDict(); WvmrStrat_2     = OrderedDict()   #Weighted VMR
        WvmrTrop1     = OrderedDict()
        WvmrTrop1_2     = OrderedDict()
        WvmrTrop2     = OrderedDict(); WvmrTrop2_2     = OrderedDict()

        WvmrStratapr  = OrderedDict()    #Weighted VMR A priori
        WvmrTropapr1  = OrderedDict()
        WvmrTropapr2  = OrderedDict()

        rPcol         = OrderedDict(); rPcol_2         = OrderedDict() 
        aPcol         = OrderedDict()

        rPvmr         = OrderedDict(); rPvmr_2         = OrderedDict()
        aPvmr         = OrderedDict()

    for ii, idhdf in enumerate(pltID):

        print idhdf

        datesJD2K[idhdf]    = statDataCl[idhdf].HDF[statDataCl[idhdf].getDatetimeName()]
        dates[idhdf]        = dc.jdf_2_datetime(datesJD2K[idhdf])

        datesJD2K_2         = statDataCl2[idhdf].HDF[statDataCl2[idhdf].getDatetimeName()]
        dates_2[idhdf]      = dc.jdf_2_datetime(datesJD2K_2)

        alt[idhdf]          = statDataCl[idhdf].HDF[statDataCl[idhdf].getAltitudeName()]
        sza[idhdf]          = statDataCl[idhdf].HDF[statDataCl[idhdf].getAngleSolarZenithAstronomicalName()]
        conv                = statDataCl[idhdf].HDF[statDataCl[idhdf].PrimaryGas.upper()+'.'+statDataCl[idhdf].getMixingRatioAbsorptionSolarName()+'VAR_SI_CONVERSION']            
        rPrf[idhdf]         = statDataCl[idhdf].HDF[statDataCl[idhdf].PrimaryGas.upper()+'.'+statDataCl[idhdf].getMixingRatioAbsorptionSolarName()]*float(conv[0][1])*sclfct

        conv_2              = statDataCl2[idhdf].HDF[statDataCl2[idhdf].PrimaryGas.upper()+'.'+statDataCl2[idhdf].getMixingRatioAbsorptionSolarName()+'VAR_SI_CONVERSION']    
        rPrf_2[idhdf]       = statDataCl2[idhdf].HDF[statDataCl2[idhdf].PrimaryGas.upper()+'.'+statDataCl[idhdf].getMixingRatioAbsorptionSolarName()]*float(conv_2[0][1])*sclfct

        aPrf[idhdf]         = statDataCl[idhdf].HDF[statDataCl[idhdf].PrimaryGas.upper()+'.'+statDataCl[idhdf].getMixingRatioAbsorptionSolarAprioriName()]*float(conv[0][1])*sclfct

        conv                = statDataCl[idhdf].HDF[statDataCl[idhdf].PrimaryGas.upper()+'.'+statDataCl[idhdf].getColumnPartialAbsorptionSolarName()+'VAR_SI_CONVERSION']
        rPrfMol[idhdf]      = statDataCl[idhdf].HDF[statDataCl[idhdf].PrimaryGas.upper()+'.'+statDataCl[idhdf].getColumnPartialAbsorptionSolarName()]*float(conv[0][1])*(6.02e23/100./100.)
        conv_2                = statDataCl2[idhdf].HDF[statDataCl2[idhdf].PrimaryGas.upper()+'.'+statDataCl2[idhdf].getColumnPartialAbsorptionSolarName()+'VAR_SI_CONVERSION']
        rPrfMol_2[idhdf]    = statDataCl2[idhdf].HDF[statDataCl2[idhdf].PrimaryGas.upper()+'.'+statDataCl2[idhdf].getColumnPartialAbsorptionSolarName()]*float(conv_2[0][1])*(6.02e23/100./100.)

        aPrfMol[idhdf]       = statDataCl[idhdf].HDF[statDataCl[idhdf].PrimaryGas.upper()+'.'+statDataCl[idhdf].getColumnPartialAbsorptionSolarAprioriName()]*float(conv[0][1])*(6.02e23/100./100.)

        conv                = statDataCl[idhdf].HDF[statDataCl[idhdf].PrimaryGas.upper()+'.'+statDataCl[idhdf].getColumnAbsorptionSolarName()+'VAR_SI_CONVERSION']
        totClmn[idhdf]      = statDataCl[idhdf].HDF[statDataCl[idhdf].PrimaryGas.upper()+'.'+statDataCl[idhdf].getColumnAbsorptionSolarName()]*float(conv[0][1]) * (6.02e23) /100./100. / TCsclfct

        conv_2                = statDataCl2[idhdf].HDF[statDataCl2[idhdf].PrimaryGas.upper()+'.'+statDataCl2[idhdf].getColumnAbsorptionSolarName()+'VAR_SI_CONVERSION']
        totClmn_2[idhdf]     = statDataCl2[idhdf].HDF[statDataCl2[idhdf].PrimaryGas.upper()+'.'+statDataCl2[idhdf].getColumnAbsorptionSolarName()]*float(conv_2[0][1]) * (6.02e23) /100./100. / TCsclfct

        atotClmn[idhdf]     = statDataCl[idhdf].HDF[statDataCl[idhdf].PrimaryGas.upper()+'.'+statDataCl[idhdf].getColumnAbsorptionSolarAprioriName()]*float(conv[0][1]) * (6.02e23) /100./100. / TCsclfct
        PresPrf[idhdf]      = statDataCl[idhdf].HDF[statDataCl[idhdf].getPressureIndependentName()]
        TempPrf[idhdf]      = statDataCl[idhdf].HDF[statDataCl[idhdf].getTemperatureIndependentName()]

        AltBo               = statDataCl[idhdf].HDF[statDataCl[idhdf].getAltitudeBoundariesName()]
        nobs                = rPrf[idhdf].shape[0]
        n_layer             = rPrf[idhdf].shape[1]

        if statDataCl[idhdf].PrimaryGas.upper()+'.'+statDataCl[idhdf].getMixingRatioAbsorptionSolarAvkName() in statDataCl[idhdf].HDF.keys():
            avkVMR[idhdf]       = statDataCl[idhdf].HDF[statDataCl[idhdf].PrimaryGas.upper()+'.'+statDataCl[idhdf].getMixingRatioAbsorptionSolarAvkName()]
            avkTC[idhdf]        = statDataCl[idhdf].HDF[statDataCl[idhdf].PrimaryGas.upper()+'.'+statDataCl[idhdf].getColumnAbsorptionSolarAvkName()]
            avkVMR[idhdf]  = np.empty([nobs,n_layer,n_layer])
            avkTC[idhdf]   = np.empty([nobs,n_layer,n_layer])

        AirMPrf[idhdf]     =  np.divide(rPrfMol[idhdf], rPrf[idhdf])*sclfct

        AirMPrf_2[idhdf]   =  np.divide(rPrfMol_2[idhdf], rPrf_2[idhdf])*sclfct

        if (idhdf == 'Kiruna') or (idhdf == 'Zugspitze') or (idhdf == 'Izana') or (idhdf == 'Paris'):
            alt[idhdf]          = alt[idhdf][0, :]
            alt[idhdf]          = alt[idhdf][0:n_layer]

        Lat_i           = statDataCl[idhdf].HDF[statDataCl[idhdf].getLatitudeInstrumentName()]
        Lon_i           = statDataCl[idhdf].HDF[statDataCl[idhdf].getLongitudeInstrumentName()]
        alt_instru      = statDataCl[idhdf].HDF[statDataCl[idhdf].getAltitudeInstrumentName()]


        print '\n'
        print idhdf
        print 'Latitude          = {0:.2f}'.format(Lat_i[0])
        print 'Longitude         = {0:.2f}'.format(Lon_i[0])
        print 'Altitude of Instr = {0:.2f}'.format(alt_instru[0])

        if statDataCl[idhdf].PrimaryGas.upper()+'.'+statDataCl[idhdf].getMixingRatioAbsorptionSolarAvkName() in statDataCl[idhdf].HDF.keys():
            avkSCF[idhdf]  = np.zeros((nobs,n_layer,n_layer))

            for obs in range(0,nobs):
                Iapriori        = np.zeros((n_layer,n_layer))
                IaprioriInv     = np.zeros((n_layer,n_layer))
                np.fill_diagonal(Iapriori, aPrf[idhdf][obs])
                np.fill_diagonal(IaprioriInv, 1.0 / (aPrf[idhdf][obs]))
                avkSCF[idhdf][obs,:,:] = np.dot(np.dot(IaprioriInv,np.squeeze(avkVMR[idhdf][obs,:,:])),Iapriori)

            dofs[idhdf]         = np.asarray([np.trace(aki) for aki in avkSCF[idhdf]])
            avkSCF[idhdf]  = np.zeros((nobs,n_layer,n_layer))

        # FILTER DATA
        if fltrFlg: statDataCl[idhdf].fltrData(statDataCl[idhdf].PrimaryGas,iyear=iyear, imonth=imonth, iday=iday, fyear=fyear, fmonth=fmonth, fday=fday, minsza=minSZA,
                                               mxsza=maxSZA,minTC=minTC,maxTC=maxTC, tcFlg=tcFlg,pcFlg=pcFlg,szaFlg=szaFlg,tcMMFlg=tcMMFlg, dateFlg=dateFlg)
        else:    statDataCl[idhdf].inds = np.array([]) 
            dates[idhdf]    = np.delete(dates[idhdf], statDataCl[idhdf].inds)
            sza[idhdf]      = np.delete(sza[idhdf], statDataCl[idhdf].inds)
            totClmn[idhdf]  = np.delete(totClmn[idhdf], statDataCl[idhdf].inds)
            atotClmn[idhdf] = np.delete(atotClmn[idhdf], statDataCl[idhdf].inds)
            rPrf[idhdf]     = np.delete(rPrf[idhdf], statDataCl[idhdf].inds, axis=0)
            rPrfMol[idhdf]  = np.delete(rPrfMol[idhdf], statDataCl[idhdf].inds, axis=0)
            aPrf[idhdf]     = np.delete(aPrf[idhdf], statDataCl[idhdf].inds, axis=0)
            aPrfMol[idhdf]  = np.delete(aPrfMol[idhdf], statDataCl[idhdf].inds, axis=0)
            avkVMR[idhdf]   = np.delete(avkVMR[idhdf], statDataCl[idhdf].inds, axis=0)
            avkSCF[idhdf]   = np.delete(avkSCF[idhdf], statDataCl[idhdf].inds, axis=0)
            avkTC[idhdf]    = np.delete(avkTC[idhdf], statDataCl[idhdf].inds, axis=0)
            AirMPrf[idhdf]  = np.delete(AirMPrf[idhdf], statDataCl[idhdf].inds, axis=0)

        except Exception as errmsg:
            print '\nError: ', errmsg

        if pColsFlg:

            # AvgTpp       = mf.dailyAvg(dtp[idhdf], dates[idhdf], dateAxis=1, meanAxis=0)
            # AvgTpp       = AvgTpp['dailyAvg']

            # maxTpp       = np.max(AvgTpp)
            # minTpp       = np.min(AvgTpp)
            # meanTpp      = np.mean(AvgTpp)
            # stdTpp       = np.std(AvgTpp)

            #print '\nMean TPH: {0:.2f} +/- {1:.2f}'.format(meanTpp, stdTpp)

            if float(Lat_i[0]) >=70.: 
                meanTpp = 8.8
                stdTpp  = 1.2
            elif (float(Lat_i[0]) >= 60.0) & (float(Lat_i[0]) < 70.0):
                meanTpp = 9.8
                stdTpp  = 1.3
            elif (float(Lat_i[0]) >= 50.0) & (float(Lat_i[0]) < 60.0):
                meanTpp = 10.9
                stdTpp  = 1.2

            elif (float(Lat_i[0]) >= 40.0) & (float(Lat_i[0]) < 50.0):
                meanTpp = 11.6
                stdTpp  = 1.6

            elif (float(Lat_i[0]) >= 30.0) & (float(Lat_i[0]) < 40.0):
                meanTpp = 12.9 #12.58
                stdTpp  = 2.4  #2.72

            elif (float(Lat_i[0]) >= 20.0) & (float(Lat_i[0]) < 30.0):
                meanTpp = 15.0
                stdTpp  = 1.3

            elif (float(Lat_i[0]) >= -25.0) & (float(Lat_i[0]) < 20.0):
                meanTpp = 16.5
                stdTpp  = 0.4

            elif (float(Lat_i[0]) >= -40.0) & (float(Lat_i[0]) < -25.0):
                meanTpp = 12.3
                stdTpp  = 2.2

            elif (float(Lat_i[0]) >= -50.0) & (float(Lat_i[0]) < -40.0):
                meanTpp = 11.1
                stdTpp  = 1.3

            elif float(Lat_i[0]) < -50:
                meanTpp = 8.8
                stdTpp  = 1.7

            partialCols  = [ [0.0, 4.0], [4.0, (meanTpp - stdTpp*2.)], [(meanTpp+stdTpp*2.), 40.] ]

            for ii, pc in enumerate(partialCols):

                inds = np.where( (alt[idhdf] >= pc[0]) & (alt[idhdf] <= pc[1])  )[0]

                if (idhdf == 'Kiruna') or (idhdf == 'Izana') or (idhdf == 'Paris') or (idhdf == 'Altzomoni'):       

                    rPcol[idhdf+str(pc)]  = np.sum(rPrfMol[idhdf][:,inds], axis=1)
                    aPcol[idhdf+str(pc)]  = np.sum(aPrfMol[idhdf][:,inds], axis=1)

                    rPcol_2[idhdf+str(pc)]  = np.sum(rPrfMol_2[idhdf][:,inds], axis=1)


                        rPvmr[idhdf+str(pc)]  = np.average(rPrf[idhdf][:,inds], weights=AirMPrf[idhdf][:,inds],axis=1)
                        aPvmr[idhdf+str(pc)]  = np.average(aPrf[idhdf][:,inds], weights=AirMPrf[idhdf][:,inds],axis=1)

                        rPvmr_2[idhdf+str(pc)]  = np.average(rPrf_2[idhdf][:,inds], weights=AirMPrf_2[idhdf][:,inds],axis=1)
                    except Exception as errmsg:
                        rPvmr[idhdf+str(pc)]    = np.zeros(len(rPrfMol[idhdf][:,0]))
                        rPvmr[idhdf+str(pc)][:] = float('nan')

                        rPvmr_2[idhdf+str(pc)]    = np.zeros(len(rPrfMol_2[idhdf][:,0]))
                        rPvmr_2[idhdf+str(pc)][:] = float('nan')

                        aPvmr[idhdf+str(pc)]    = np.zeros(len(rPrfMol[idhdf][:,0]))
                        aPvmr[idhdf+str(pc)][:] = float('nan')


                    rPcol[idhdf+str(pc)]  = np.sum(rPrfMol[idhdf][:,inds], axis=1)
                    aPcol[idhdf+str(pc)]  = np.sum(aPrfMol[idhdf][:,inds], axis=1)

                    rPcol_2[idhdf+str(pc)]  = np.sum(rPrfMol_2[idhdf][:,inds], axis=1)


                        rPvmr[idhdf+str(pc)]  = np.average(rPrf[idhdf][:,inds], weights=AirMPrf[idhdf][:,inds],axis=1)
                        aPvmr[idhdf+str(pc)]  = np.average(aPrf[idhdf][:,inds], weights=AirMPrf[idhdf][:,inds],axis=1)

                        rPvmr_2[idhdf+str(pc)]  = np.average(rPrf_2[idhdf][:,inds], weights=AirMPrf_2[idhdf][:,inds],axis=1)

                    except Exception as errmsg:
                        rPvmr[idhdf+str(pc)]    = np.zeros(len(rPrfMol[idhdf][:,0]))
                        rPvmr[idhdf+str(pc)][:] = float('nan')

                        aPvmr[idhdf+str(pc)]    = np.zeros(len(rPrfMol[idhdf][:,0]))
                        aPvmr[idhdf+str(pc)][:] = float('nan')

                        rPvmr_2[idhdf+str(pc)]    = np.zeros(len(rPrfMol_2[idhdf][:,0]))
                        rPvmr_2[idhdf+str(pc)][:] = float('nan')

                if ii == 0:
                    PcolTrop1[idhdf]     = np.asarray(rPcol[idhdf+str(pc)])/TCsclfct
                    PcolTropapr1[idhdf]  = np.asarray(aPcol[idhdf+str(pc)])/TCsclfct

                    WvmrTrop1[idhdf]     = np.asarray(rPvmr[idhdf+str(pc)])
                    WvmrTropapr1[idhdf]  = np.asarray(aPvmr[idhdf+str(pc)])

                    altbl1[idhdf]       = np.zeros(len(rPrfMol[idhdf][:,0]))
                    altbl1[idhdf][:]    = np.asarray(alt[idhdf][inds[-1]])

                    altbl2[idhdf]       = np.zeros(len(rPrfMol[idhdf][:,0]))
                    altbl2[idhdf][:]    = np.asarray(alt[idhdf][inds[0]])

                    WvmrTrop1_2[idhdf]     = np.asarray(rPvmr_2[idhdf+str(pc)])

                elif ii == 1:
                    PcolTrop2[idhdf]     = np.asarray(rPcol[idhdf+str(pc)])/TCsclfct
                    PcolTropapr2[idhdf]  = np.asarray(aPcol[idhdf+str(pc)])/TCsclfct

                    WvmrTrop2[idhdf]     = np.asarray(rPvmr[idhdf+str(pc)])
                    WvmrTropapr2[idhdf]  = np.asarray(aPvmr[idhdf+str(pc)])

                    altft1[idhdf]       = np.zeros(len(rPrfMol[idhdf][:,0]))
                    altft1[idhdf][:]    = np.asarray(alt[idhdf][inds[-1]])

                    altft2[idhdf]       = np.zeros(len(rPrfMol[idhdf][:,0]))
                    altft2[idhdf][:]    = np.asarray(alt[idhdf][inds[0]])

                    WvmrTrop2_2[idhdf]     = np.asarray(rPvmr_2[idhdf+str(pc)])

                elif ii == 2:
                    PcolStrat[idhdf]    = np.asarray(rPcol[idhdf+str(pc)])/TCsclfct
                    PcolStratapr[idhdf] = np.asarray(aPcol[idhdf+str(pc)])/TCsclfct

                    WvmrStrat[idhdf]    = np.asarray(rPvmr[idhdf+str(pc)])
                    WvmrStratapr[idhdf] = np.asarray(aPvmr[idhdf+str(pc)])

                    altst1[idhdf]       = np.zeros(len(rPrfMol[idhdf][:,0]))
                    altst1[idhdf][:]    = np.asarray(alt[idhdf][inds[-1]])

                    altst2[idhdf]       = np.zeros(len(rPrfMol[idhdf][:,0]))
                    altst2[idhdf][:]    = np.asarray(alt[idhdf][inds[0]])

                    WvmrStrat_2[idhdf]   = np.asarray(rPvmr_2[idhdf+str(pc)])

        totWvmr[idhdf]  = np.average(rPrf[idhdf], axis=1, weights=AirMPrf[idhdf])
        atotWvmr[idhdf] = np.average(aPrf[idhdf], axis=1, weights=AirMPrf[idhdf])
    clmap = 'jet'
    cm           = plt.get_cmap(clmap)
    yearsLc      = YearLocator()
    daysLc       = DayLocator()
    months       = MonthLocator()
    DateFmt      = DateFormatter('%m')
    fig, ax   = plt.subplots(2, figsize=(8, 9), sharex=True)
    fig2, ax2 = plt.subplots(figsize=(7, 6))

    OCS_all   = []
    OCS_e_all = []
    N2O_all   = []
    N2O_e_all = []

    OCS_trop_all = []
    OCS_trop_e_all = []

    N2O_trop_all = []
    N2O_trop_e_all = []

    lifetime_all = []

    for i, idhdf in enumerate(pltID):

        if ColFlg:

            Avg            = mf.mnthlyAvg(totClmn[idhdf], dates[idhdf], dateAxis=1, meanAxis=0)
            Dates          = Avg['dates']
            dateYearFrac   = mf.toYearFraction(Avg['dates'])
            AvgData        =  Avg['mnthlyAvg']
            std            =  Avg['std']

            Avg_2          = mf.mnthlyAvg(totClmn_2[idhdf], dates_2[idhdf], dateAxis=1, meanAxis=0)
            Dates_2        = Avg_2['dates']
            dateYearFrac_2 = mf.toYearFraction(Avg_2['dates'])
            AvgData_2      =  Avg_2['mnthlyAvg']
            std_2          =  Avg_2['std']


            Avg            = mf.mnthlyAvg(WvmrStrat[idhdf], dates[idhdf], dateAxis=1, meanAxis=0)
            Dates          = Avg['dates']
            dateYearFrac   = mf.toYearFraction(Avg['dates'])
            AvgData        =  Avg['mnthlyAvg']
            std            =  Avg['std']

            Avg_2          = mf.mnthlyAvg(WvmrStrat_2[idhdf], dates_2[idhdf], dateAxis=1, meanAxis=0)
            Dates_2        = Avg_2['dates']
            dateYearFrac_2 = mf.toYearFraction(Avg_2['dates'])
            AvgData_2      =  Avg_2['mnthlyAvg']
            std_2          =  Avg_2['std']

        AvgTrop        = mf.mnthlyAvg(WvmrTrop2[idhdf], dates[idhdf], dateAxis=1, meanAxis=0)
        OCStrop        =  AvgTrop['mnthlyAvg']
        OCStrop_e      = AvgTrop['std']
        AvgTrop2       = mf.mnthlyAvg(WvmrTrop2_2[idhdf], dates_2[idhdf], dateAxis=1, meanAxis=0)
        N2Otrop        =  AvgTrop2['mnthlyAvg']
        N2Otrop_e      = AvgTrop2['std']

        intrsctVals = np.intersect1d(dateYearFrac, dateYearFrac_2, assume_unique=False)
        inds1       = np.nonzero( np.in1d( dateYearFrac, intrsctVals, assume_unique=False ) )[0]
        inds2       = np.nonzero( np.in1d( dateYearFrac_2, intrsctVals, assume_unique=False ) )[0]

        print '\n'
        print idhdf
        #print 'Total Number of Monthly OCS = ' +str(len(dateYearFrac))
        #print 'Total Number of Monthly N2O = ' +str(len(dateYearFrac_2))
        #print 'Total Number of coincident dates between OCS and N2O = ' +str(len(intrsctVals))

        AvgData   = AvgData[inds1]
        AvgData_2 = AvgData_2[inds2]

        std       = std[inds1]
        std_2     = std_2[inds2]

        indsZero   = np.where(std <= 0.)[0]
        indsZero_2 = np.where(std_2 <= 0.)[0]

        std[indsZero] =  AvgData[indsZero]*0.05 
        std_2[indsZero_2] =  AvgData_2[indsZero_2]*0.05    
        Dates     = Dates[inds1]
        Dates_2   = Dates_2[inds2]





        meanTropOCS   = np.nanmean(OCStrop[inds1])
        meanTropN2O   = np.nanmean(N2Otrop[inds2])

        ax[i].plot(Dates, AvgData,   linestyle='-', marker ='', color='b', label='OCS')
        ax[i].scatter(Dates, AvgData, s=35, edgecolor='k', color='b')

        axr = ax[i].twinx()

        axr.plot(Dates_2, AvgData_2,   linestyle='-', marker ='', color='r', label='N2O')
        axr.scatter(Dates_2, AvgData_2, s=35, edgecolor='k', color='r')

        if i == 0:  
            ax[i].legend(prop={'size':12}, loc=2)
            axr.legend(prop={'size':12}, loc=3)

        ax2.plot(AvgData, AvgData_2, linestyle='none', marker ='')
        ax2.scatter(AvgData, AvgData_2, s=35, edgecolor='k', label=idhdf)

        odr, odrErr  = mf.orthoregress(AvgData, AvgData_2, xerr= std, yerr=std_2,  InError=True)
        slopelr, interceptlr, r_valueln, p_valuelr, std_errlr = stats.linregress(AvgData, AvgData_2)

        slope      = float(odr[0])
        slope_e    = float(odrErr[0])
        intercept  = float(odr[1])
        intercept_e  = float(odrErr[1])

        if ColFlg: ax[i].set_ylabel('OCS [{}]'.format(TCsclfctName), fontsize=16)
        else: ax[i].set_ylabel('OCS [ppb]', fontsize=16)
        #ax[i].set_xlabel('OCS [ppt]', fontsize=16)
        ax[i].tick_params(axis='both', which='major', labelsize=14)

        if ColFlg: axr.set_ylabel('N$_2$O  [{}]'.format(TCsclfctName), fontsize=16)
        else: axr.set_ylabel('N$_2$O [ppb]', fontsize=16)

        #ax[i].set_xlabel('OCS [ppt]', fontsize=16)
        axr.tick_params(axis='both', which='major', labelsize=14)

        if i == 0:  
            ax[i].legend(prop={'size':12}, loc=2)
            axr.legend(prop={'size':12}, loc=3)

        lifetime = slope * (meanTropOCS / meanTropN2O) * 117.

        lifetime_e  = np.sqrt( (slope_e/slope)**2 + (20./117.)**2  +  (np.std(OCStrop[inds1])/np.mean(OCStrop[inds1]))**2 +  (np.std(N2Otrop[inds2])/np.mean(N2Otrop[inds2]))**2   ) * lifetime

        print '\nSlope: {0:.2f} +/- {1:.2f}'.format(slope, slope_e)
        print 'Intercept = {0:.3f} +/- {1:.3f}'.format(intercept, intercept_e)
        print 'R value = {0:.2f}'.format(float(r_valueln))
        print 'Trop OCS [ppb] = {0:.3f} +/- {1:.3f}'.format(np.mean(OCStrop[inds1]), np.std(OCStrop[inds1]))
        print 'Trop N2O [ppb] = {0:.3f} +/- {1:.3f}'.format(np.mean(N2Otrop[inds2]), np.std(N2Otrop[inds2]))

        print 'Lifetime = {0:.2f} +/- {1:.2f}'.format(float(lifetime), float(lifetime_e))


    OCS_all        = np.asarray(OCS_all)
    OCS_e_all      = np.asarray(OCS_e_all)
    N2O_e_all      = np.asarray(N2O_e_all)
    N2O_all        = np.asarray(N2O_all)

    OCS_trop_all   = np.asarray(OCS_trop_all)
    N2O_trop_all   = np.asarray(N2O_trop_all)

    odr, odrErr  = mf.orthoregress(OCS_all, N2O_all, xerr=OCS_e_all, yerr=N2O_e_all, InError=True)
    slopelr, interceptlr, r_valueln, p_valuelr, std_errlr = stats.linregress(OCS_all, N2O_all)

    slope      = float(odr[0])
    slope_e    = float(odrErr[0])
    intercept  = float(odr[1])
    intercept_e  = float(odrErr[1])

    lifetime    = slope * (np.mean(OCS_trop_all) / np.mean(N2O_trop_all)) * 117.
    lifetime_e  = np.sqrt( (slope_e/slope)**2 + (20./117.)**2  +  (np.std(OCS_trop_all)/np.mean(OCS_trop_all))**2 +  (np.std(N2O_trop_all)/np.mean(N2O_trop_all))**2   ) * lifetime
    print '\nAll'
    print '\nSlope        = {0:.2f} +/- {1:.2f}'.format(slope, slope_e)
    print 'Intercept      = {0:.3f} +/- {1:.3f}'.format(intercept, intercept_e)
    print 'R value        = {0:.2f}'.format(float(r_valueln))
    print 'Trop OCS [ppb] = {0:.3f} +/- {1:.3f}'.format(np.mean(OCS_trop_all), np.std(OCS_trop_all))
    print 'Trop N2O [ppb] = {0:.3f} +/- {1:.3f}'.format(np.mean(N2O_trop_all), np.std(N2O_trop_all))
    print 'Lifetime       = {0:.2f} +/- {1:.2f}'.format(float(lifetime), float(lifetime_e))

    lifetime_all = np.asarray(lifetime_all)

    print 'Lifetime all (Mean)   = {0:.2f} +/- {1:.2f}'.format(np.mean(lifetime_all), np.std(lifetime_all))
    print 'Lifetime all (Median) = {0:.2f} +/- {1:.2f}'.format(np.median(lifetime_all), np.std(lifetime_all))



    if ColFlg:
        ax2.set_xlabel('OCS [{} mole/cm$^2$]'.format(TCsclfctName), fontsize=16)
        ax2.set_ylabel('N$_2$O [{} mole/cm$^2$]'.format(TCsclfctName), fontsize=16)

        ax2.set_ylabel('N$_2$O [ppb]', fontsize=16)
        ax2.set_xlabel('OCS [ppb]', fontsize=16)
    ax2.tick_params(axis='both', which='major', labelsize=14)
    #ax2.set_ylim(300, 350)
    #ax2.set_xlim(0.3, 0.5)

    fig.subplots_adjust(left = 0.12, bottom=0.075, top=0.95, right = 0.9)
    fig2.subplots_adjust(left = 0.12, bottom=0.12, top=0.95, right = 0.95)

    if saveFlg: 
        plt.show(block= False)
        user_input = raw_input('Press any key to exit >>> ')
 def _linear_regression(response):
     """Regress ``response`` against ``bandpassed_frequencies``.

     ``bandpassed_frequencies`` and ``linregress`` are not defined in this
     function; both are looked up in the surrounding scope.  The value
     returned by ``linregress`` is handed back unchanged.
     """
     fit = linregress(bandpassed_frequencies, y=response)
     return fit
def get_Rsquared(y, predicted):
    """Return the squared correlation coefficient between ``predicted`` and ``y``.

    ``predicted`` is passed to ``linregress`` as the independent variable
    (``x``) and ``y`` as the dependent variable.  Only the third element of
    the regression result (the r-value) is used; it is squared and returned.
    """
    # Index 2 of the linregress result is the correlation coefficient.
    return linregress(x=predicted, y=y)[2] ** 2
 def calc_slope(x):
     """Return the least-squares slope of ``x`` against its element positions.

     The independent variable is the index sequence ``0 .. len(x) - 1``; the
     values in ``x`` are the dependent variable.  The fit is done with
     ``scipy.stats.mstats.linregress`` and only the slope (first element of
     the result) is returned.
     """
     from scipy.stats.mstats import linregress

     positions = range(len(x))
     regression = linregress(positions, x)
     return regression[0]