Example #1
    def _fit_alternative_numpy(self, pa, a):
        from scipy.linalg import solve, LinAlgError
        from scipy.stats.distributions import chi2

        gamma = self.gamma
        dpa = self._d_alt * pa

        # single thread => no need to copy
        ydy = self._ydy_alt
        xdy = self._xdy_alt
        xdx = self._xdx_alt

        if self.low_rank:
            xdy[0] = self.py @ dpa + gamma * (self.y @ a)
            xdx[0, 0] = pa @ dpa + gamma * (a @ a)
            xdx[0, 1:] = self.px.T @ dpa + gamma * (self.x.T @ a)
        else:
            xdy[0] = self.py @ dpa
            xdx[0, 0] = pa @ dpa
            xdx[0, 1:] = self.px.T @ dpa

        try:
            beta = solve(xdx, xdy, assume_a='pos')  # only uses upper triangle
            residual_sq = ydy - xdy.T @ beta
            sigma_sq = residual_sq / self._dof_alt
            chi_sq = self.n * np.log(self._residual_sq / residual_sq)  # division => precision
            p_value = chi2.sf(chi_sq, 1)

            return beta[0], sigma_sq, chi_sq, p_value
        except LinAlgError:
            return tuple(4 * [float('nan')])
Example #2
def LikelihoodRatioTest(LRT_table_0_het, LRT_table_s_het, LRT_table_0_common, LRT_table_s_common, \
                        LRT_table_0_bins, LRT_table_s_bins, LRT_num_sims, obs_het, obs_common, \
                        obs_bins, constant_het, denom_het, constant_common, denom_common, eps_bins, \
                        use_het, use_common, use_bins):

    # Get likelihood s = 0
    likelihood_0 = GetLikelihoodFromTable(LRT_table_0_het, LRT_table_0_common, LRT_table_0_bins, \
                                          LRT_num_sims, obs_het, obs_common, obs_bins, constant_het, \
                                          denom_het, constant_common, denom_common, eps_bins, use_het, \
                                          use_common, use_bins)
    # Get likelihood s = ABC_s
    likelihood_s_ABC = GetLikelihoodFromTable(LRT_table_s_het, LRT_table_s_common, LRT_table_s_bins, \
                                              LRT_num_sims, obs_het, obs_common, obs_bins, constant_het, \
                                              denom_het, constant_common, denom_common, eps_bins, use_het, \
                                              use_common, use_bins)

    # Calculate likelihood ratio
    LR = likelihood_0 / likelihood_s_ABC

    # Calculate LogLR
    LogLR = -2 * np.log(LR)

    # LogLR ~ Mixture distribution (50% 0, 50% Chi-square (df=1))
    pval = 0.5 * SF(LogLR) + 0.5 * chi2.sf(LogLR, 1)
    return likelihood_0, likelihood_s_ABC, LR, LogLR, pval
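The SF helper is not defined in this snippet; a minimal sketch of the 50/50 mixture null, assuming SF stands for the survival function of the point mass at zero, could look like this:

from scipy.stats.distributions import chi2

def SF(log_lr):
    # Hypothetical stand-in: survival function of a point mass at 0,
    # i.e. P(X >= x) = 1 for x <= 0 and 0 for x > 0.
    return 1.0 if log_lr <= 0 else 0.0

LogLR = 3.2  # illustrative -2*log(LR) value
pval = 0.5 * SF(LogLR) + 0.5 * chi2.sf(LogLR, 1)
print(pval)  # 0.5 * chi2.sf(3.2, 1), roughly 0.037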
Example #3
def find_global_keywords(data):
    key_words = set()
    with open("data/aggregate_frequency.json", "r", encoding="utf8") as file:
        agg_dict = json.load(file)
        keys = list(agg_dict.keys())
        global_freqs = list()
        step = 900
        for chunk in range(step, len(keys), step):
            search_str = ",".join(keys[chunk - step: chunk])
            retrieved = NGramRequest(search_str, start_year=2018).getJSON()
            global_freqs.append(retrieved)
        with open("data/global_freqs.json", "w", encoding="utf8") as g_freqs_file:
            g_freqs_file.write(json.dumps(global_freqs, indent=4))
        for ngram in global_freqs:
            g_word = ngram['ngram']
            g_freq = ngram['timeseries'][-1]
            if g_freq != 0:
                g_ll = math.log(g_freq)
                local_ll = math.log(agg_dict[g_word])
                lr = likelihood_ratio(local_ll, g_ll)
                p = chi2.sf(lr, 1)
                if p < 0.001:
                    print(g_word)
                    key_words.add(g_word)
    print("\n".join(key_words))
Example #4
def likelihood_ratio(mod, mod_r):
    """
    Deviance difference between two logistic models (likelihood ratio)

    Parameters
    ----------
    mod : statsmodel object from GLM
     First model to compare
    mod_r : statsmodel object from GLM
     Second model to compare against
    
    Returns
    -------
    float : p-value of the likelihood ratio

    Comments
    --------
    Source : http://rnowling.github.io/machine/learning/2017/10/07/likelihood-ratio-test.html
    Tested in R with lmtest
    """
    val = [mod.llf, mod_r.llf]
    LR = 2 * (max(val) - min(val))  # deviance difference (likelihood ratio statistic)

    val = [mod.df_model, mod_r.df_model]
    diff_df = max(val) - min(val)  # difference in degrees of freedom

    p = chi2.sf(LR, diff_df)  # significance test
    return p
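A usage sketch, assuming both fits come from statsmodels GLM with a binomial family; the spector dataset and the chosen predictors are purely illustrative:

import statsmodels.api as sm

data = sm.datasets.spector.load_pandas().data
y = data['GRADE']
X_full = sm.add_constant(data[['GPA', 'TUCE', 'PSI']])
X_reduced = sm.add_constant(data[['GPA']])

mod = sm.GLM(y, X_full, family=sm.families.Binomial()).fit()
mod_r = sm.GLM(y, X_reduced, family=sm.families.Binomial()).fit()

print(likelihood_ratio(mod, mod_r))  # small p-value favors the richer model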
Example #5
    def test_chisq(self, loci, genos1, genos2):
        """compare counts of observed to expected haplotypes at a locus"""
        if loci[0] == loci[1]: return (loci[0], loci[1], 0.0, 1.0)

        # count alleles and haplotypes
        ac1 = {0: 0, 1: 0}
        ac2 = {0: 0, 1: 0}
        obs = {'00': 0, '01': 0, '10': 0, '11': 0}

        n_loci = len(genos1)
        for i in range(n_loci):
            allele1 = genos1[i]
            allele2 = genos2[i]

            haplo = '{0}{1}'.format(allele1, allele2)
            obs[haplo] += 1

            ac1[allele1] += 1
            ac2[allele2] += 1

        # observed haplotype counts
        observed = np.array(list(obs.values()))

        # expected haplotype counts
        allele_counts = [ac1[0], ac1[1], ac2[0], ac2[1]]
        af = np.array(allele_counts) / float(n_loci)
        expected = np.array([
            af[0] * af[2], af[0] * af[3], af[1] * af[2], af[1] * af[3]
        ]) * n_loci

        # perform chisquare test
        hap_chisq = ((observed - expected)**2) / expected
        chisq_tot = hap_chisq.sum()
        p = chi2.sf(chisq_tot, 1)
        return (loci[0], loci[1], chisq_tot, p)
Example #6
def chi2_calc(f, par, X, Y, dY, dX, cov):
    """
        Parameters
        ----------    

        Returns
        -------

    """
    # as over this df is a derivative
    # has to be substitute, watch over
    df = (f(X + dX / 1e6, *par) - f(X, *par)) / (dX / 1e6)

    chi = sum((Y - f(X, *par))**2 / (dY**2 + (df * dX)**2))

    p = chi2.sf(chi, len(X) - len(par))
    sigma = sqrt(diag(cov))

    normcov = zeros((len(par), len(par)))

    for i in range(len(par)):
        for j in range(len(par)):
            normcov[i, j] = cov[i, j] / (sigma[i] * sigma[j])

    return chi, sigma, normcov, p
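A minimal usage sketch, assuming par and cov come from scipy.optimize.curve_fit and dX holds the x-uncertainties; the linear model and data below are illustrative:

import numpy as np
from numpy import sqrt, diag, zeros
from scipy.optimize import curve_fit
from scipy.stats.distributions import chi2

def f(x, a, b):
    return a * x + b

X = np.array([1.0, 2.0, 3.0, 4.0, 5.0])
Y = np.array([2.1, 3.9, 6.2, 8.1, 9.8])
dY = np.full_like(Y, 0.2)   # y uncertainties
dX = np.full_like(X, 0.05)  # x uncertainties

par, cov = curve_fit(f, X, Y, sigma=dY, absolute_sigma=True)
chi, sigma, normcov, p = chi2_calc(f, par, X, Y, dY, dX, cov)
print(chi, p)  # 3 degrees of freedom: 5 points minus 2 parameters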
Example #7
 def plotcurve(self):
     plotx = np.linspace(self.x[0], self.x[-1], 500)
     plotmodel = batman.TransitModel(self.params, plotx)
     model = batman.TransitModel(self.params, self.x)
     alpha = ''
     dof = 3 + len(self.params.u)
     if self.params.ecc == self.initialecc:
         alpha = '_fixedecc'
         dof += 2
     nu = len(self.y.tolist()) - dof
     print(nu)
     sigma2 = self.err**2.
     sum = (self.y-model.light_curve(self.params))**2.
     sum /= sigma2
     chisq = np.sum(sum)
     chisq_prob = chi2.sf(chisq, nu)
     print(chisq)
     print(chisq_prob)
     print(chisq/nu)
     fig = plt.figure()
     ax1 = fig.add_axes((.1,.3,.8,.6))
     ax2 = fig.add_axes((.1,.1,.8,.2))
     ax2.set_xlabel(r'$\textrm{Days from}~t_0$')
     ax1.set_ylabel(r'$\textrm{Relative flux}$')
     ax2.set_ylabel(r"$\textrm{Residuals}$")
     ax1.get_xaxis().set_ticks([])
     ax1.plot(plotx, plotmodel.light_curve(self.params), color='black', zorder = 10)
     ax2.plot(plotx, np.zeros_like(plotmodel.light_curve(self.params)), ':',color='black', zorder = 10)
     ax1.errorbar(self.x,self.y,yerr=self.err, fmt='o', mfc='darkgray', mec='darkgray', ecolor='darkgray', markersize=5, zorder = 0)
     ax2.errorbar(self.x, -(model.light_curve(self.params)-self.y), yerr = self.err, fmt='o', mfc='darkgray', mec='darkgray', ecolor='darkgray', markersize=5, zorder = 0)
     plt.savefig('finalplots/lightcurve_'+self.params.limb_dark+alpha+'.eps')
     plt.savefig('finalplots/lightcurve_'+self.params.limb_dark+alpha+'.png')
     plt.show()
Example #9
 def score_test(Xtest, y_true, y_predict):
     """对step forward进入的变量进行Score检验。函数假设新进入的变量放在最后.
     Xtest包括vars_old(似合模型并给出预测值y_predict的),和var_new(一个待检验的新变量)。
     Score检验假设待检验变量的系数为0,所以Xtest虽然包括了它的数据,但拟合参数是按没有此变量计算出来的。"""
     u = np.dot(Xtest.T, y_true - y_predict)  # 一阶导数
     h = np.dot(Xtest.T * (y_predict * (1 - y_predict)).values.reshape(len(y_predict)), Xtest)  # 二阶导数
     score = np.dot(np.dot(u.T, np.linalg.inv(h)), u)  # score 是 1*1 数组
     p_value = chi2.sf(score, 1)  # Score统计量服从自由度为1的卡方分布
     return score, p_value
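A usage sketch with simulated data, assuming y_predict is a pandas Series of fitted probabilities from the old model and the candidate variable is appended as the last column of Xtest:

import numpy as np
import pandas as pd
import statsmodels.api as sm

rng = np.random.default_rng(0)
n = 200
x_old = rng.normal(size=n)
x_new = rng.normal(size=n)
y_true = pd.Series((rng.random(n) < 1 / (1 + np.exp(-x_old))).astype(int))

# Fit the old model without the candidate variable
X_old = sm.add_constant(pd.DataFrame({'x_old': x_old}))
fit = sm.Logit(y_true, X_old).fit(disp=0)
y_predict = pd.Series(fit.predict(X_old), index=X_old.index)

# Candidate variable goes last, as the function assumes
Xtest = X_old.assign(x_new=x_new)
score, p_value = score_test(Xtest, y_true, y_predict)
print(score, p_value)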
Example #10
 def __init__(self, data, attr1, attr2):
     self.observed = get_contingency(data, attr1, attr2)
     self.n = np.sum(self.observed)
     self.probs_x = self.observed.sum(axis=0) / self.n
     self.probs_y = self.observed.sum(axis=1) / self.n
     self.expected = np.outer(self.probs_y, self.probs_x) * self.n
     self.residuals = (self.observed - self.expected) / np.sqrt(self.expected)
     self.chisqs = self.residuals ** 2
     self.chisq = float(np.sum(self.chisqs))
     self.p = chi2.sf(self.chisq, (len(self.probs_x) - 1) * (len(self.probs_y) - 1))
Example #11
def k2_lrt(dat, init_="k-means++", alpha=float(args.alpha)):
    # init_ is a vector of 0 and 1, describing the a priori groups for initialisation

    # Perform K=2 clustering:
    dat = np.array(dat)
    dat = dat.reshape(-1, 1)

    # Parse the initialisation parameters
    if type(init_) is list and len(init_) == len(dat):
        init_ = np.array(init_)
        mean_0 = np.mean(dat[np.where(init_ == 0)])
        mean_1 = np.mean(dat[np.where(init_ == 1)])
        init_ = np.array([[mean_0], [mean_1]])
        kmeans = KMeans(init=init_, n_clusters=2, n_init=1, max_iter=300, random_state=42)
    else:
        kmeans = KMeans(init="k-means++", n_clusters=2, n_init=10, max_iter=300, random_state=42)

    kmeans.fit(dat)
    # One cluster model log-likelihood:
    mu_1 = np.mean(dat)
    sigma_1 = np.std(dat)
    lnL_1 = np.sum(np.log(norm.pdf(dat, mu_1, sigma_1)))

    # Two cluster model log-likelihood:
    # Cluster 0:
    dat_0 = dat[np.where(kmeans.labels_ == 0)]
    mu_2_0 = np.mean(dat_0)
    sigma_2_0 = np.std(dat_0)
    lnL_2_0 = np.sum(np.log(norm.pdf(dat_0, mu_2_0, sigma_2_0)))

    # Cluster 1:
    dat_1 = dat[np.where(kmeans.labels_ == 1)]
    mu_2_1 = np.mean(dat_1)
    sigma_2_1 = np.std(dat_1)
    lnL_2_1 = np.sum(np.log(norm.pdf(dat_1, mu_2_1, sigma_2_1)))

    # Likelihood-ratio test:
    lnL_2 = lnL_2_0 + lnL_2_1
    LRT = - 2 * (lnL_1 - lnL_2)
    pval = chi2.sf(LRT, 2 + len(dat))
    # Degrees of freedom: I consider the base model has 1 df (the mean)

    # If we declare the test significant:
    if pval <= alpha:
        isBimodal = 1
        # We polarise the clusters: 0 for low het, 1 for high het
        if mu_2_0 >= mu_2_1:
            assign = [1 if x == 0 else 0 for x in kmeans.labels_]
        else:
            assign = [x for x in kmeans.labels_]
    else:
        isBimodal = 0
        assign = [0] * dat.shape[0]

    return ([assign, pval, isBimodal])
Example #12
def extract_traitrelax_parameters(input_path):

    with open(input_path, "r") as infile:
        content = infile.read()

    dictionary = dict()
    dataset_id_regex = re.compile("Parsing file .*?([^\/]*?).bpp for options", re.MULTILINE | re.DOTALL)
    dictionary["dataset_id"] = dataset_id_regex.search(content).group(1)

    print("input_path: ", input_path)
    print("dataset: ", dictionary["dataset_id"])

    regex_strings = {"null_logl": "Null model fitting.*?Overall Log likelihood\.*?\s*\:\s*(-\d*\.?\d*)",
                     "null_kappa": "Null model fitting.*?RELAX.kappa_1\.*?\s*\:\s*(\d*\.?\d*)",
                     "null_p": "Null model fitting.*?RELAX.p_1\.*?\s*\:\s*(\d*\.?\d*)",
                     "null_omega1": "Null model fitting.*?RELAX.omega1_1\.*?\s*\:\s*(\d*\.?\d*)",
                     "null_omega2": "Null model fitting.*?RELAX.omega2_1\.*?\s*\:\s*(\d*\.?\d*)",
                     "null_theta1": "Null model fitting.*?RELAX.theta1_1\.*?\s*\:\s*(\d*\.?\d*)",
                     "null_theta2": "Null model fitting.*?RELAX.theta2_1\.*?\s*\:\s*(\d*\.?\d*)",
                     "null_k": "Null model fitting.*?RELAX.k_2\.*?\s*\:\s*(\d*\.?\d*)",
                     "null_mu": "Null model fitting.*?TwoParameterBinary\.mu\.*?\s*\:\s*(\d*\.?\d*)",
                     "null_pi0": "Null model fitting.*?TwoParameterBinary\.pi0\.*?\s*\:\s*(\d*\.?\d*)",
                     "alternative_logl": "Alternative model fitting.*Overall Log likelihood\.*?\s*\:\s*(-\d*\.?\d*)",
                     "alternative_kappa": "Alternative model fitting.*RELAX.kappa_1\.*?\s*\:\s*(\d*\.?\d*)",
                     "alternative_p": "Null model fitting.*?RELAX.p_1\.*?\s*\:\s*(\d*\.?\d*)",
                     "alternative_omega1": "Alternative model fitting.*RELAX.omega1_1\.*?\s*\:\s*(\d*\.?\d*)",
                     "alternative_omega2": "Alternative model fitting.*RELAX.omega2_1\.*?\s*\:\s*(\d*\.?\d*)",
                     "alternative_theta1": "Alternative model fitting.*RELAX.theta1_1\.*?\s*\:\s*(\d*\.?\d*)",
                     "alternative_theta2": "Alternative model fitting.*RELAX.theta2_1\.*?\s*\:\s*(\d*\.?\d*)",
                     "alternative_k": "Alternative model fitting.*RELAX.k_2\.*?\s*\:\s*(\d*\.?\d*)",
                     "alternative_mu": "Alternative model fitting.*TwoParameterBinary\.mu\.*?\s*\:\s*(\d*\.?\d*)",
                     "alternative_pi0": "Alternative model fitting.*TwoParameterBinary\.pi0\.*?\s*\:\s*(\d*\.?\d*)"}
    # extract the basic field
    for field in regex_strings.keys():
        try:
            regex = re.compile(regex_strings[field], re.MULTILINE | re.DOTALL)
            dictionary[field] = regex.search(content).group(1)
        except:
            print("failed to extract ", field, " for dataset ", dictionary["dataset_id"])
            print("regex: ", regex_strings[field])
            print("function: extract_traitrelax_parameters")
            exit(1)
    # compute the induced parameters
    dictionary["null_omega0"] = str(float(dictionary["null_p"]) * float(dictionary["null_omega1"]))
    dictionary["null_p0"] = dictionary["null_theta1"]
    dictionary["null_p1"] = str(float(dictionary["null_theta2"]) * (1 - float(dictionary["null_theta1"])))
    dictionary["alternative_omega0"] = str(float(dictionary["alternative_p"]) * float(dictionary["alternative_omega1"]))
    dictionary["alternative_p0"] = dictionary["alternative_theta1"]
    dictionary["alternative_p1"] = str(float(dictionary["alternative_theta2"]) * (1 - float(dictionary["alternative_theta1"])))

    # LR and pvalue
    dictionary["LRT_statistic"] = 2 * (float(dictionary["alternative_logl"]) - float(dictionary["null_logl"]))
    dictionary["pvalue"] = chi2.sf(dictionary["LRT_statistic"], 1)  # 1 degree of freedom is the diff in num of parameters between alternative and null models

    return dictionary
Example #13
def kendallsW(datas):
    dim = datas.shape
    S = np.var(list(datas.apply(sum, axis = 0))) * dim[1]
    d = (dim[0]**2)*(dim[1]**3 - dim[1])
    w = S / d
    xx = dim[1] * (dim[0] - 1) * w
    df = dim[0] - 1
    pv = chi2.sf(xx, df)
    return({'type' : 'Kendall\'s W Test',
            'value' :  w ,
            'p-value' : pv })
Example #14
File: ramallo.py Project: facku24/MyS2016
def ej4():
    tiradas = 101
    N = [48, 35, 15, 3]
    P = [.67, .05, .11, .17]
    T = 0
    
    for i in range(4):
        print(T)
        T += (N[i] - tiradas * P[i])**2 / float(tiradas * P[i])
    
    return T, chi2.sf(T, 3)
Example #15
def find_relative_keywords(a_dict, b_dict):
    keywords = set()
    for key in a_dict:
        if key in b_dict:
            a_freq = math.log(a_dict[key])
            b_freq = math.log(b_dict[key])
            lr = likelihood_ratio(a_freq, b_freq)
            p = chi2.sf(lr, 1)
            if p < 0.001:
                keywords.add(key.lower())
    return keywords
Example #16
    def ConductLikelyhoodRatioTest(self, resulting_LLH, hypothesis_value):

        lr = 2 * (hypothesis_value - resulting_LLH)

        p = chi2.sf(lr, 0)

        if p > 0.9772:
            print("Likelyhood ratio test has passed")
        else:
            print("WARNING, likelyhood ratio test has failed")

        return chi2
Example #17
File: owsieve.py Project: waqarini/orange3
 def __init__(self, data, attr1, attr2):
     self.observed = get_contingency(data, attr1, attr2)
     self.n = np.sum(self.observed)
     self.probs_x = self.observed.sum(axis=0) / self.n
     self.probs_y = self.observed.sum(axis=1) / self.n
     self.expected = np.outer(self.probs_y, self.probs_x) * self.n
     self.residuals = \
         (self.observed - self.expected) / np.sqrt(self.expected)
     self.chisqs = self.residuals**2
     self.chisq = float(np.sum(self.chisqs))
     self.p = chi2.sf(self.chisq,
                      (len(self.probs_x) - 1) * (len(self.probs_y) - 1))
Example #18
File: logistic.py Project: capricorn08/MLE
def compare_model(first, second):
    LR = likelihood_ratio(first, second)  ## min, max
    # degrees of freedom: difference in the number of model parameters
    df = abs(first.df_model - second.df_model)
    if df == 0:
        df = 1
    ## H0: the restricted model is adequate
    ## assumes the LR statistic is asymptotically chi-squared distributed
    p = chi2.sf(LR, df)
    return p
Example #19
    def km_plot_data(self, name, time, censor, values):
        values_df = pd.DataFrame(
            {
                'time': time,
                'censor': censor,
                'value': values
            }, dtype=float)
        mean_value = values_df.value.mean()
        values_df['high'] = values_df.value >= mean_value

        data = {
            'time': robjects.FloatVector(values_df['time']),
            'censor': robjects.IntVector(values_df['censor']),
            'high': robjects.IntVector(values_df['high'])
        }
        df = robjects.DataFrame(data)

        # p value
        km_diff = self.surv.survdiff(
            robjects.Formula('Surv(time, censor) ~ high'), data=df)
        chisq_ind = list(km_diff.names).index('chisq')
        pvalue = chi2.sf(km_diff[chisq_ind][0], 1)

        km = self.surv.survfit(robjects.Formula('Surv(time, censor) ~ high'),
                               data=df)
        summary = pandas2ri.ri2py(r.summary(km, extend=True))
        r.assign('km', km)
        r.assign('times', data['time'])
        r.assign('res', r('summary(km, times=times)'))
        cols = r('lapply(c(2:6, 8:11), function(x) res[x])')
        r.assign('cols', cols)
        km_results = r('do.call(data.frame, cols)')
        km_results = pd.DataFrame(km_results)

        low_km = km_results[km_results['strata'] == 'high=0']
        high_km = km_results[km_results['strata'] == 'high=1']

        high_time, high_percent = self.make_plottable_kms(
            high_km['time'], high_km['surv'])
        low_time, low_percent = self.make_plottable_kms(
            low_km['time'], low_km['surv'])

        high = [{
            'percent': i[0],
            'time': i[1]
        } for i in zip(high_percent, high_time)]
        low = [{
            'percent': i[0],
            'time': i[1]
        } for i in zip(low_percent, low_time)]

        return {'high': high, 'low': low, 'p': float('%.4g' % pvalue)}
Example #20
def chi2_test(a, b, chi2_p_thresh, label):
    sum_ = a + b
    chi2_val = (((a - sum_ / 2.) ** 2) + ((b - sum_ / 2.) ** 2)) / sum_
    chi2_p = chi2.sf(chi2_val, 1)

    if chi2_p <= chi2_p_thresh:
        logger.warning("{} Forward/Reverse read count imbalance.".format(label))
        logger.warning("+/- = {} / {}, Chi-squared test p-val = {} <= {}".format(
            a, b, chi2_p, chi2_p_thresh
        ))
    else:
        logger.info("{} Forward/Reverse read count +/- = {} / {}".format(label, a, b))
        logger.info("Chi-squared test p-val = {} > {}".format(chi2_p, chi2_p_thresh))
Example #21
 def __init__(self, LL1, LL2, df, verbose=True):
     """
     :param LL1: log-likelihood with H0
     :param LL2: log-likelihood with H1/fitted parameters
     :param df: specify dfs, # of tested params
     :param verbose: display results in table
     """
     self.LR = LR = 2 * (LL2 - LL1)
     self.pval = pval = chi2.sf(LR, df=df)
     tbl = PrettyTable()
     tbl.field_names = ['LR test', '']
     tbl.add_row(['chi2({}) = '.format(df), '{:.4f}'.format(LR)])
     tbl.add_row(['Prob > chi2', '{:.4f}'.format(pval)])
     if verbose: print(tbl)
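The snippet only shows __init__; a usage sketch, assuming the enclosing class is named LRTest (a hypothetical name):

ll_null = -1543.2   # log-likelihood under H0
ll_alt = -1538.7    # log-likelihood under H1/fitted parameters
test = LRTest(ll_null, ll_alt, df=2)  # hypothetical class wrapping the __init__ above
# LR = 2 * (ll_alt - ll_null) = 9.0, pval = chi2.sf(9.0, 2) = exp(-4.5), roughly 0.011
print(test.LR, test.pval)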
Example #22
 def llr_test(model1: pm.ARIMA,
              model2: pm.ARIMA,
              significance=0.05) -> bool:
     """
     Likelihood ratio test
     :param model1: H0 model
     :param model2: HA model
     :param significance: significance level
     :return: H0 result test
     """
     k1 = len(model1.params())
     k2 = len(model2.params())
     # recover LR = 2 * (lnL2 - lnL1) from AIC = 2k - 2*lnL
     lr = 2 * (k2 - k1) + model1.aic() - model2.aic()
     return chi2.sf(lr, k2 - k1) > significance
Example #23
 def wald_test(result):
     """逐步回归backward的wald检验。result.wald_test_terms也实现了此算法 \n
     参数:
     ----------
     result: statsmodel.api.Logit.fit() 返回结果对象  \n
     返回值:
     ----------
     test_df: dataframe, wald 检验的结果,包含2列:wald_chi2,pvalue_chi2 """
     wald_chi2 = (result.params / result.bse) ** 2  # backward 的 wald 检验统计量,服从自由度为1的卡方分布
     wald_chi2.name = 'wald_chi2'
     pvalue_chi2 = pd.Series(chi2.sf(wald_chi2, 1),
                             index=wald_chi2.index, name='P>chi2')  # backward 的 wald 检验 p 值
     test = pd.concat([wald_chi2, pvalue_chi2], axis=1)
     return test
Example #24
def chi_squared_test(observed, mu, total):
    expected = []
    sum = 0
    for value in range(len(observed) - 1): 
        expected.append(total * (exp(-mu) * mu**value / factorial(value)))
        sum += total * (exp(-mu) * mu**value / factorial(value))
    expected.append(total - sum)
    (testStatistic, pValue) = chisquare(observed, f_exp=expected, 
                                        ddof=len(observed) - 1)  
    # note: the p-value returned by the chisquare function seems to be 
    # incorrect for the test statistic; I verified the return value of chi2.sf 
    # with both online tables of chi-squared value as well as Mathematica and 
    # MATLAB
    p = chi2.sf(testStatistic, len(observed) - 1)
    return (testStatistic, p)
Example #25
def independence_test_binary(tar, data, state):
    tar_size = len(state[tar])
    num_tar = np.zeros((tar_size, 1))
    num_che = {}
    num_co = {}

    for che in state.keys():
        if che != tar:
            che_size = len(state[che])
            num_che[che] = np.zeros((che_size, 1))
            num_co[che] = np.zeros((che_size, tar_size))

    for i in range(data.shape[0]):
        tar_state = state[tar].index(data[tar][i])
        num_tar[tar_state] = num_tar[tar_state] + 1

        for che in num_che.keys():
            che_state = state[che].index(data[che][i])
            num_che[che][che_state] = num_che[che][che_state] + 1
            num_co[che][che_state][
                tar_state] = num_co[che][che_state][tar_state] + 1

    p = {}
    for che in num_che.keys():
        G_temp = num_co[che] * np.log(
            num_co[che] * data.shape[0] / num_che[che].dot(num_tar.T))
        G_temp = G_temp.ravel()

        G = 2 * sum(G_temp[i]
                    for i in range(len(G_temp)) if not np.isnan(G_temp[i]))
        # p_temp = 1 - stats.chi2.cdf(G, (len(state[tar]) - 1) * (len(state[che]) - 1))

        dof = (len(state[tar]) - 1) * (len(state[che]) - 1)

        p_temp = chi2.sf(G, dof)
        if p_temp < 0.05:
            p[che] = p_temp

    pc_con = []
    if p:
        pc_con.append(min(p, key=p.get))
        p.pop(min(p, key=p.get))
    pc_rest = []
    while p:
        pc_rest.append(min(p, key=p.get))
        p.pop(min(p, key=p.get))

    return pc_con, pc_rest
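A usage sketch with simulated binary data; tar names the target column and state maps each column to its list of possible values (all names illustrative):

import numpy as np
import pandas as pd

rng = np.random.default_rng(0)
n = 500
t = rng.integers(0, 2, n)
a = np.where(rng.random(n) < 0.9, t, 1 - t)   # strongly associated with t
c = rng.integers(0, 2, n)                     # independent noise

data = pd.DataFrame({'T': t, 'A': a, 'C': c})
state = {'T': [0, 1], 'A': [0, 1], 'C': [0, 1]}

pc_con, pc_rest = independence_test_binary('T', data, state)
print(pc_con, pc_rest)  # 'A' is expected in pc_con; 'C' is usually filtered out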
Example #26
def find_aggregate_keywords():
    data = get_data()
    with open("data/aggregate_frequency.json", "r", encoding="utf8") as file:
        agg_freq = json.load(file)
        key_words = set()
        for person_data in data:
            for word in person_data["freq"].keys():
                if word.lower() in agg_freq.keys():
                    local_freq = math.log(person_data["freq"][word])
                    act_freq = math.log(agg_freq[word.lower()])
                    lr = likelihood_ratio(local_freq, act_freq)
                    p = chi2.sf(lr, 1)
                    if p < 0.001:
                        key_words.add(word)
        with open("data/aggregate_keywords.txt", "w", encoding="utf8") as keyword_file:
            for key in key_words:
                keyword_file.write(key + "\n")
Example #27
def chi_sq_test(p_val, c_val, m_val, n_val, signif):
    """Gives the Chi squared test results between groups one and two in a 2x2\
    contingency table.

    Parameters
    ==========

    p_val : Number of exposed in group one
    c_val : Number of exposed in group two
    m_val : Total number in group one
    n_val : Total number in group two
    signif : Significance cut off desired

    Returns
    =======

    The Chi square statistic

    Raises
    ======

    ValueError
        Significance level must be between 0 and 1

    See Also
    =======

    chi_sq_stat : Chi squared statistic

    Examples
    ========

    >>> chi_sq_test(56, 126, 366, 354, 0.05)
    (3.762993555770853e-10, True)
    >>> chi_sq_test(25, 108, 123, 313, 0.05)
    (0.0038048156707230687, True)

    """
    if not (isinstance(p_val, int) and isinstance(c_val, int)
            and isinstance(m_val, int) and isinstance(n_val, int)):
        raise TypeError('Count inputs must be integers')
    if not 0 <= signif <= 1:
        raise ValueError('Significance level must be between 0 and 1')
    stat = chi_sq_stat(p_val, c_val, m_val, n_val)
    prob = chi2.sf(stat, 1)
    return prob, prob < signif
Example #28
def _hosmer_lemeshow(y_true, predict_probas, num_groups=10, labels=None):

    df = pd.DataFrame(data=predict_probas, columns=['prediction_proba'])

    if labels is None:
        labels = np.unique(y_true)

    y_true = label_binarize(y_true, classes=labels)[:, 0]

    df['label'] = y_true
    df['quantile_rank'] = pd.qcut(df['prediction_proba'],
                                  num_groups,
                                  labels=False,
                                  duplicates='drop')
    h = 0
    results = pd.DataFrame(columns=[
        'decile', 'lower_bound', 'upper_bound', 'num_observations',
        'num_failures', 'predicted_failures'
    ])
    for i in range(num_groups):
        pcat_predictions = df[df['quantile_rank'] == i]
        num_observations = len(pcat_predictions)
        if num_observations == 0:
            continue
        obs1 = len(pcat_predictions[pcat_predictions['label'] ==
                                    1])  # how many were in category 1
        exp1 = pcat_predictions['prediction_proba'].mean() * num_observations
        lower_bound = pcat_predictions['prediction_proba'].min()
        upper_bound = pcat_predictions['prediction_proba'].max()
        obs0 = num_observations - obs1
        exp0 = num_observations - exp1
        h += ((obs1 - exp1)**2) / exp1 + ((obs0 - exp0)**2) / exp0
        # DataFrame.append was removed in pandas 2.0; concatenate a one-row frame instead
        results = pd.concat(
            [results,
             pd.DataFrame([{
                 'decile': i + 1,
                 'lower_bound': lower_bound,
                 'upper_bound': upper_bound,
                 'num_observations': num_observations,
                 'num_failures': obs1,
                 'predicted_failures': exp1
             }])],
            ignore_index=True)

    p = chi2.sf(h, num_groups - 2)
    return h, p, results
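A usage sketch with simulated, well-calibrated predictions (the data is illustrative):

import numpy as np

rng = np.random.default_rng(1)
predict_probas = rng.random(500)
y_true = (rng.random(500) < predict_probas).astype(int)  # calibrated by construction

h, p, table = _hosmer_lemeshow(y_true, predict_probas)
print(h, p)  # a large p-value is consistent with good calibration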
Example #29
 def __init__(self, data, attr1, attr2):
     attr1 = data.domain[attr1]
     attr2 = data.domain[attr2]
     if attr1.is_discrete and not attr1.values or \
             attr2.is_discrete and not attr2.values:
         self.p = np.nan
         return
     self.observed = get_contingency(data, attr1, attr2)
     self.n = np.sum(self.observed)
     self.probs_x = self.observed.sum(axis=0) / self.n
     self.probs_y = self.observed.sum(axis=1) / self.n
     self.expected = np.outer(self.probs_y, self.probs_x) * self.n
     self.residuals = \
         (self.observed - self.expected) / np.sqrt(self.expected)
     self.chisqs = self.residuals**2
     self.chisq = float(np.sum(self.chisqs))
     self.p = chi2.sf(self.chisq,
                      (len(self.probs_x) - 1) * (len(self.probs_y) - 1))
Example #30
def gtest(f_obs, f_exp=None, ddof=0):
    """
    http://en.wikipedia.org/wiki/G-test
    The G test can test for goodness of fit to a distribution
    Parameters
    ----------
    f_obs : array
        observed frequencies in each category
    f_exp : array, optional
        expected frequencies in each category.  By default the categories are
        assumed to be equally likely.
    ddof : int, optional
        adjustment to the degrees of freedom for the p-value
    Returns
    -------
    chisquare statistic : float
        The chisquare test statistic
    p : float
        The p-value of the test.
    Notes
    -----
    The p-value indicates the probability that the observed distribution is
    drawn from a distribution given frequencies in expected.
    So a low p-value inidcates the distributions are different.
    Examples
    --------
    >>> gtest([9.0, 8.1, 2, 1, 0.1, 20.0], [10, 5.01, 6, 4, 2, 1])
    (117.94955444335938, 8.5298516190930345e-24)
    >>> gtest([1.01, 1.01, 4.01], [1.00, 1.00, 4.00])
    (0.060224734246730804, 0.97033649350189344)
    >>> gtest([2, 1, 6], [4, 3, 2])
    (8.2135343551635742, 0.016460903780063787)
    References
    ----------
    http://en.wikipedia.org/wiki/G-test
    """
    f_obs = [i if i != 0 else 1e-10 for i in f_obs]
    f_obs = np.asarray(f_obs, 'f')
    k = f_obs.shape[0]
    f_exp = np.array([np.sum(f_obs, axis=0) / float(k)] * k, 'f') \
                if f_exp is None \
                else np.asarray(f_exp, 'f')
    g = 2 * np.add.reduce(f_obs * np.log(f_obs / f_exp))
    return g, chi2.sf(g, k - 1 - ddof)
Example #31
def ScafLRT(geno, posdf, groups, scaf):

    these_idx = [x for x in posdf.loc[posdf["chrom"] == scaf].index]
    these_geno = geno[:, these_idx]
    these_geno = ma.array(these_geno, mask = [these_geno == -1])

    # Ref allele frequency
    nChroms, nRef = [], []
    for x in range(these_geno.shape[1]):
        nChroms.append(2 * these_geno[:, x].count(axis=0))
        nRef.append(np.where(these_geno[:, x] == 0)[0].shape[0] * 2 + np.where(these_geno[:, x] == 1)[0].shape[0])

    expHet = np.mean(np.array([2 * x * (1 - x) for x in [nRef[i] / c for i, c in enumerate(nChroms)]]))
    these_geno[these_geno == 2] = 0
    obsHet = these_geno.mean(axis=1)

    F = np.array([(1 - obsHet[i]/expHet) for i in range(obsHet.shape[0])])

    # One cluster model log-likelihood:
    mu_1 = np.mean(F)
    sigma_1 = np.std(F)
    lnL_1 = np.sum(np.log(norm.pdf(F, mu_1, sigma_1)))

    # Two cluster model log-likelihood:
    # Cluster 0:
    dat_0 = F[np.where(np.array(groups) == 0)]
    mu_2_0 = np.mean(dat_0)
    sigma_2_0 = np.std(dat_0)
    lnL_2_0 = np.sum(np.log(norm.pdf(dat_0, mu_2_0, sigma_2_0)))

    # Cluster 1:
    dat_1 = F[np.where(np.array(groups) == 1)]
    mu_2_1 = np.mean(dat_1)
    sigma_2_1 = np.std(dat_1)
    lnL_2_1 = np.sum(np.log(norm.pdf(dat_1, mu_2_1, sigma_2_1)))
    lnL_2 = lnL_2_0 + lnL_2_1

    # Likelihood-ratio test:
    LRT = - 2 * (lnL_1 - lnL_2)

    dof = 2 + len(groups)
    pval = chi2.sf(LRT,  dof)
    return(pval)
Example #32
File: owsieve.py Project: astaric/orange3
 def __init__(self, data, attr1, attr2):
     attr1 = data.domain[attr1]
     attr2 = data.domain[attr2]
     if attr1.is_discrete and not attr1.values or \
             attr2.is_discrete and not attr2.values:
         self.p = np.nan
         return
     self.observed = get_contingency(data, attr1, attr2)
     self.n = np.sum(self.observed)
     self.probs_x = self.observed.sum(axis=0) / self.n
     self.probs_y = self.observed.sum(axis=1) / self.n
     self.expected = np.outer(self.probs_y, self.probs_x) * self.n
     self.residuals = \
         (self.observed - self.expected) / np.sqrt(self.expected)
     self.residuals = np.nan_to_num(self.residuals)
     self.chisqs = self.residuals ** 2
     self.chisq = float(np.sum(self.chisqs))
     self.p = chi2.sf(
         self.chisq, (len(self.probs_x) - 1) * (len(self.probs_y) - 1))
예제 #33
0
def chi2_approx(calc_stat, x, y):
    """
    Calculate the p-value for Dcorr and Hsic via a chi-squared approximation.

    In the case of distance and kernel methods, Dcorr (and by extension Hsic
    [#2ChiSq]_) can be approximated via a chi-squared distribution [#1ChiSq].
    This approximation is also applicable for the nonparametric MANOVA via
    independence testing method in our package [#3ChiSq]_.

    Parameters
    ----------
    calc_stat : callable()
        The method used to calculate the test statistic (must use hyppo API).
    x, y : ndarray
        Input data matrices. `x` and `y` must have the same number of
        samples. That is, the shapes must be `(n, p)` and `(n, q)` where
        `n` is the number of samples and `p` and `q` are the number of
        dimensions. Alternatively, `x` and `y` can be distance matrices,
        where the shapes must both be `(n, n)`.

    Returns
    -------
    stat : float
        The computed test statistic.
    pvalue : float
        The computed p-value.

    References
    ----------
    .. [#1ChiSq] Shen, C., & Vogelstein, J. T. (2019). The Chi-Square Test of Distance
                 Correlation. arXiv preprint arXiv:1912.12150.
    .. [#2ChiSq] Shen, C., & Vogelstein, J. T. (2018). The exact equivalence of
                 distance and kernel methods for hypothesis testing. arXiv preprint
                 arXiv:1806.05514.
    .. [#3ChiSq] Panda, S., Shen, C., Perry, R., Zorn, J., Lutz, A., Priebe, C. E., &
                 Vogelstein, J. T. (2019). Nonparametric MANOVA via Independence
                 Testing. arXiv e-prints, arXiv-1910.
    """
    n = x.shape[0]
    stat = calc_stat(x, y)
    pvalue = chi2.sf(stat * n + 1, 1)

    return stat, pvalue
Example #34
def p_value_calculator(N_kij, pc, dof):
    if pc:
        G = 0
        for k in range(N_kij.shape[2]):
            N_div = np.ones(N_kij.shape[0:2])
            N_div = np.multiply(N_div, N_kij[:, :, k].sum(axis=0))
            N_div = np.multiply(N_div, N_kij[:, :, k].sum(axis=1).reshape(N_kij[:, :, k].shape[0], 1))

            np.seterr(all='ignore')
            G = G + np.nansum(np.multiply(2 * N_kij[:, :, k], np.log(np.divide(N_kij[:, :, k] * N_kij[:, :, k].sum(), N_div))))
    else:
        N_div = np.ones(N_kij.shape)
        N_div = np.multiply(N_div, N_kij.sum(axis=0))
        N_div = np.multiply(N_div, N_kij.sum(axis=1).reshape(N_kij.shape[0], 1))

        np.seterr(all='ignore')
        G = np.nansum(np.multiply(2 * N_kij, np.log(np.divide(N_kij * N_kij.sum(), N_div))))
    p_value = chi2.sf(G, dof)
    return p_value
Example #35
File: owsieve.py Project: mstrazar/orange3
 def __init__(self, data, attr1, attr2):
     attr1 = data.domain[attr1]
     attr2 = data.domain[attr2]
     if attr1.is_discrete and not attr1.values or \
             attr2.is_discrete and not attr2.values:
         self.p = np.nan
         return
     self.observed = get_contingency(data, attr1, attr2)
     self.n = np.sum(self.observed)
     # pylint: disable=unexpected-keyword-arg
     self.probs_x = self.observed.sum(axis=0) / self.n
     self.probs_y = self.observed.sum(axis=1) / self.n
     self.expected = np.outer(self.probs_y, self.probs_x) * self.n
     with np.errstate(divide="ignore", invalid="ignore"):
         self.residuals = \
             (self.observed - self.expected) / np.sqrt(self.expected)
     self.residuals = np.nan_to_num(self.residuals)
     self.chisqs = self.residuals ** 2
     self.chisq = float(np.sum(self.chisqs))
     self.p = chi2.sf(
         self.chisq, (len(self.probs_x) - 1) * (len(self.probs_y) - 1))
Example #36
File: pr7ej2.py Project: facku24/MyS2016
def valor_p(T, k):
	return chi2.sf(T,k-1)
Example #37
File: 9-15.py Project: facku24/MyS2016
def valor_p(T, m):
	return chi2.sf(T,m-1)
Example #38
File: 9-1.py Project: facku24/MyS2016
def valor_p(T):
	return chi2.sf(T,N-1)
Example #39
File: ej9j.py Project: facku24/MyS2016
def valor_p_ji(y, k):
	
	return 2* min(chi2.sf(y,k-1), 1 - chi2.sf(y,k-1))
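A quick worked call, where y is an observed chi-squared statistic and k the number of categories:

# With y = 2.0 and k = 11 (10 degrees of freedom), chi2.sf(2.0, 10) is about 0.996,
# so the two-sided p-value is 2 * min(0.996, 0.004), roughly 0.007,
# flagging a suspiciously good fit as well as a bad one.
print(valor_p_ji(2.0, 11))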
Example #40
def	ehabitat(ecor,nw,nwpathout):

	global	nwpath
	if nw=='':
		nwpath = os.getcwd()
	else:
		nwpath = nw
		
	if gmaps == 0:
		initglobalmaps()
	if nwpathout=='':
		#outdir = 'results'	#	ToDo: locally	create	folder "results"	if it	does	not	exist!
		outdir = os.path.join(os.path.sep, os.getcwd(), 'results')
		safelyMakeDir(outdir)
	else:
		#outdir = nwpathout+'/results'	#	SHARED	FOLDER	PATH
		outdir = os.path.join(os.path.sep, nwpathout, 'results')
		safelyMakeDir(outdir)
		
	treepamin = treepamax = eprpamin = eprpamax = prepamin = prepamax = biopamin = biopamax = slopepamin = slopepamax = ndwipamin = ndwipamax = ndvimaxpamin = ndvimaxpamax = ndviminpamin = ndviminpamax = hpamin = hpamax = None
	s = nd.generate_binary_structure(2,2)	#	most	restrictive	pattern	for	the	landscape	patches
	#	LOCAL FOLDER
	csvname1 = os.path.join(os.path.sep, outdir, 'ecoregs_done.csv')
	print csvname1
	if os.path.isfile(csvname1) == False:
		wb = open(csvname1,'a')
		wb.write('None')
		wb.write('\n')
		wb.close()
	#	LOCAL FOLDER	
	csvname = os.path.join(os.path.sep, outdir, 'hri_results.csv')
	print csvname
	if os.path.isfile(csvname) == False:
		wb = open(csvname,'a')
		wb.write('ecoregion wdpaid averpasim hr2aver pxpa hr1insumaver hriaver nfeatsaver lpratio lpratio2 numpszok lpmaxsize aggregation treepamin treepamax eprpamin eprpamax prepamin prepamax biopamin biopamax slopepamin slopepamax ndwipamin ndwipamax ndvimaxpamin ndvimaxpamax ndviminpamin ndviminpamax hpamin hpamax treepamean eprpamean prepamean biopamean slopepamean ndwipamean ndvimaxpamean ndviminpamean hpamean')
		wb.write('\n')
		wb.close()
	treepamean = eprpamean = prepamean = biopamean = slopepamean = ndwipamean = ndvimaxpamean = ndviminpamean = hpamean = None
	ef = 'eco_'+str(ecor)+'.tif'
	ecofile = os.path.join(os.path.sep, nwpath, 'ecoregs', ef)
	#ecofile = os.path.join(os.path.sep, nwpath, os.path.sep,'ecoregs', os.path.sep, ef)
	print ecofile
	avail = os.path.isfile(ecofile)
	if avail == True:
		eco_csv = str(ecor)+'.csv'
		print eco_csv
		ecoparksf = os.path.join(os.path.sep, nwpath, 'pas', eco_csv)
		#ecoparksf = os.path.join(os.path.sep, nwpath, os.path.sep, 'pas', os.path.sep, eco_csv)
		print ecoparksf
		#ecoparksf = nwpath+'/pas/'+str(ecor)+'.csv'
		src_ds_eco = gdal.Open(ecofile)
		eco = src_ds_eco.GetRasterBand(1)
		eco_mask0 = eco.ReadAsArray(0,0,eco.XSize,eco.YSize).astype(np.int32)
		eco_mask = eco_mask0.flatten()
		gt_eco = src_ds_eco.GetGeoTransform()
		print 'eco mask'
		xoff = int((gt_eco[0]-gt_epr_global[0])/1000)
		yoff = int((gt_epr_global[3]-gt_eco[3])/1000)
		epr_eco_bb0 = epr_global.ReadAsArray(xoff,yoff,eco.XSize,eco.YSize).astype(np.float32)
		epr_eco_bb = epr_eco_bb0.flatten()
		epr_eco0 = np.where(eco_mask == 1,	(epr_eco_bb),(0))
		epr_eco = np.where(epr_eco0 == 65535.0,	(float('NaN')),(epr_eco0))
		maskepr = np.isnan(epr_eco)
		epr_eco[maskepr] = np.interp(np.flatnonzero(maskepr),	np.flatnonzero(~maskepr),	epr_eco[~maskepr])
		print 'eco epr'
		xoff = int((gt_eco[0]-gt_slope_global[0])/1000)
		yoff = int((gt_slope_global[3]-gt_eco[3])/1000)
		slope_eco_bb0 = slope_global.ReadAsArray(xoff,yoff,eco.XSize,eco.YSize).astype(np.float32)
		slope_eco_bb = slope_eco_bb0.flatten()
		slope_eco0 = np.where(eco_mask == 1,	(slope_eco_bb),(0))
		slope_eco = np.where(slope_eco0 == 65535.0,	(float('NaN')),(slope_eco0))
		maskslope = np.isnan(slope_eco)
		slope_eco[maskslope] = np.interp(np.flatnonzero(maskslope),	np.flatnonzero(~maskslope),	slope_eco[~maskslope])
		print 'eco slope'
		xoff = int((gt_eco[0]-gt_ndvimax_global[0])/1000)
		yoff = int((gt_ndvimax_global[3]-gt_eco[3])/1000)
		ndvimax_eco_bb0 = ndvimax_global.ReadAsArray(xoff,yoff,eco.XSize,eco.YSize).astype(np.float32)
		ndvimax_eco_bb = ndvimax_eco_bb0.flatten()
		ndvimax_eco0 = np.where(eco_mask == 1,	(ndvimax_eco_bb),(0))
		ndvimax_eco = np.where(ndvimax_eco0 == 65535.0,	(float('NaN')),(ndvimax_eco0))
		maskndvimax = np.isnan(ndvimax_eco)
		ndvimax_eco[maskndvimax] = np.interp(np.flatnonzero(maskndvimax),	np.flatnonzero(~maskndvimax),	ndvimax_eco[~maskndvimax])
		print 'eco ndvimax'
		xoff = int((gt_eco[0]-gt_ndvimin_global[0])/1000)
		yoff = int((gt_ndvimin_global[3]-gt_eco[3])/1000)
		ndvimin_eco_bb0 = ndvimin_global.ReadAsArray(xoff,yoff,eco.XSize,eco.YSize).astype(np.float32)
		ndvimin_eco_bb = ndvimin_eco_bb0.flatten()
		ndvimin_eco0 = np.where(eco_mask == 1,	(ndvimin_eco_bb),(0))
		ndvimin_eco = np.where(ndvimin_eco0 == 65535.0,	(float('NaN')),(ndvimin_eco0))
		maskndvimin = np.isnan(ndvimin_eco)
		ndvimin_eco[maskndvimin] = np.interp(np.flatnonzero(maskndvimin),	np.flatnonzero(~maskndvimin),	ndvimin_eco[~maskndvimin])
		print 'eco ndvimin'
		xoff = int((gt_eco[0]-gt_ndwi_global[0])/1000)
		yoff = int((gt_ndwi_global[3]-gt_eco[3])/1000)
		ndwi_eco_bb0 = ndwi_global.ReadAsArray(xoff,yoff,eco.XSize,eco.YSize).astype(np.float32)
		ndwi_eco_bb = ndwi_eco_bb0.flatten()
		ndwi_eco0 = np.where(eco_mask == 1,	(ndwi_eco_bb),(0))
		ndwi_eco = np.where(ndwi_eco0 == 255.0,	(float('NaN')),(ndwi_eco0))
		maskndwi = np.isnan(ndwi_eco)
		ndwi_eco[maskndwi] = np.interp(np.flatnonzero(maskndwi),	np.flatnonzero(~maskndwi),	ndwi_eco[~maskndwi])
		print 'eco ndwi'
		xoff = int((gt_eco[0]-gt_pre_global[0])/1000)
		yoff = int((gt_pre_global[3]-gt_eco[3])/1000)
		pre_eco_bb0 = pre_global.ReadAsArray(xoff,yoff,eco.XSize,eco.YSize).astype(np.float32)
		pre_eco_bb = pre_eco_bb0.flatten()
		pre_eco0 = np.where(eco_mask == 1,	(pre_eco_bb),(0))
		pre_eco = np.where(pre_eco0 == 65535.0,	(float('NaN')),(pre_eco0))
		maskpre = np.isnan(pre_eco)
		pre_eco[maskpre] = np.interp(np.flatnonzero(maskpre),	np.flatnonzero(~maskpre),	pre_eco[~maskpre])
		print 'eco pre'
		xoff = int((gt_eco[0]-gt_bio_global[0])/1000)
		yoff = int((gt_bio_global[3]-gt_eco[3])/1000)
		bio_eco_bb0 = bio_global.ReadAsArray(xoff,yoff,eco.XSize,eco.YSize).astype(np.float32)
		bio_eco_bb = bio_eco_bb0.flatten()
		bio_eco0 = np.where(eco_mask == 1,	(bio_eco_bb),(0))
		bio_eco = np.where(bio_eco0 == 65535.0,	(float('NaN')),(bio_eco0))
		maskbio = np.isnan(bio_eco)
		bio_eco[maskbio] = np.interp(np.flatnonzero(maskbio),	np.flatnonzero(~maskbio),	bio_eco[~maskbio])
		print 'eco bio'
		xoff = int((gt_eco[0]-gt_tree_global[0])/1000)
		yoff = int((gt_tree_global[3]-gt_eco[3])/1000)
		tree_eco_bb0 = tree_global.ReadAsArray(xoff,yoff,eco.XSize,eco.YSize).astype(np.float32)
		tree_eco_bb = tree_eco_bb0.flatten()
		tree_eco0 = np.where(eco_mask == 1,	(tree_eco_bb),(0))
		tree_eco = np.where(tree_eco0 == 255.0,	(float('NaN')),(tree_eco0))
		masktree = np.isnan(tree_eco)
		tree_eco[masktree] = np.interp(np.flatnonzero(masktree),	np.flatnonzero(~masktree),	tree_eco[~masktree])
		print 'eco tree'
		xoff = int((gt_eco[0]-gt_herb_global[0])/1000)
		yoff = int((gt_herb_global[3]-gt_eco[3])/1000)
		herb_eco_bb0 = herb_global.ReadAsArray(xoff,yoff,eco.XSize,eco.YSize).astype(np.float32)
		herb_eco_bb = herb_eco_bb0.flatten()
		herb_eco0 = np.where(eco_mask == 1,	(herb_eco_bb),(0))
		herb_eco = np.where(herb_eco0 == 255.0,	(float('NaN')),(herb_eco0))
		maskherb = np.isnan(herb_eco)
		herb_eco[maskherb] = np.interp(np.flatnonzero(maskherb),	np.flatnonzero(~maskherb),	herb_eco[~maskherb])
		print 'eco herb'
		ind_eco0 = np.column_stack((bio_eco,pre_eco,epr_eco,herb_eco,ndvimax_eco,ndvimin_eco,ndwi_eco,slope_eco,tree_eco))
		print 'ecovars stacked'
		
		print ecoparksf
		pa_list0 = np.genfromtxt(ecoparksf,dtype='string')	# create this file in subpas!
		pa_list = np.unique(pa_list0)
		n = len(pa_list)
		for	px in range(0,n): #	0,n

			pa = pa_list[px]
			print pa

			outfile = os.path.join(os.path.sep, outdir, str(ecor)+'_'+str(pa)+'.tif')
			outfile2 = os.path.join(os.path.sep, outdir, str(ecor)+'_'+str(pa)+'_lp.tif')
			outfile3 = os.path.join(os.path.sep, outdir, str(ecor)+'_'+str(pa)+'_mask.tif')
			#outfile = outdir+'/'+str(ecor)+'_'+str(pa)+'.tif'	#	LOCAL FOLDER
			pa_infile = 'pa_'+str(pa)+'.tif'

			pa4 = os.path.join(os.path.sep, nwpath, 'pas', pa_infile)
			#pa4 = os.path.join(os.path.sep, nwpath, os.path.sep, 'pas', os.path.sep, pa_infile)
			print pa4
			#pa4 = nwpath+'/pas/pa_'+str(pa)+'.tif'

			dropcols = np.arange(9,dtype=int)
			done = os.path.isfile(outfile)
			avail2 = os.path.isfile(pa4)
			if done == False and avail2 == True:
				pafile=pa4
				src_ds_pa = gdal.Open(pafile)
				par = src_ds_pa.GetRasterBand(1)
				pa_mask0 = par.ReadAsArray(0,0,par.XSize,par.YSize).astype(np.int32)
				pa_mask = pa_mask0.flatten()
				ind = pa_mask >	0 #==int(pa)
				go = 1
				sum_pa_mask = sum(pa_mask[ind])#/int(pa)
				if sum_pa_mask < 3: go = 0	#	not	processing	areas	smaller	than	3	pixels
				print sum_pa_mask
				sum_pa_mask_inv = len(pa_mask[pa_mask == 0])
				print sum_pa_mask_inv
				print len(pa_mask)
				ratiogeom = 10000
				if sum_pa_mask > 0: ratiogeom = sum_pa_mask_inv/sum_pa_mask
				#print ratiogeom
				gt_pa = src_ds_pa.GetGeoTransform()
				xoff = int((gt_pa[0]-gt_pre_global[0])/1000)
				yoff = int((gt_pre_global[3]-gt_pa[3])/1000)
				if xoff>0 and yoff>0 and go == 1:
					num_bands=src_ds_eco.RasterCount
					driver = gdal.GetDriverByName("GTiff")
					dst_options = ['COMPRESS=LZW']
					dst_ds = driver.Create(	outfile,src_ds_eco.RasterXSize,src_ds_eco.RasterYSize,num_bands,gdal.GDT_Float32,dst_options)
					dst_ds.SetGeoTransform(	src_ds_eco.GetGeoTransform())
					dst_ds.SetProjection(	src_ds_eco.GetProjectionRef())
					xoff = int((gt_pa[0]-gt_tree_global[0])/1000)
					yoff = int((gt_tree_global[3]-gt_pa[3])/1000)
					tree_pa_bb0 = tree_global.ReadAsArray(xoff,yoff,par.XSize,par.YSize).astype(np.float32)
					tree_pa_bb = tree_pa_bb0.flatten()
					tree_pa0 = tree_pa_bb[ind]
					tree_pa = np.where(tree_pa0 == 255.0, (float('NaN')),(tree_pa0))
					mask2tree = np.isnan(tree_pa)
					if mask2tree.all() == True:
						dropcols[8] = -8
					else:
						tree_pa[mask2tree] = np.interp(np.flatnonzero(mask2tree),	np.flatnonzero(~mask2tree),	tree_pa[~mask2tree])
						tree_pa = np.random.random_sample(len(tree_pa),)/1000 + tree_pa
						print 'pa tree'

						treepamin = round(tree_pa.min(),2)
						treepamax = round(tree_pa.max(),2)
						treepamean = round(np.mean(tree_pa),2)
						print treepamin
						print treepamax
						treediff = abs(tree_pa.min()-tree_pa.max())
						if treediff < 0.001: dropcols[8] = -8

					xoff = int((gt_pa[0]-gt_epr_global[0])/1000)
					yoff = int((gt_epr_global[3]-gt_pa[3])/1000)
					epr_pa_bb0 = epr_global.ReadAsArray(xoff,yoff,par.XSize,par.YSize).astype(np.float32)
					epr_pa_bb = epr_pa_bb0.flatten()
					epr_pa0 = epr_pa_bb[ind]
					epr_pa = np.where(epr_pa0 == 65535.0,	(float('NaN')),(epr_pa0))
					mask2epr = np.isnan(epr_pa)
					if mask2epr.all() == True:
						dropcols[2] = -2
					else:
						epr_pa[mask2epr] = np.interp(np.flatnonzero(mask2epr),	np.flatnonzero(~mask2epr),	epr_pa[~mask2epr])
						epr_pa = np.random.random_sample(len(epr_pa),)/1000 + epr_pa
						print 'pa epr'

						eprpamin = round(epr_pa.min(),2)
						eprpamax = round(epr_pa.max(),2)
						eprpamean = round(np.mean(epr_pa),2)
						print eprpamin
						print eprpamax
						eprdiff = abs(epr_pa.min()-epr_pa.max())
						if eprdiff < 0.001: dropcols[2] = -2

					xoff = int((gt_pa[0]-gt_pre_global[0])/1000)
					yoff = int((gt_pre_global[3]-gt_pa[3])/1000)
					pre_pa_bb0 = pre_global.ReadAsArray(xoff,yoff,par.XSize,par.YSize).astype(np.float32)
					pre_pa_bb = pre_pa_bb0.flatten()
					pre_pa0 = pre_pa_bb[ind]
					pre_pa = np.where(pre_pa0 == 65535.0,	(float('NaN')),(pre_pa0))
					mask2pre = np.isnan(pre_pa)
					if mask2pre.all() == True:
						dropcols[1] = -1
					else:
						pre_pa[mask2pre] = np.interp(np.flatnonzero(mask2pre),	np.flatnonzero(~mask2pre),	pre_pa[~mask2pre])
						pre_pa = np.random.random_sample(len(pre_pa),)/1000 + pre_pa
						print 'pa pre'

						prepamin = round(pre_pa.min(),2)
						prepamax = round(pre_pa.max(),2)
						prepamean = round(np.mean(pre_pa),2)
						print prepamin
						print prepamax
						prediff = abs(pre_pa.min()-pre_pa.max())
						if prediff < 0.001: dropcols[1] = -1

					xoff = int((gt_pa[0]-gt_bio_global[0])/1000)
					yoff = int((gt_bio_global[3]-gt_pa[3])/1000)
					bio_pa_bb0 = bio_global.ReadAsArray(xoff,yoff,par.XSize,par.YSize).astype(np.float32)
					bio_pa_bb = bio_pa_bb0.flatten()
					bio_pa0 = bio_pa_bb[ind]
					bio_pa = np.where(bio_pa0 == 65535.0,	(float('NaN')),(bio_pa0))
					mask2bio = np.isnan(bio_pa)
					if mask2bio.all() == True:
						dropcols[0] = -0
					else:
						bio_pa[mask2bio] = np.interp(np.flatnonzero(mask2bio),	np.flatnonzero(~mask2bio),	bio_pa[~mask2bio])
						bio_pa = np.random.random_sample(len(bio_pa),)/1000 + bio_pa
						print 'pa bio'

						biopamin = round(bio_pa.min(),2)
						biopamax = round(bio_pa.max(),2)
						biopamean = round(np.mean(bio_pa),2)
						print biopamin
						print biopamax
						biodiff = abs(bio_pa.min()-bio_pa.max())
						if biodiff < 0.001: dropcols[0] = -0

					xoff = int((gt_pa[0]-gt_slope_global[0])/1000)
					yoff = int((gt_slope_global[3]-gt_pa[3])/1000)
					slope_pa_bb0 = slope_global.ReadAsArray(xoff,yoff,par.XSize,par.YSize).astype(np.float32)
					slope_pa_bb = slope_pa_bb0.flatten()
					slope_pa0 = slope_pa_bb[ind]
					slope_pa = np.where(slope_pa0 == 65535.0,	(float('NaN')),(slope_pa0))
					mask2slope = np.isnan(slope_pa)
					if mask2slope.all() == True:
						dropcols[7] = -7
					else:
						slope_pa[mask2slope] = np.interp(np.flatnonzero(mask2slope),	np.flatnonzero(~mask2slope),	slope_pa[~mask2slope])
						slope_pa = np.random.random_sample(len(slope_pa),)/1000 + slope_pa
						print 'pa slope'

						slopepamin = round(slope_pa.min(),2)
						slopepamax = round(slope_pa.max(),2)
						slopepamean = round(np.mean(slope_pa),2)
						print slopepamin
						print slopepamax
						slopediff = abs(slope_pa.min()-slope_pa.max())
						if slopediff < 0.001: dropcols[7] = -7

					xoff = int((gt_pa[0]-gt_ndwi_global[0])/1000)
					yoff = int((gt_ndwi_global[3]-gt_pa[3])/1000)
					ndwi_pa_bb0 = ndwi_global.ReadAsArray(xoff,yoff,par.XSize,par.YSize).astype(np.float32)
					ndwi_pa_bb = ndwi_pa_bb0.flatten()
					ndwi_pa0 = ndwi_pa_bb[ind]
					ndwi_pa = np.where(ndwi_pa0 == 255.0,	(float('NaN')),(ndwi_pa0))
					mask2ndwi = np.isnan(ndwi_pa)
					if mask2ndwi.all() == True:
						dropcols[6] = -6
					else:
						ndwi_pa[mask2ndwi] = np.interp(np.flatnonzero(mask2ndwi),	np.flatnonzero(~mask2ndwi),	ndwi_pa[~mask2ndwi])
						ndwi_pa = np.random.random_sample(len(ndwi_pa),)/1000 + ndwi_pa
						print 'pa ndwi'

						ndwipamin = round(ndwi_pa.min(),2)
						ndwipamax = round(ndwi_pa.max(),2)
						ndwipamean = round(np.mean(ndwi_pa),2)
						print ndwipamin
						print ndwipamax
						ndwidiff = abs(ndwi_pa.min()-ndwi_pa.max())
						if ndwidiff < 0.001: dropcols[6] = -6

					xoff = int((gt_pa[0]-gt_ndvimax_global[0])/1000)
					yoff = int((gt_ndvimax_global[3]-gt_pa[3])/1000)
					ndvimax_pa_bb0 = ndvimax_global.ReadAsArray(xoff,yoff,par.XSize,par.YSize).astype(np.float32)
					ndvimax_pa_bb = ndvimax_pa_bb0.flatten()
					ndvimax_pa0 = ndvimax_pa_bb[ind]
					ndvimax_pa = np.where(ndvimax_pa0 == 65535.0,	(float('NaN')),(ndvimax_pa0))
					mask2ndvimax = np.isnan(ndvimax_pa)
					if mask2ndvimax.all() == True:
						dropcols[4] = -4
					else:
						ndvimax_pa[mask2ndvimax] = np.interp(np.flatnonzero(mask2ndvimax),	np.flatnonzero(~mask2ndvimax),	ndvimax_pa[~mask2ndvimax])
						ndvimax_pa = np.random.random_sample(len(ndvimax_pa),)/1000 + ndvimax_pa
						print 'pa ndvimax'

						ndvimaxpamin = round(ndvimax_pa.min(),2)
						ndvimaxpamax = round(ndvimax_pa.max(),2)
						ndvimaxpamean = round(np.mean(ndvimax_pa),2)
						print ndvimaxpamin
						print ndvimaxpamax
						ndvimaxdiff = abs(ndvimax_pa.min()-ndvimax_pa.max())
						if ndvimaxdiff < 0.001: dropcols[4] = -4

					xoff = int((gt_pa[0]-gt_ndvimin_global[0])/1000)
					yoff = int((gt_ndvimin_global[3]-gt_pa[3])/1000)
					ndvimin_pa_bb0 = ndvimin_global.ReadAsArray(xoff,yoff,par.XSize,par.YSize).astype(np.float32)
					ndvimin_pa_bb = ndvimin_pa_bb0.flatten()
					ndvimin_pa0 = ndvimin_pa_bb[ind]
					ndvimin_pa = np.where(ndvimin_pa0 == 65535.0,	(float('NaN')),(ndvimin_pa0))
					mask2ndvimin = np.isnan(ndvimin_pa)
					if mask2ndvimin.all() == True:
						dropcols[5] = -5
					else:
						ndvimin_pa[mask2ndvimin] = np.interp(np.flatnonzero(mask2ndvimin),	np.flatnonzero(~mask2ndvimin),	ndvimin_pa[~mask2ndvimin])
						ndvimin_pa = np.random.random_sample(len(ndvimin_pa),)/1000 + ndvimin_pa
						print 'pa ndvimin'

						ndviminpamin = round(ndvimin_pa.min(),2)
						ndviminpamax = round(ndvimin_pa.max(),2)
						ndviminpamean = round(np.mean(ndvimin_pa),2)
						print ndviminpamin
						print ndviminpamax
						ndvimindiff = abs(ndvimin_pa.min()-ndvimin_pa.max())
						if ndvimindiff < 0.001: dropcols[5] = -5

					xoff = int((gt_pa[0]-gt_herb_global[0])/1000)
					yoff = int((gt_herb_global[3]-gt_pa[3])/1000)
					herb_pa_bb0 = herb_global.ReadAsArray(xoff,yoff,par.XSize,par.YSize).astype(np.float32)
					herb_pa_bb = herb_pa_bb0.flatten()
					herb_pa0 = herb_pa_bb[ind]
					herb_pa = np.where(herb_pa0 == 255.0,	(float('NaN')),(herb_pa0))
					mask2herb = np.isnan(herb_pa)
					if mask2herb.all() == True:
						dropcols[3] = -3
					else:
						herb_pa[mask2herb] = np.interp(np.flatnonzero(mask2herb),	np.flatnonzero(~mask2herb),	herb_pa[~mask2herb])
						herb_pa = np.random.random_sample(len(herb_pa),)/1000 + herb_pa
						print 'pa herb'

						hpamin = round(herb_pa.min(),2)
						hpamax = round(herb_pa.max(),2)
						hpamean = round(np.mean(herb_pa),2)
						print hpamin
						print hpamax
						hdiff = abs(herb_pa.min()-herb_pa.max())
						if hdiff < 0.001: dropcols[3] = -3

					cols = dropcols[dropcols>=0]
					ind_pa0 = np.column_stack((bio_pa,pre_pa,epr_pa,herb_pa,ndvimax_pa,ndvimin_pa,ndwi_pa,slope_pa,tree_pa))
					ind_pa = ind_pa0[:,cols]
					ind_eco = ind_eco0[:,cols]
					print ind_pa.shape
					hr1sum = hr1insum = indokpsz = pszok = sumpszok = lpratio2 = numpszok = hr1averpa = hr3aver = hr2aver = pszmax = num_featuresaver = lpratio = hr1medianpa = hr1insumaver = pxpa = aggregation = None
					print "PA masked"
					#print ind_pa
					if ind_pa.shape[0]>4 and ind_pa.shape[1]>1: 
						Ymean = np.mean(ind_pa,axis=0)
						print 'Max. mean value is '+ str(Ymean.max())
						print "Ymean ok"
						Ycov = np.cov(ind_pa,rowvar=False)
						print 'Max. cov value is '+ str(Ycov.max())
						print "Ycov	ok"
						#mh = mahalanobis_distances(Ymean,	Ycov,	ind_eco,	parallel=False)
						#mh2 = mahalanobis_distances(Ymean,	Ycov,	ind_eco,	parallel=True)
						mh2 = mahalanobis_distances_scipy(Ymean,	Ycov,	ind_eco,	parallel=True) # previous working version
						#mh2 = mahalanobis_distances_scipy(Ymean,	Ycov,	ind_eco,	parallel=False)
						maxmh=mh2.max()
						print 'Max. mh value is '+ str(maxmh)
						print 'Max. mh value is nan: '+ str(np.isnan(maxmh))
						mh = mh2*mh2
						print "mh ok"
						pmh = chi2.sf(mh,len(cols)).reshape((eco.YSize,eco.XSize)) # chisqprob
						pmhh = np.where(pmh	<=	0.001,None,	pmh)
						print "pmh ok"	#	quitar	valores	muy	bajos!
						pmhhmax = pmhh.max()
						print 'Max. similarity value is '+ str(pmhhmax)
						dst_ds.GetRasterBand(1).WriteArray(pmhh)
						dst_ds = None
						hr11 = np.where(pmhh>0,1,0) # 0.5
						hr1 = hr11.flatten()
						hr1sum = sum(hr1)
						print 'Number of pixels with similarity higher than 0 is '+str(hr1sum)
						hr1insumaver = hr1insum = 0
						hr1sumaver = hr1sum
						src_ds_sim = gdal.Open(outfile)
						sim = src_ds_sim.GetRasterBand(1)
						gt_sim = src_ds_sim.GetGeoTransform()
						xoff = int((gt_pa[0]-gt_sim[0])/1000)
						yoff = int((gt_sim[3]-gt_pa[3])/1000)
						xextentpa = xoff + par.XSize
						yextentpa = yoff + par.YSize
						xless = sim.XSize - xextentpa
						yless = sim.YSize - yextentpa
						xsize = par.XSize
						ysize = par.YSize
						if xoff>0 and yoff>0 and pmhhmax>0.01 and hr1sum>1 and maxmh!=float('NaN'):#and ratiogeom < 100: #	also	checks	if results	are	not	empty

							# reading the similarity ecoregion without the PA (tmp mask)
							os.system('gdal_merge.py '+str(ecofile)+' '+str(pa4)+' -o '+str(outfile3)+' -ot Int32')
							hri_pa_bb03 = sim.ReadAsArray().astype(np.float32)
							hri_pa_bb3 = hri_pa_bb03.flatten()
							
							src_ds_sim2 = gdal.Open(outfile3)
							sim2 = src_ds_sim2.GetRasterBand(1)
							gt_sim2 = src_ds_sim2.GetGeoTransform()
							hri_pa_bb02 = sim2.ReadAsArray().astype(np.int32)
							#hri_pa_bb2 = hri_pa_bb02.flatten()
							hri_pa_bb02_max = hri_pa_bb02.max()
							print 'PA: '+str(pa)
							print 'PA (= max) value from mask = '+str(hri_pa_bb02_max)
							if hri_pa_bb02.shape == hri_pa_bb03.shape:
							 hri_pa02 = np.where(hri_pa_bb02 == pa,0,hri_pa_bb03) # hri_pa_bb02_max


							 if xless < 0: xsize = xsize + xless
							 if yless < 0: ysize = ysize + yless
							 hri_pa_bb0 = sim.ReadAsArray(xoff,yoff,xsize,ysize).astype(np.float32)
							 hri_pa_bb = hri_pa_bb0.flatten()
							 indd = hri_pa_bb > 0
							 hri_pa0 = hri_pa_bb[indd]
							 print 'Total number of pixels with similarity values in PA: '+str(len(hri_pa0))
							 hr1averpa = round(np.mean(hri_pa0[~np.isnan(hri_pa0)]),2)
							 #print hr1averpa
							 #hr1medianpa = np.median(hri_pa0[~np.isnan(hri_pa0)])
							 print 'mean similarity in the park is '+str(hr1averpa)
							 #hr1insum = sum(np.where(hri_pa0 >= 0.5,	1,0))	#	use	hr1averpa	as	threshold	instead!						
							 hr1inaver = np.where(hri_pa0 >= hr1averpa,	1,0)
							 hr1insumaver = sum(hr1inaver)
							 #print hr1insum
							 ##labeled_arrayin, num_featuresin = nd.label(hr1inaver,	structure=s)
							 hr1averr = np.where(hri_pa02 >= hr1averpa,	1,0) # pmhh
							 hr1aver = hr1averr.flatten()
							 print 'Total number of pixels with similarity values in ECO: '+str(sum(hr1aver))
							 labeled_arrayaver, num_featuresaver = nd.label(hr1averr,	structure=s)
							 print 'Nr of similar patches found: '+str(num_featuresaver)
							 if num_featuresaver > 0:
							  lbls = np.arange(1, num_featuresaver+1)
							  psizes = nd.labeled_comprehension(labeled_arrayaver, labeled_arrayaver, lbls, np.count_nonzero, float, 0) #-1
							  pszmax = psizes.max()#-hr1insumaver
							  dst_ds2 = driver.Create(outfile2,src_ds_eco.RasterXSize,src_ds_eco.RasterYSize,num_bands,gdal.GDT_Int32,dst_options)
							  dst_ds2.SetGeoTransform(src_ds_eco.GetGeoTransform())
							  dst_ds2.SetProjection(src_ds_eco.GetProjectionRef())
							  dst_ds2.GetRasterBand(1).WriteArray(labeled_arrayaver)
							  dst_ds2 = None
							  #num_feats = num_features - num_featuresaver
							  hr1sumaver = sum(hr1aver)
							  hr2aver = hr1sumaver #- hr1insumaver
							  pxpa = ind_pa.shape[0]
							  indokpsz = psizes >= pxpa
							  pszsok = psizes[indokpsz] # NEW
							  sumpszok = sum(pszsok)
							  lpratio=round(float(pszmax/pxpa),2)
							  lpratio2=round(float(sumpszok/pxpa),2)
							  numpszok = len(pszsok)
							  hr3aver = round(float(hr2aver/pxpa),2)
							  aggregation = round(float(hr2aver/num_featuresaver),2)
						#hr2 = hr1sumaver - hr1insumaver
						#print hr2
						#hr3 = float(hr2/ind_pa.shape[0])
						#print hr3
					wb = open(csvname,'a')
					var = str(ecor)+' '+str(pa)+' '+str(hr1averpa)+' '+str(hr2aver)+' '+str(pxpa)+' '+str(hr1insumaver)+' '+str(hr3aver)+' '+str(num_featuresaver)+' '+str(lpratio)+' '+str(lpratio2)+' '+str(numpszok)+' '+str(pszmax)+' '+str(aggregation)+' '+str(treepamin)+' '+str(treepamax)+' '+str(eprpamin)+' '+str(eprpamax)+' '+str(prepamin)+' '+str(prepamax)+' '+str(biopamin)+' '+str(biopamax)+' '+str(slopepamin)+' '+str(slopepamax)+' '+str(ndwipamin)+' '+str(ndwipamax)+' '+str(ndvimaxpamin)+' '+str(ndvimaxpamax)+' '+str(ndviminpamin)+' '+str(ndviminpamax)+' '+str(hpamin)+' '+str(hpamax)+' '+str(treepamean)+' '+str(eprpamean)+' '+str(prepamean)+' '+str(biopamean)+' '+str(slopepamean)+' '+str(ndwipamean)+' '+str(ndvimaxpamean)+' '+str(ndviminpamean)+' '+str(hpamean)#	exclude	PA!	#+' '+str(hr1p25pa)#	'+str(hr3)+'	+' '+str(hr1medianpa)+' '+str(num_features)+' '
					wb.write(var)
					wb.write('\n')
					wb.close()
					print "results exported"
					os.system('rm '+str(outfile3))
		wb = open(csvname1,'a')	#	LOCAL	FOLDER
		var = str(ecor)
		wb.write(var)
		wb.write('\n')
		wb.close()	
	print "END ECOREG: " + str(ecor)