Code Example #1
File: test_stats.py  Project: mullens/khk-lights
def test_obrientransform():
    # this is a regression test to check the np.var replacement
    # I didn't separately verify the numbers
    x1 = np.arange(5)
    result = np.array(
      [[  5.41666667,   1.04166667,  -0.41666667,   1.04166667,  5.41666667],
       [ 21.66666667,   4.16666667,  -1.66666667,   4.16666667, 21.66666667]])
    assert_array_almost_equal(stats.obrientransform(x1, 2*x1), result, decimal=8)
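For reference, the expected numbers in this test follow directly from O'Brien's transform: for a sample x of size n with mean m and variance v (computed with ddof=1), each point maps to ((n - 1.5) * n * (x_i - m)^2 - 0.5 * v * (n - 1)) / ((n - 1) * (n - 2)). Below is a minimal standalone sketch that reproduces the rows of the result array; the formula is restated here only for illustration, and scipy.stats.obrientransform remains the authoritative implementation.

import numpy as np
from scipy import stats

def obrien_manual(x):
    # O'Brien transform of one sample, written out explicitly.
    x = np.asarray(x, dtype=float)
    n, m, v = len(x), x.mean(), x.var(ddof=1)
    return ((n - 1.5) * n * (x - m) ** 2 - 0.5 * v * (n - 1)) / ((n - 1) * (n - 2))

x1 = np.arange(5)
print(obrien_manual(x1))                  # [ 5.4167  1.0417 -0.4167  1.0417  5.4167]
print(stats.obrientransform(x1, 2 * x1))  # matches the result array above, row for row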
Code Example #2
File: test_stats.py  Project: zoccolan/eyetracker
def test_obrientransform():
    # this is a regression test to check the np.var replacement
    # I didn't separately verify the numbers
    x1 = np.arange(5)
    result = np.array(
        [[5.41666667, 1.04166667, -0.41666667, 1.04166667, 5.41666667],
         [21.66666667, 4.16666667, -1.66666667, 4.16666667, 21.66666667]])
    assert_array_almost_equal(stats.obrientransform(x1, 2 * x1),
                              result,
                              decimal=8)
Code Example #3
File: test_mstats_basic.py  Project: andycasey/scipy
 def test_obrientransform(self):
     for n in self.get_n():
         x, y, xm, ym = self.generate_xy_sample(n)
         r = stats.obrientransform(x)
         rm = stats.mstats.obrientransform(xm)
         assert_almost_equal(r.T, rm[0:len(x)])
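The helpers self.get_n and self.generate_xy_sample come from the surrounding test class and are not shown here. A minimal standalone check of the same stats/mstats consistency, assuming only that the two variants agree on fully unmasked data (which is what this test exercises):

import numpy as np
from scipy import stats

x = np.arange(10.0)
xm = np.ma.masked_array(x)  # masked-array view with no masked entries
r = stats.obrientransform(x)
rm = stats.mstats.obrientransform(xm)
# Normalize shapes (row-oriented ndarray vs. masked column) before comparing.
assert np.allclose(np.squeeze(np.asarray(r)), np.squeeze(np.asarray(rm)))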
Code Example #4
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("-i", "--infile", required=True, help="Tabular file.")
    parser.add_argument("-o", "--outfile", required=True, help="Path to the output file.")
    parser.add_argument("--sample_one_cols", help="Input format, like smi, sdf, inchi")
    parser.add_argument("--sample_two_cols", help="Input format, like smi, sdf, inchi")
    parser.add_argument("--sample_cols", help="Input format, like smi, sdf, inchi,separate arrays using ;")
    parser.add_argument("--test_id", help="statistical test method")
    parser.add_argument(
        "--mwu_use_continuity",
        action="store_true",
        default=False,
        help="Whether a continuity correction (1/2.) should be taken into account.",
    )
    parser.add_argument(
        "--equal_var",
        action="store_true",
        default=False,
        help="If set perform a standard independent 2 sample test that assumes equal population variances. If not set, perform Welch's t-test, which does not assume equal population variance.",
    )
    parser.add_argument(
        "--reta", action="store_true", default=False, help="Whether or not to return the internally computed a values."
    )
    parser.add_argument("--fisher", action="store_true", default=False, help="if true then Fisher definition is used")
    parser.add_argument(
        "--bias",
        action="store_true",
        default=False,
        help="if false,then the calculations are corrected for statistical bias",
    )
    parser.add_argument("--inclusive1", action="store_true", default=False, help="if false,lower_limit will be ignored")
    parser.add_argument(
        "--inclusive2", action="store_true", default=False, help="if false,higher_limit will be ignored"
    )
    parser.add_argument("--inclusive", action="store_true", default=False, help="if false,limit will be ignored")
    parser.add_argument(
        "--printextras",
        action="store_true",
        default=False,
        help="If True, if there are extra points a warning is raised saying how many of those points there are",
    )
    parser.add_argument(
        "--initial_lexsort",
        action="store_true",
        default="False",
        help="Whether to use lexsort or quicksort as the sorting method for the initial sort of the inputs.",
    )
    parser.add_argument("--correction", action="store_true", default=False, help="continuity correction ")
    parser.add_argument(
        "--axis",
        type=int,
        default=0,
        help="Axis can equal None (ravel array first), or an integer (the axis over which to operate on a and b)",
    )
    parser.add_argument(
        "--n",
        type=int,
        default=0,
        help="the number of trials. This is ignored if x gives both the number of successes and failures",
    )
    parser.add_argument("--b", type=int, default=0, help="The number of bins to use for the histogram")
    parser.add_argument("--N", type=int, default=0, help="Score that is compared to the elements in a.")
    parser.add_argument("--ddof", type=int, default=0, help="Degrees of freedom correction")
    parser.add_argument("--score", type=int, default=0, help="Score that is compared to the elements in a.")
    parser.add_argument("--m", type=float, default=0.0, help="limits")
    parser.add_argument("--mf", type=float, default=2.0, help="lower limit")
    parser.add_argument("--nf", type=float, default=99.9, help="higher_limit")
    parser.add_argument(
        "--p",
        type=float,
        default=0.5,
        help="The hypothesized probability of success. 0 <= p <= 1. The default value is p = 0.5",
    )
    parser.add_argument("--alpha", type=float, default=0.9, help="probability")
    parser.add_argument("--new", type=float, default=0.0, help="Value to put in place of values in a outside of bounds")
    parser.add_argument(
        "--proportiontocut",
        type=float,
        default=0.0,
        help="Proportion (in range 0-1) of total data set to trim of each end.",
    )
    parser.add_argument(
        "--lambda_",
        type=float,
        default=1.0,
        help="lambda_ gives the power in the Cressie-Read power divergence statistic",
    )
    parser.add_argument(
        "--imbda",
        type=float,
        default=0,
        help="If lmbda is not None, do the transformation for that value.If lmbda is None, find the lambda that maximizes the log-likelihood function and return it as the second output argument.",
    )
    parser.add_argument("--base", type=float, default=1.6, help="The logarithmic base to use, defaults to e")
    parser.add_argument("--dtype", help="dtype")
    parser.add_argument("--med", help="med")
    parser.add_argument("--cdf", help="cdf")
    parser.add_argument("--zero_method", help="zero_method options")
    parser.add_argument("--dist", help="dist options")
    parser.add_argument("--ties", help="ties options")
    parser.add_argument("--alternative", help="alternative options")
    parser.add_argument("--mode", help="mode options")
    parser.add_argument("--method", help="method options")
    parser.add_argument("--md", help="md options")
    parser.add_argument("--center", help="center options")
    parser.add_argument("--kind", help="kind options")
    parser.add_argument("--tail", help="tail options")
    parser.add_argument("--interpolation", help="interpolation options")
    parser.add_argument("--statistic", help="statistic options")

    args = parser.parse_args()
    infile = args.infile
    outfile = open(args.outfile, "w+")
    test_id = args.test_id
    nf = args.nf
    mf = args.mf
    imbda = args.imbda
    inclusive1 = args.inclusive1
    inclusive2 = args.inclusive2
    sample0 = 0
    sample1 = 0
    sample2 = 0
    if args.sample_cols is not None:
        sample0 = 1
        bartlett_samples = []
        for sample in args.sample_cols.split(";"):
            bartlett_samples.append(map(int, sample.split(",")))
    if args.sample_one_cols is not None:
        sample1 = 1
        sample_one_cols = args.sample_one_cols.split(",")
    if args.sample_two_cols is not None:
        sample_two_cols = args.sample_two_cols.split(",")
        sample2 = 1
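    # Process the tabular input line by line; the selected test appends its
    # result values as extra columns on each output row.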
    for line in open(infile):
        sample_one = []
        sample_two = []
        cols = line.strip().split("\t")
        if sample0 == 1:
            b_samples = columns_to_values(bartlett_samples, line)
        if sample1 == 1:
            for index in sample_one_cols:
                sample_one.append(cols[int(index) - 1])
        if sample2 == 1:
            for index in sample_two_cols:
                sample_two.append(cols[int(index) - 1])
        if test_id.strip() == "describe":
            size, min_max, mean, uv, bs, bk = stats.describe(map(float, sample_one))
            cols.append(size)
            cols.append(min_max)
            cols.append(mean)
            cols.append(uv)
            cols.append(bs)
            cols.append(bk)
        elif test_id.strip() == "mode":
            vals, counts = stats.mode(map(float, sample_one))
            cols.append(vals)
            cols.append(counts)
        elif test_id.strip() == "nanmean":
            m = stats.nanmean(map(float, sample_one))
            cols.append(m)
        elif test_id.strip() == "nanmedian":
            m = stats.nanmedian(map(float, sample_one))
            cols.append(m)
        elif test_id.strip() == "kurtosistest":
            z_value, p_value = stats.kurtosistest(map(float, sample_one))
            cols.append(z_value)
            cols.append(p_value)
        elif test_id.strip() == "variation":
            ra = stats.variation(map(float, sample_one))
            cols.append(ra)
        elif test_id.strip() == "itemfreq":
            freq = stats.itemfreq(map(float, sample_one))
            for list in freq:
                elements = ",".join(map(str, list))
                cols.append(elements)
        elif test_id.strip() == "nanmedian":
            m = stats.nanmedian(map(float, sample_one))
            cols.append(m)
        elif test_id.strip() == "variation":
            ra = stats.variation(map(float, sample_one))
            cols.append(ra)
        elif test_id.strip() == "boxcox_llf":
            IIf = stats.boxcox_llf(imbda, map(float, sample_one))
            cols.append(IIf)
        elif test_id.strip() == "tiecorrect":
            fa = stats.tiecorrect(map(float, sample_one))
            cols.append(fa)
        elif test_id.strip() == "rankdata":
            r = stats.rankdata(map(float, sample_one), method=args.md)
            cols.append(r)
        elif test_id.strip() == "nanstd":
            s = stats.nanstd(map(float, sample_one), bias=args.bias)
            cols.append(s)
        elif test_id.strip() == "anderson":
            A2, critical, sig = stats.anderson(map(float, sample_one), dist=args.dist)
            cols.append(A2)
            for list in critical:
                cols.append(list)
            cols.append(",")
            for list in sig:
                cols.append(list)
        elif test_id.strip() == "binom_test":
            p_value = stats.binom_test(map(float, sample_one), n=args.n, p=args.p)
            cols.append(p_value)
        elif test_id.strip() == "gmean":
            gm = stats.gmean(map(float, sample_one), dtype=args.dtype)
            cols.append(gm)
        elif test_id.strip() == "hmean":
            hm = stats.hmean(map(float, sample_one), dtype=args.dtype)
            cols.append(hm)
        elif test_id.strip() == "kurtosis":
            k = stats.kurtosis(map(float, sample_one), axis=args.axis, fisher=args.fisher, bias=args.bias)
            cols.append(k)
        elif test_id.strip() == "moment":
            n_moment = stats.moment(map(float, sample_one), n=args.n)
            cols.append(n_moment)
        elif test_id.strip() == "normaltest":
            k2, p_value = stats.normaltest(map(float, sample_one))
            cols.append(k2)
            cols.append(p_value)
        elif test_id.strip() == "skew":
            skewness = stats.skew(map(float, sample_one), bias=args.bias)
            cols.append(skewness)
        elif test_id.strip() == "skewtest":
            z_value, p_value = stats.skewtest(map(float, sample_one))
            cols.append(z_value)
            cols.append(p_value)
        elif test_id.strip() == "sem":
            s = stats.sem(map(float, sample_one), ddof=args.ddof)
            cols.append(s)
        elif test_id.strip() == "zscore":
            z = stats.zscore(map(float, sample_one), ddof=args.ddof)
            for list in z:
                cols.append(list)
        elif test_id.strip() == "signaltonoise":
            s2n = stats.signaltonoise(map(float, sample_one), ddof=args.ddof)
            cols.append(s2n)
        elif test_id.strip() == "percentileofscore":
            p = stats.percentileofscore(map(float, sample_one), score=args.score, kind=args.kind)
            cols.append(p)
        elif test_id.strip() == "bayes_mvs":
            c_mean, c_var, c_std = stats.bayes_mvs(map(float, sample_one), alpha=args.alpha)
            cols.append(c_mean)
            cols.append(c_var)
            cols.append(c_std)
        elif test_id.strip() == "sigmaclip":
            c, c_low, c_up = stats.sigmaclip(map(float, sample_one), low=args.m, high=args.n)
            cols.append(c)
            cols.append(c_low)
            cols.append(c_up)
        elif test_id.strip() == "kstest":
            d, p_value = stats.kstest(
                map(float, sample_one), cdf=args.cdf, N=args.N, alternative=args.alternative, mode=args.mode
            )
            cols.append(d)
            cols.append(p_value)
        elif test_id.strip() == "chi2_contingency":
            chi2, p, dof, ex = stats.chi2_contingency(
                map(float, sample_one), correction=args.correction, lambda_=args.lambda_
            )
            cols.append(chi2)
            cols.append(p)
            cols.append(dof)
            cols.append(ex)
        elif test_id.strip() == "tmean":
            if nf == 0 and mf == 0:
                mean = stats.tmean(map(float, sample_one))
            else:
                mean = stats.tmean(map(float, sample_one), (mf, nf), (inclusive1, inclusive2))
            cols.append(mean)
        elif test_id.strip() == "tmin":
            if mf == 0:
                min = stats.tmin(map(float, sample_one))
            else:
                min = stats.tmin(map(float, sample_one), lowerlimit=mf, inclusive=args.inclusive)
            cols.append(min)
        elif test_id.strip() == "tmax":
            if nf == 0:
                max = stats.tmax(map(float, sample_one))
            else:
                max = stats.tmax(map(float, sample_one), upperlimit=nf, inclusive=args.inclusive)
            cols.append(max)
        elif test_id.strip() == "tvar":
            if nf == 0 and mf == 0:
                var = stats.tvar(map(float, sample_one))
            else:
                var = stats.tvar(map(float, sample_one), (mf, nf), (inclusive1, inclusive2))
            cols.append(var)
        elif test_id.strip() == "tstd":
            if nf == 0 and mf == 0:
                std = stats.tstd(map(float, sample_one))
            else:
                std = stats.tstd(map(float, sample_one), (mf, nf), (inclusive1, inclusive2))
            cols.append(std)
        elif test_id.strip() == "tsem":
            if nf == 0 and mf == 0:
                s = stats.tsem(map(float, sample_one))
            else:
                s = stats.tsem(map(float, sample_one), (mf, nf), (inclusive1, inclusive2))
            cols.append(s)
        elif test_id.strip() == "scoreatpercentile":
            if nf == 0 and mf == 0:
                s = stats.scoreatpercentile(
                    map(float, sample_one), map(float, sample_two), interpolation_method=args.interpolation
                )
            else:
                s = stats.scoreatpercentile(
                    map(float, sample_one), map(float, sample_two), (mf, nf), interpolation_method=args.interpolation
                )
            for list in s:
                cols.append(list)
        elif test_id.strip() == "relfreq":
            if nf == 0 and mf == 0:
                rel, low_range, binsize, ex = stats.relfreq(map(float, sample_one), args.b)
            else:
                rel, low_range, binsize, ex = stats.relfreq(map(float, sample_one), args.b, (mf, nf))
            for list in rel:
                cols.append(list)
            cols.append(low_range)
            cols.append(binsize)
            cols.append(ex)
        elif test_id.strip() == "binned_statistic":
            if nf == 0 and mf == 0:
                st, b_edge, b_n = stats.binned_statistic(
                    map(float, sample_one), map(float, sample_two), statistic=args.statistic, bins=args.b
                )
            else:
                st, b_edge, b_n = stats.binned_statistic(
                    map(float, sample_one),
                    map(float, sample_two),
                    statistic=args.statistic,
                    bins=args.b,
                    range=(mf, nf),
                )
            cols.append(st)
            cols.append(b_edge)
            cols.append(b_n)
        elif test_id.strip() == "threshold":
            if nf == 0 and mf == 0:
                o = stats.threshold(map(float, sample_one), newval=args.new)
            else:
                o = stats.threshold(map(float, sample_one), mf, nf, newval=args.new)
            for list in o:
                cols.append(list)
        elif test_id.strip() == "trimboth":
            o = stats.trimboth(map(float, sample_one), proportiontocut=args.proportiontocut)
            for list in o:
                cols.append(list)
        elif test_id.strip() == "trim1":
            t1 = stats.trim1(map(float, sample_one), proportiontocut=args.proportiontocut, tail=args.tail)
            for list in t1:
                cols.append(list)
        elif test_id.strip() == "histogram":
            if nf == 0 and mf == 0:
                hi, low_range, binsize, ex = stats.histogram(map(float, sample_one), args.b)
            else:
                hi, low_range, binsize, ex = stats.histogram(map(float, sample_one), args.b, (mf, nf))
            cols.append(hi)
            cols.append(low_range)
            cols.append(binsize)
            cols.append(ex)
        elif test_id.strip() == "cumfreq":
            if nf == 0 and mf == 0:
                cum, low_range, binsize, ex = stats.cumfreq(map(float, sample_one), args.b)
            else:
                cum, low_range, binsize, ex = stats.cumfreq(map(float, sample_one), args.b, (mf, nf))
            cols.append(cum)
            cols.append(low_range)
            cols.append(binsize)
            cols.append(ex)
        elif test_id.strip() == "boxcox_normmax":
            if nf == 0 and mf == 0:
                ma = stats.boxcox_normmax(map(float, sample_one))
            else:
                ma = stats.boxcox_normmax(map(float, sample_one), (mf, nf), method=args.method)
            cols.append(ma)
        elif test_id.strip() == "boxcox":
            if imbda == 0:
                box, ma, ci = stats.boxcox(map(float, sample_one), alpha=args.alpha)
                cols.append(box)
                cols.append(ma)
                cols.append(ci)
            else:
                box = stats.boxcox(map(float, sample_one), imbda, alpha=args.alpha)
                cols.append(box)
        elif test_id.strip() == "histogram2":
            h2 = stats.histogram2(map(float, sample_one), map(float, sample_two))
            for list in h2:
                cols.append(list)
        elif test_id.strip() == "ranksums":
            z_statistic, p_value = stats.ranksums(map(float, sample_one), map(float, sample_two))
            cols.append(z_statistic)
            cols.append(p_value)
        elif test_id.strip() == "ttest_1samp":
            t, prob = stats.ttest_1samp(map(float, sample_one), map(float, sample_two))
            for list in t:
                cols.append(list)
            for list in prob:
                cols.append(list)
        elif test_id.strip() == "ansari":
            AB, p_value = stats.ansari(map(float, sample_one), map(float, sample_two))
            cols.append(AB)
            cols.append(p_value)
        elif test_id.strip() == "linregress":
            slope, intercept, r_value, p_value, stderr = stats.linregress(
                map(float, sample_one), map(float, sample_two)
            )
            cols.append(slope)
            cols.append(intercept)
            cols.append(r_value)
            cols.append(p_value)
            cols.append(stderr)
        elif test_id.strip() == "pearsonr":
            cor, p_value = stats.pearsonr(map(float, sample_one), map(float, sample_two))
            cols.append(cor)
            cols.append(p_value)
        elif test_id.strip() == "pointbiserialr":
            r, p_value = stats.pointbiserialr(map(float, sample_one), map(float, sample_two))
            cols.append(r)
            cols.append(p_value)
        elif test_id.strip() == "ks_2samp":
            d, p_value = stats.ks_2samp(map(float, sample_one), map(float, sample_two))
            cols.append(d)
            cols.append(p_value)
        elif test_id.strip() == "mannwhitneyu":
            mw_stats_u, p_value = stats.mannwhitneyu(
                map(float, sample_one), map(float, sample_two), use_continuity=args.mwu_use_continuity
            )
            cols.append(mw_stats_u)
            cols.append(p_value)
        elif test_id.strip() == "zmap":
            z = stats.zmap(map(float, sample_one), map(float, sample_two), ddof=args.ddof)
            for list in z:
                cols.append(list)
        elif test_id.strip() == "ttest_ind":
            mw_stats_u, p_value = stats.ttest_ind(
                map(float, sample_one), map(float, sample_two), equal_var=args.equal_var
            )
            cols.append(mw_stats_u)
            cols.append(p_value)
        elif test_id.strip() == "ttest_rel":
            t, prob = stats.ttest_rel(map(float, sample_one), map(float, sample_two), axis=args.axis)
            cols.append(t)
            cols.append(prob)
        elif test_id.strip() == "mood":
            z, p_value = stats.mood(map(float, sample_one), map(float, sample_two), axis=args.axis)
            cols.append(z)
            cols.append(p_value)
        elif test_id.strip() == "shapiro":
            W, p_value, a = stats.shapiro(map(float, sample_one), map(float, sample_two), args.reta)
            cols.append(W)
            cols.append(p_value)
            for list in a:
                cols.append(list)
        elif test_id.strip() == "kendalltau":
            k, p_value = stats.kendalltau(
                map(float, sample_one), map(float, sample_two), initial_lexsort=args.initial_lexsort
            )
            cols.append(k)
            cols.append(p_value)
        elif test_id.strip() == "entropy":
            s = stats.entropy(map(float, sample_one), map(float, sample_two), base=args.base)
            cols.append(s)
        elif test_id.strip() == "spearmanr":
            if sample2 == 1:
                rho, p_value = stats.spearmanr(map(float, sample_one), map(float, sample_two))
            else:
                rho, p_value = stats.spearmanr(map(float, sample_one))
            cols.append(rho)
            cols.append(p_value)
        elif test_id.strip() == "wilcoxon":
            if sample2 == 1:
                T, p_value = stats.wilcoxon(
                    map(float, sample_one),
                    map(float, sample_two),
                    zero_method=args.zero_method,
                    correction=args.correction,
                )
            else:
                T, p_value = stats.wilcoxon(
                    map(float, sample_one), zero_method=args.zero_method, correction=args.correction
                )
            cols.append(T)
            cols.append(p_value)
        elif test_id.strip() == "chisquare":
            if sample2 == 1:
                rho, p_value = stats.chisquare(map(float, sample_one), map(float, sample_two), ddof=args.ddof)
            else:
                rho, p_value = stats.chisquare(map(float, sample_one), ddof=args.ddof)
            cols.append(rho)
            cols.append(p_value)
        elif test_id.strip() == "power_divergence":
            if sample2 == 1:
                stat, p_value = stats.power_divergence(
                    map(float, sample_one), map(float, sample_two), ddof=args.ddof, lambda_=args.lambda_
                )
            else:
                stat, p_value = stats.power_divergence(map(float, sample_one), ddof=args.ddof, lambda_=args.lambda_)
            cols.append(stat)
            cols.append(p_value)
        elif test_id.strip() == "theilslopes":
            if sample2 == 1:
                mpe, met, lo, up = stats.theilslopes(map(float, sample_one), map(float, sample_two), alpha=args.alpha)
            else:
                mpe, met, lo, up = stats.theilslopes(map(float, sample_one), alpha=args.alpha)
            cols.append(mpe)
            cols.append(met)
            cols.append(lo)
            cols.append(up)
        elif test_id.strip() == "combine_pvalues":
            if sample2 == 1:
                stat, p_value = stats.combine_pvalues(
                    map(float, sample_one), method=args.med, weights=map(float, sample_two)
                )
            else:
                stat, p_value = stats.combine_pvalues(map(float, sample_one), method=args.med)
            cols.append(stat)
            cols.append(p_value)
        elif test_id.strip() == "obrientransform":
            ob = stats.obrientransform(*b_samples)
            for list in ob:
                elements = ",".join(map(str, list))
                cols.append(elements)
        elif test_id.strip() == "f_oneway":
            f_value, p_value = stats.f_oneway(*b_samples)
            cols.append(f_value)
            cols.append(p_value)
        elif test_id.strip() == "kruskal":
            h, p_value = stats.kruskal(*b_samples)
            cols.append(h)
            cols.append(p_value)
        elif test_id.strip() == "friedmanchisquare":
            fr, p_value = stats.friedmanchisquare(*b_samples)
            cols.append(fr)
            cols.append(p_value)
        elif test_id.strip() == "fligner":
            xsq, p_value = stats.fligner(center=args.center, proportiontocut=args.proportiontocut, *b_samples)
            cols.append(xsq)
            cols.append(p_value)
        elif test_id.strip() == "bartlett":
            T, p_value = stats.bartlett(*b_samples)
            cols.append(T)
            cols.append(p_value)
        elif test_id.strip() == "levene":
            w, p_value = stats.levene(center=args.center, proportiontocut=args.proportiontocut, *b_samples)
            cols.append(w)
            cols.append(p_value)
        elif test_id.strip() == "median_test":
            stat, p_value, m, table = stats.median_test(
                ties=args.ties, correction=args.correction, lambda_=args.lambda_, *b_samples
            )
            cols.append(stat)
            cols.append(p_value)
            cols.append(m)
            cols.append(table)
            for list in table:
                elements = ",".join(map(str, list))
                cols.append(elements)
        outfile.write("%s\n" % "\t".join(map(str, cols)))
    outfile.close()
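The helper columns_to_values is defined elsewhere in the project and is not shown in this snippet. Judging from how it is called (bartlett_samples holds groups of 1-based column indices, and the result is unpacked into calls such as stats.bartlett(*b_samples)), a plausible sketch looks like this; it is an assumption, not the project's actual code:

def columns_to_values(samples, line):
    # Hypothetical reconstruction: for each group of 1-based column indices,
    # pull the matching fields out of the tab-separated line as floats.
    cols = line.strip().split("\t")
    return [[float(cols[i - 1]) for i in group] for group in samples]

A hypothetical invocation (the script name is assumed) would then look like: python statistics_tool.py -i input.tsv -o output.tsv --test_id bartlett --sample_cols "1,2,3;4,5,6"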
Code Example #5
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("-i", "--infile", required=True, help="Tabular file.")
    parser.add_argument("-o",
                        "--outfile",
                        required=True,
                        help="Path to the output file.")
    parser.add_argument("--sample_one_cols",
                        help="Input format, like smi, sdf, inchi")
    parser.add_argument("--sample_two_cols",
                        help="Input format, like smi, sdf, inchi")
    parser.add_argument(
        "--sample_cols",
        help="Input format, like smi, sdf, inchi,separate arrays using ;",
    )
    parser.add_argument("--test_id", help="statistical test method")
    parser.add_argument(
        "--mwu_use_continuity",
        action="store_true",
        default=False,
        help=
        "Whether a continuity correction (1/2.) should be taken into account.",
    )
    parser.add_argument(
        "--equal_var",
        action="store_true",
        default=False,
        help=
        "If set perform a standard independent 2 sample test that assumes equal population variances. If not set, perform Welch's t-test, which does not assume equal population variance.",
    )
    parser.add_argument(
        "--reta",
        action="store_true",
        default=False,
        help="Whether or not to return the internally computed a values.",
    )
    parser.add_argument(
        "--fisher",
        action="store_true",
        default=False,
        help="If true, Fisher's definition is used",
    )
    parser.add_argument(
        "--bias",
        action="store_true",
        default=False,
        help=
        "If false, then the calculations are corrected for statistical bias",
    )
    parser.add_argument(
        "--inclusive1",
        action="store_true",
        default=False,
        help="If false, lower_limit will be ignored",
    )
    parser.add_argument(
        "--inclusive2",
        action="store_true",
        default=False,
        help="If false, higher_limit will be ignored",
    )
    parser.add_argument(
        "--inclusive",
        action="store_true",
        default=False,
        help="If false, limit will be ignored",
    )
    parser.add_argument(
        "--printextras",
        action="store_true",
        default=False,
        help=
        "If True, if there are extra points a warning is raised saying how many of those points there are",
    )
    parser.add_argument(
        "--initial_lexsort",
        action="store_true",
        default="False",
        help=
        "Whether to use lexsort or quicksort as the sorting method for the initial sort of the inputs.",
    )
    parser.add_argument(
        "--correction",
        action="store_true",
        default=False,
        help="continuity correction ",
    )
    parser.add_argument(
        "--axis",
        type=int,
        default=0,
        help=
        "Axis can equal None (ravel array first), or an integer (the axis over which to operate on a and b)",
    )
    parser.add_argument(
        "--n",
        type=int,
        default=0,
        help=
        "the number of trials. This is ignored if x gives both the number of successes and failures",
    )
    parser.add_argument("--b",
                        type=int,
                        default=0,
                        help="The number of bins to use for the histogram")
    parser.add_argument("--N",
                        type=int,
                        default=0,
                        help="Score that is compared to the elements in a.")
    parser.add_argument("--ddof",
                        type=int,
                        default=0,
                        help="Degrees of freedom correction")
    parser.add_argument(
        "--score",
        type=int,
        default=0,
        help="Score that is compared to the elements in a.",
    )
    parser.add_argument("--m", type=float, default=0.0, help="limits")
    parser.add_argument("--mf", type=float, default=2.0, help="lower limit")
    parser.add_argument("--nf", type=float, default=99.9, help="higher_limit")
    parser.add_argument(
        "--p",
        type=float,
        default=0.5,
        help=
        "The hypothesized probability of success. 0 <= p <= 1. The default value is p = 0.5",
    )
    parser.add_argument("--alpha", type=float, default=0.9, help="probability")
    parser.add_argument(
        "--new",
        type=float,
        default=0.0,
        help="Value to put in place of values in a outside of bounds",
    )
    parser.add_argument(
        "--proportiontocut",
        type=float,
        default=0.0,
        help="Proportion (in range 0-1) of total data set to trim of each end.",
    )
    parser.add_argument(
        "--lambda_",
        type=float,
        default=1.0,
        help=
        "lambda_ gives the power in the Cressie-Read power divergence statistic",
    )
    parser.add_argument(
        "--imbda",
        type=float,
        default=0,
        help=
        "If lmbda is not None, do the transformation for that value.If lmbda is None, find the lambda that maximizes the log-likelihood function and return it as the second output argument.",
    )
    parser.add_argument(
        "--base",
        type=float,
        default=1.6,
        help="The logarithmic base to use, defaults to e",
    )
    parser.add_argument("--dtype", help="dtype")
    parser.add_argument("--med", help="med")
    parser.add_argument("--cdf", help="cdf")
    parser.add_argument("--zero_method", help="zero_method options")
    parser.add_argument("--dist", help="dist options")
    parser.add_argument("--ties", help="ties options")
    parser.add_argument("--alternative", help="alternative options")
    parser.add_argument("--mode", help="mode options")
    parser.add_argument("--method", help="method options")
    parser.add_argument("--md", help="md options")
    parser.add_argument("--center", help="center options")
    parser.add_argument("--kind", help="kind options")
    parser.add_argument("--tail", help="tail options")
    parser.add_argument("--interpolation", help="interpolation options")
    parser.add_argument("--statistic", help="statistic options")

    args = parser.parse_args()
    infile = args.infile
    outfile = open(args.outfile, "w+")
    test_id = args.test_id
    nf = args.nf
    mf = args.mf
    imbda = args.imbda
    inclusive1 = args.inclusive1
    inclusive2 = args.inclusive2
    sample0 = 0
    sample1 = 0
    sample2 = 0
    if args.sample_cols is not None:
        sample0 = 1
        bartlett_samples = []
        for sample in args.sample_cols.split(";"):
            bartlett_samples.append(map(int, sample.split(",")))
    if args.sample_one_cols is not None:
        sample1 = 1
        sample_one_cols = args.sample_one_cols.split(",")
    if args.sample_two_cols is not None:
        sample_two_cols = args.sample_two_cols.split(",")
        sample2 = 1
    for line in open(infile):
        sample_one = []
        sample_two = []
        cols = line.strip().split("\t")
        if sample0 == 1:
            b_samples = columns_to_values(bartlett_samples, line)
        if sample1 == 1:
            for index in sample_one_cols:
                sample_one.append(cols[int(index) - 1])
        if sample2 == 1:
            for index in sample_two_cols:
                sample_two.append(cols[int(index) - 1])
        if test_id.strip() == "describe":
            size, min_max, mean, uv, bs, bk = stats.describe(
                map(float, sample_one))
            cols.append(size)
            cols.append(min_max)
            cols.append(mean)
            cols.append(uv)
            cols.append(bs)
            cols.append(bk)
        elif test_id.strip() == "mode":
            vals, counts = stats.mode(map(float, sample_one))
            cols.append(vals)
            cols.append(counts)
        elif test_id.strip() == "nanmean":
            m = stats.nanmean(map(float, sample_one))
            cols.append(m)
        elif test_id.strip() == "nanmedian":
            m = stats.nanmedian(map(float, sample_one))
            cols.append(m)
        elif test_id.strip() == "kurtosistest":
            z_value, p_value = stats.kurtosistest(map(float, sample_one))
            cols.append(z_value)
            cols.append(p_value)
        elif test_id.strip() == "variation":
            ra = stats.variation(map(float, sample_one))
            cols.append(ra)
        elif test_id.strip() == "itemfreq":
            freq = stats.itemfreq(map(float, sample_one))
            for list in freq:
                elements = ",".join(map(str, list))
                cols.append(elements)
        elif test_id.strip() == "nanmedian":
            m = stats.nanmedian(map(float, sample_one))
            cols.append(m)
        elif test_id.strip() == "variation":
            ra = stats.variation(map(float, sample_one))
            cols.append(ra)
        elif test_id.strip() == "boxcox_llf":
            IIf = stats.boxcox_llf(imbda, map(float, sample_one))
            cols.append(IIf)
        elif test_id.strip() == "tiecorrect":
            fa = stats.tiecorrect(map(float, sample_one))
            cols.append(fa)
        elif test_id.strip() == "rankdata":
            r = stats.rankdata(map(float, sample_one), method=args.md)
            cols.append(r)
        elif test_id.strip() == "nanstd":
            s = stats.nanstd(map(float, sample_one), bias=args.bias)
            cols.append(s)
        elif test_id.strip() == "anderson":
            A2, critical, sig = stats.anderson(map(float, sample_one),
                                               dist=args.dist)
            cols.append(A2)
            for list in critical:
                cols.append(list)
            cols.append(",")
            for list in sig:
                cols.append(list)
        elif test_id.strip() == "binom_test":
            p_value = stats.binom_test(map(float, sample_one),
                                       n=args.n,
                                       p=args.p)
            cols.append(p_value)
        elif test_id.strip() == "gmean":
            gm = stats.gmean(map(float, sample_one), dtype=args.dtype)
            cols.append(gm)
        elif test_id.strip() == "hmean":
            hm = stats.hmean(map(float, sample_one), dtype=args.dtype)
            cols.append(hm)
        elif test_id.strip() == "kurtosis":
            k = stats.kurtosis(
                map(float, sample_one),
                axis=args.axis,
                fisher=args.fisher,
                bias=args.bias,
            )
            cols.append(k)
        elif test_id.strip() == "moment":
            n_moment = stats.moment(map(float, sample_one), n=args.n)
            cols.append(n_moment)
        elif test_id.strip() == "normaltest":
            k2, p_value = stats.normaltest(map(float, sample_one))
            cols.append(k2)
            cols.append(p_value)
        elif test_id.strip() == "skew":
            skewness = stats.skew(map(float, sample_one), bias=args.bias)
            cols.append(skewness)
        elif test_id.strip() == "skewtest":
            z_value, p_value = stats.skewtest(map(float, sample_one))
            cols.append(z_value)
            cols.append(p_value)
        elif test_id.strip() == "sem":
            s = stats.sem(map(float, sample_one), ddof=args.ddof)
            cols.append(s)
        elif test_id.strip() == "zscore":
            z = stats.zscore(map(float, sample_one), ddof=args.ddof)
            for list in z:
                cols.append(list)
        elif test_id.strip() == "signaltonoise":
            s2n = stats.signaltonoise(map(float, sample_one), ddof=args.ddof)
            cols.append(s2n)
        elif test_id.strip() == "percentileofscore":
            p = stats.percentileofscore(map(float, sample_one),
                                        score=args.score,
                                        kind=args.kind)
            cols.append(p)
        elif test_id.strip() == "bayes_mvs":
            c_mean, c_var, c_std = stats.bayes_mvs(map(float, sample_one),
                                                   alpha=args.alpha)
            cols.append(c_mean)
            cols.append(c_var)
            cols.append(c_std)
        elif test_id.strip() == "sigmaclip":
            c, c_low, c_up = stats.sigmaclip(map(float, sample_one),
                                             low=args.m,
                                             high=args.n)
            cols.append(c)
            cols.append(c_low)
            cols.append(c_up)
        elif test_id.strip() == "kstest":
            d, p_value = stats.kstest(
                map(float, sample_one),
                cdf=args.cdf,
                N=args.N,
                alternative=args.alternative,
                mode=args.mode,
            )
            cols.append(d)
            cols.append(p_value)
        elif test_id.strip() == "chi2_contingency":
            chi2, p, dof, ex = stats.chi2_contingency(
                map(float, sample_one),
                correction=args.correction,
                lambda_=args.lambda_)
            cols.append(chi2)
            cols.append(p)
            cols.append(dof)
            cols.append(ex)
        elif test_id.strip() == "tmean":
            if nf == 0 and mf == 0:
                mean = stats.tmean(map(float, sample_one))
            else:
                mean = stats.tmean(map(float, sample_one), (mf, nf),
                                   (inclusive1, inclusive2))
            cols.append(mean)
        elif test_id.strip() == "tmin":
            if mf == 0:
                min = stats.tmin(map(float, sample_one))
            else:
                min = stats.tmin(map(float, sample_one),
                                 lowerlimit=mf,
                                 inclusive=args.inclusive)
            cols.append(min)
        elif test_id.strip() == "tmax":
            if nf == 0:
                max = stats.tmax(map(float, sample_one))
            else:
                max = stats.tmax(map(float, sample_one),
                                 upperlimit=nf,
                                 inclusive=args.inclusive)
            cols.append(max)
        elif test_id.strip() == "tvar":
            if nf == 0 and mf == 0:
                var = stats.tvar(map(float, sample_one))
            else:
                var = stats.tvar(map(float, sample_one), (mf, nf),
                                 (inclusive1, inclusive2))
            cols.append(var)
        elif test_id.strip() == "tstd":
            if nf == 0 and mf == 0:
                std = stats.tstd(map(float, sample_one))
            else:
                std = stats.tstd(map(float, sample_one), (mf, nf),
                                 (inclusive1, inclusive2))
            cols.append(std)
        elif test_id.strip() == "tsem":
            if nf == 0 and mf == 0:
                s = stats.tsem(map(float, sample_one))
            else:
                s = stats.tsem(map(float, sample_one), (mf, nf),
                               (inclusive1, inclusive2))
            cols.append(s)
        elif test_id.strip() == "scoreatpercentile":
            if nf == 0 and mf == 0:
                s = stats.scoreatpercentile(
                    map(float, sample_one),
                    map(float, sample_two),
                    interpolation_method=args.interpolation,
                )
            else:
                s = stats.scoreatpercentile(
                    map(float, sample_one),
                    map(float, sample_two),
                    (mf, nf),
                    interpolation_method=args.interpolation,
                )
            for list in s:
                cols.append(list)
        elif test_id.strip() == "relfreq":
            if nf == 0 and mf == 0:
                rel, low_range, binsize, ex = stats.relfreq(
                    map(float, sample_one), args.b)
            else:
                rel, low_range, binsize, ex = stats.relfreq(
                    map(float, sample_one), args.b, (mf, nf))
            for list in rel:
                cols.append(list)
            cols.append(low_range)
            cols.append(binsize)
            cols.append(ex)
        elif test_id.strip() == "binned_statistic":
            if nf == 0 and mf == 0:
                st, b_edge, b_n = stats.binned_statistic(
                    map(float, sample_one),
                    map(float, sample_two),
                    statistic=args.statistic,
                    bins=args.b,
                )
            else:
                st, b_edge, b_n = stats.binned_statistic(
                    map(float, sample_one),
                    map(float, sample_two),
                    statistic=args.statistic,
                    bins=args.b,
                    range=(mf, nf),
                )
            cols.append(st)
            cols.append(b_edge)
            cols.append(b_n)
        elif test_id.strip() == "threshold":
            if nf == 0 and mf == 0:
                o = stats.threshold(map(float, sample_one), newval=args.new)
            else:
                o = stats.threshold(map(float, sample_one),
                                    mf,
                                    nf,
                                    newval=args.new)
            for list in o:
                cols.append(list)
        elif test_id.strip() == "trimboth":
            o = stats.trimboth(map(float, sample_one),
                               proportiontocut=args.proportiontocut)
            for list in o:
                cols.append(list)
        elif test_id.strip() == "trim1":
            t1 = stats.trim1(
                map(float, sample_one),
                proportiontocut=args.proportiontocut,
                tail=args.tail,
            )
            for list in t1:
                cols.append(list)
        elif test_id.strip() == "histogram":
            if nf == 0 and mf == 0:
                hi, low_range, binsize, ex = stats.histogram(
                    map(float, sample_one), args.b)
            else:
                hi, low_range, binsize, ex = stats.histogram(
                    map(float, sample_one), args.b, (mf, nf))
            cols.append(hi)
            cols.append(low_range)
            cols.append(binsize)
            cols.append(ex)
        elif test_id.strip() == "cumfreq":
            if nf == 0 and mf == 0:
                cum, low_range, binsize, ex = stats.cumfreq(
                    map(float, sample_one), args.b)
            else:
                cum, low_range, binsize, ex = stats.cumfreq(
                    map(float, sample_one), args.b, (mf, nf))
            cols.append(cum)
            cols.append(low_range)
            cols.append(binsize)
            cols.append(ex)
        elif test_id.strip() == "boxcox_normmax":
            if nf == 0 and mf == 0:
                ma = stats.boxcox_normmax(map(float, sample_one))
            else:
                ma = stats.boxcox_normmax(map(float, sample_one), (mf, nf),
                                          method=args.method)
            cols.append(ma)
        elif test_id.strip() == "boxcox":
            if imbda == 0:
                box, ma, ci = stats.boxcox(map(float, sample_one),
                                           alpha=args.alpha)
                cols.append(box)
                cols.append(ma)
                cols.append(ci)
            else:
                box = stats.boxcox(map(float, sample_one),
                                   imbda,
                                   alpha=args.alpha)
                cols.append(box)
        elif test_id.strip() == "histogram2":
            h2 = stats.histogram2(map(float, sample_one),
                                  map(float, sample_two))
            for list in h2:
                cols.append(list)
        elif test_id.strip() == "ranksums":
            z_statistic, p_value = stats.ranksums(map(float, sample_one),
                                                  map(float, sample_two))
            cols.append(z_statistic)
            cols.append(p_value)
        elif test_id.strip() == "ttest_1samp":
            t, prob = stats.ttest_1samp(map(float, sample_one),
                                        map(float, sample_two))
            for list in t:
                cols.append(list)
            for list in prob:
                cols.append(list)
        elif test_id.strip() == "ansari":
            AB, p_value = stats.ansari(map(float, sample_one),
                                       map(float, sample_two))
            cols.append(AB)
            cols.append(p_value)
        elif test_id.strip() == "linregress":
            slope, intercept, r_value, p_value, stderr = stats.linregress(
                map(float, sample_one), map(float, sample_two))
            cols.append(slope)
            cols.append(intercept)
            cols.append(r_value)
            cols.append(p_value)
            cols.append(stderr)
        elif test_id.strip() == "pearsonr":
            cor, p_value = stats.pearsonr(map(float, sample_one),
                                          map(float, sample_two))
            cols.append(cor)
            cols.append(p_value)
        elif test_id.strip() == "pointbiserialr":
            r, p_value = stats.pointbiserialr(map(float, sample_one),
                                              map(float, sample_two))
            cols.append(r)
            cols.append(p_value)
        elif test_id.strip() == "ks_2samp":
            d, p_value = stats.ks_2samp(map(float, sample_one),
                                        map(float, sample_two))
            cols.append(d)
            cols.append(p_value)
        elif test_id.strip() == "mannwhitneyu":
            mw_stats_u, p_value = stats.mannwhitneyu(
                map(float, sample_one),
                map(float, sample_two),
                use_continuity=args.mwu_use_continuity,
            )
            cols.append(mw_stats_u)
            cols.append(p_value)
        elif test_id.strip() == "zmap":
            z = stats.zmap(map(float, sample_one),
                           map(float, sample_two),
                           ddof=args.ddof)
            for list in z:
                cols.append(list)
        elif test_id.strip() == "ttest_ind":
            mw_stats_u, p_value = stats.ttest_ind(map(float, sample_one),
                                                  map(float, sample_two),
                                                  equal_var=args.equal_var)
            cols.append(mw_stats_u)
            cols.append(p_value)
        elif test_id.strip() == "ttest_rel":
            t, prob = stats.ttest_rel(map(float, sample_one),
                                      map(float, sample_two),
                                      axis=args.axis)
            cols.append(t)
            cols.append(prob)
        elif test_id.strip() == "mood":
            z, p_value = stats.mood(map(float, sample_one),
                                    map(float, sample_two),
                                    axis=args.axis)
            cols.append(z)
            cols.append(p_value)
        elif test_id.strip() == "shapiro":
            W, p_value, a = stats.shapiro(map(float, sample_one),
                                          map(float, sample_two), args.reta)
            cols.append(W)
            cols.append(p_value)
            for list in a:
                cols.append(list)
        elif test_id.strip() == "kendalltau":
            k, p_value = stats.kendalltau(
                map(float, sample_one),
                map(float, sample_two),
                initial_lexsort=args.initial_lexsort,
            )
            cols.append(k)
            cols.append(p_value)
        elif test_id.strip() == "entropy":
            s = stats.entropy(map(float, sample_one),
                              map(float, sample_two),
                              base=args.base)
            cols.append(s)
        elif test_id.strip() == "spearmanr":
            if sample2 == 1:
                rho, p_value = stats.spearmanr(map(float, sample_one),
                                               map(float, sample_two))
            else:
                rho, p_value = stats.spearmanr(map(float, sample_one))
            cols.append(rho)
            cols.append(p_value)
        elif test_id.strip() == "wilcoxon":
            if sample2 == 1:
                T, p_value = stats.wilcoxon(
                    map(float, sample_one),
                    map(float, sample_two),
                    zero_method=args.zero_method,
                    correction=args.correction,
                )
            else:
                T, p_value = stats.wilcoxon(
                    map(float, sample_one),
                    zero_method=args.zero_method,
                    correction=args.correction,
                )
            cols.append(T)
            cols.append(p_value)
        elif test_id.strip() == "chisquare":
            if sample2 == 1:
                rho, p_value = stats.chisquare(map(float, sample_one),
                                               map(float, sample_two),
                                               ddof=args.ddof)
            else:
                rho, p_value = stats.chisquare(map(float, sample_one),
                                               ddof=args.ddof)
            cols.append(rho)
            cols.append(p_value)
        elif test_id.strip() == "power_divergence":
            if sample2 == 1:
                stat, p_value = stats.power_divergence(
                    map(float, sample_one),
                    map(float, sample_two),
                    ddof=args.ddof,
                    lambda_=args.lambda_,
                )
            else:
                stat, p_value = stats.power_divergence(map(float, sample_one),
                                                       ddof=args.ddof,
                                                       lambda_=args.lambda_)
            cols.append(stat)
            cols.append(p_value)
        elif test_id.strip() == "theilslopes":
            if sample2 == 1:
                mpe, met, lo, up = stats.theilslopes(map(float, sample_one),
                                                     map(float, sample_two),
                                                     alpha=args.alpha)
            else:
                mpe, met, lo, up = stats.theilslopes(map(float, sample_one),
                                                     alpha=args.alpha)
            cols.append(mpe)
            cols.append(met)
            cols.append(lo)
            cols.append(up)
        elif test_id.strip() == "combine_pvalues":
            if sample2 == 1:
                stat, p_value = stats.combine_pvalues(
                    map(float, sample_one),
                    method=args.med,
                    weights=map(float, sample_two),
                )
            else:
                stat, p_value = stats.combine_pvalues(map(float, sample_one),
                                                      method=args.med)
            cols.append(stat)
            cols.append(p_value)
        elif test_id.strip() == "obrientransform":
            ob = stats.obrientransform(*b_samples)
            for list in ob:
                elements = ",".join(map(str, list))
                cols.append(elements)
        elif test_id.strip() == "f_oneway":
            f_value, p_value = stats.f_oneway(*b_samples)
            cols.append(f_value)
            cols.append(p_value)
        elif test_id.strip() == "kruskal":
            h, p_value = stats.kruskal(*b_samples)
            cols.append(h)
            cols.append(p_value)
        elif test_id.strip() == "friedmanchisquare":
            fr, p_value = stats.friedmanchisquare(*b_samples)
            cols.append(fr)
            cols.append(p_value)
        elif test_id.strip() == "fligner":
            xsq, p_value = stats.fligner(center=args.center,
                                         proportiontocut=args.proportiontocut,
                                         *b_samples)
            cols.append(xsq)
            cols.append(p_value)
        elif test_id.strip() == "bartlett":
            T, p_value = stats.bartlett(*b_samples)
            cols.append(T)
            cols.append(p_value)
        elif test_id.strip() == "levene":
            w, p_value = stats.levene(center=args.center,
                                      proportiontocut=args.proportiontocut,
                                      *b_samples)
            cols.append(w)
            cols.append(p_value)
        elif test_id.strip() == "median_test":
            stat, p_value, m, table = stats.median_test(
                ties=args.ties,
                correction=args.correction,
                lambda_=args.lambda_,
                *b_samples)
            cols.append(stat)
            cols.append(p_value)
            cols.append(m)
            cols.append(table)
            for list in table:
                elements = ",".join(map(str, list))
                cols.append(elements)
        outfile.write("%s\n" % "\t".join(map(str, cols)))
    outfile.close()
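Both versions of main() dispatch on test_id through a long if/elif chain and call test_id.strip() once per branch. A short sketch of an equivalent table-driven dispatch, shown only to illustrate the design alternative (not code from either project):

from scipy import stats

def run_test(test_id, sample_one, sample_two):
    one = list(map(float, sample_one))
    # Map each test name to a zero-argument callable; strip() once, look up once.
    dispatch = {
        "normaltest": lambda: stats.normaltest(one),
        "skewtest": lambda: stats.skewtest(one),
        "ranksums": lambda: stats.ranksums(one, list(map(float, sample_two))),
    }
    return dispatch[test_id.strip()]()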
Code Example #6
def refer_plot(path,data_head,l,l_init,data_cor,meth,linresu,oneresu,tworesu,l1,l2,l3,l4,l11,l12,l13,l14,std0,std1,z,erz):
	'''
	It produces the plots for one and two components in the reference lines SII and OI.

	The parameters needed are:
	path:      Path to the data
	data_head: Header of the data (the plate scale is read from it)
	l:         Wavelength range
	l_init:    Initial wavelength array of the spectrum
	data_cor:  Flux for each wavelength
	meth:      Method to be applied (S/O)
	linresu:   Result of the linear fit of the spectra
	oneresu:   Result of the linear+gaussian fit for the reference lines with one component
	tworesu:   Result of the linear+gaussian fit for the reference lines with two components
	l1-l14:    Parts of the spectra where the lines are located
	std0/std1: Where the standard deviation of the continuum is calculated
	z/erz:     Redshift of the galaxy and its error
	'''
	# Rest values of the line wavelengths 
	l_Halpha = 6562.801
	l_NII_1  = 6548.05
	l_NII_2  = 6583.45
	l_SII_1  = 6716.44
	l_SII_2  = 6730.82
	l_OI_1   = 6300.304
	l_OI_2   = 6363.776
	
	# Constants and STIS parameters
	v_luz = 299792.458 # km/s
	plate_scale = data_head['PLATESC']
	fwhm = 2*np.sqrt(2*np.log(2)) # times sigma
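	# For a Gaussian, FWHM = 2*sqrt(2*ln2)*sigma ~ 2.3548*sigma; siginst below is
	# the instrumental FWHM in A, so sig_inst = siginst/fwhm is its sigma.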
	pix_to_v = 47	# km/s
	if plate_scale == 0.05078:
	    siginst = 1.1	# A if binning 1x1 // 2.2 if binning 1x2
	    sig_inst = siginst/fwhm
	    ang_to_pix = 0.554
#	    pix_to_v = 25	# km/s
	elif plate_scale == 0.10156:
	    siginst = 2.2
	    sig_inst = siginst/fwhm
	    ang_to_pix = 1.108
#	    pix_to_v = 47	# km/s

	# Systemic velocity of the galaxy
	vsys = v_luz*z
	er_vsys = v_luz*erz

	# Parameters of the linear fit and the std of the continuum	
	new_slop = linresu.values['slope']
	new_intc = linresu.values['intc']
	stadev = np.std(data_cor[std0:std1])
	
	##################################### PLOT and PRINT for the SII lines ##################################################
	#
	# Now we create the individual gaussians in order to plot and print the results for only 1 component
	print('				RESULTS OF THE FIT: ')
	print('Linear fit equation: {:.5f}*x + {:.5f}'.format(linresu.values['slope'], linresu.values['intc']))
	print('')
	print("The rest of the results can be displayed all together with two/oneresu.params; the data can be accessed with two/oneresu.values['']")
	print('')
	print('The chi-square of the fit for 1 gaussian for the reference line is: {:.5f}'.format(oneresu.chisqr))
	print('The chi-square of the fit for 2 gaussians for the reference line is: {:.5f}'.format(tworesu.chisqr))
	print('')
	
	# Now we create and plot the individual gaussians of the fit
	gaus1 = Ofuncts.gaussian(l,oneresu.values['mu_0'],oneresu.values['sig_0'],oneresu.values['amp_0']) 
	gaus2 = Ofuncts.gaussian(l,oneresu.values['mu_1'],oneresu.values['sig_1'],oneresu.values['amp_1'])
	gaus21 = Ofuncts.gaussian(l,tworesu.values['mu_0'],tworesu.values['sig_0'],tworesu.values['amp_0']) 
	gaus22 = Ofuncts.gaussian(l,tworesu.values['mu_1'],tworesu.values['sig_1'],tworesu.values['amp_1'])
	gaus23 = Ofuncts.gaussian(l,tworesu.values['mu_20'],tworesu.values['sig_20'],tworesu.values['amp_20'])
	gaus24 = Ofuncts.gaussian(l,tworesu.values['mu_21'],tworesu.values['sig_21'],tworesu.values['amp_21'])
	onefin_fit = Ofuncts.twogaussian(l,new_slop,new_intc,
					 oneresu.values['mu_0'],oneresu.values['sig_0'],oneresu.values['amp_0'],
					 oneresu.values['mu_1'],oneresu.values['sig_1'],oneresu.values['amp_1'])
	twofin_fit = Ofuncts.funcSII2comp(l,new_slop,new_intc,
					 tworesu.values['mu_0'],tworesu.values['sig_0'],tworesu.values['amp_0'],
					 tworesu.values['mu_1'],tworesu.values['sig_1'],tworesu.values['amp_1'],
					 tworesu.values['mu_20'],tworesu.values['sig_20'],tworesu.values['amp_20'],
					 tworesu.values['mu_21'],tworesu.values['sig_21'],tworesu.values['amp_21'])
	if meth == 'S':
	    # one component
	    std_2 = np.std(data_cor[np.where(l_init<l1)[0][-1]:np.where(l_init>l2)[0][0]+10]-onefin_fit[np.where(l_init<l1)[0][-1]:np.where(l_init>l2)[0][0]+10])
	    std_1 = np.std(data_cor[np.where(l_init<l3)[0][-1]-10:np.where(l_init>l4)[0][0]]-onefin_fit[np.where(l_init<l3)[0][-1]-10:np.where(l_init>l4)[0][0]])
	    ep_1 = std_1/stadev
	    ep_2 = std_2/stadev
	    print('The condition for each line (in the same order as before) needs to be std_line < 3*std_cont --> for 1 component is... ')
	    print('	For the SII2 line: '+str(ep_2)+' < 3')
	    print('	For the SII1 line: '+str(ep_1)+' < 3')
	    # two components
	    std2_2 = np.std(data_cor[np.where(l_init<l1)[0][-1]:np.where(l_init>l2)[0][0]+10]-twofin_fit[np.where(l_init<l1)[0][-1]:np.where(l_init>l2)[0][0]+10])
	    std2_1 = np.std(data_cor[np.where(l_init<l3)[0][-1]-10:np.where(l_init>l4)[0][0]]-twofin_fit[np.where(l_init<l3)[0][-1]-10:np.where(l_init>l4)[0][0]])
	    ep2_1 = std2_1/stadev
	    ep2_2 = std2_2/stadev
	    print('The condition for each line (in the same order as before) needs to be std_line < 3*std_cont --> for 2 components is... ')
	    print('	For the SII2 line: '+str(ep2_2)+' < 3')
	    print('	For the SII1 line: '+str(ep2_1)+' < 3')

	    # We determine the maximum flux of the fit for all the lines, and the velocity and sigma components
	    maxS1 = onefin_fit[np.where(abs(oneresu.values['mu_0']-l)<0.28)[0][0]]
	    maxS2 = onefin_fit[np.where(abs(oneresu.values['mu_1']-l)<0.28)[0][0]]
	    max2S1 = twofin_fit[np.where(abs(tworesu.values['mu_0']-l)<0.28)[0][0]]
	    max2S2 = twofin_fit[np.where(abs(tworesu.values['mu_1']-l)<0.28)[0][0]]
	    # one component
	    vS2 = v_luz*((oneresu.values['mu_0']-l_SII_2)/l_SII_2)
	    sigS2 = pix_to_v*np.sqrt(oneresu.values['sig_0']**2-sig_inst**2)
	    # two comps
	    v2S2 = v_luz*((tworesu.values['mu_0']-l_SII_2)/l_SII_2)
	    v20S2 = v_luz*((tworesu.values['mu_20']-l_SII_2)/l_SII_2)
	    sig2S2 = pix_to_v*np.sqrt(tworesu.values['sig_0']**2-sig_inst**2)
	    sig20S2 = pix_to_v*np.sqrt(tworesu.values['sig_20']**2-sig_inst**2)
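	    # The velocities above follow from the Doppler shift of the fitted centroid,
	    # v = c*(mu - lambda_rest)/lambda_rest, and the intrinsic dispersion is
	    # recovered by removing the instrumental width in quadrature,
	    # sigma = sqrt(sig_fit**2 - sig_inst**2), before scaling to km/s.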
	    
	    if oneresu.params['mu_0'].stderr is None:
	        print('Problem determining the errors!')
	        evS2, esigS2 = 0., 0.
	    else:
	        evS2 = ((v_luz/l_SII_2)*oneresu.params['mu_0'].stderr)-er_vsys
	        esigS2 = pix_to_v*np.sqrt(oneresu.values['sig_0']*oneresu.params['sig_0'].stderr)/(np.sqrt(oneresu.values['sig_0']**2-sig_inst**2))

	    if tworesu.params['mu_20'].stderr is None:
	        print('Problem determining the errors!')
	        ev20S2, ev2S2, esig2S2, esig20S2 = 0., 0., 0., 0.
	    else:
	        ev2S2 = ((v_luz/l_SII_2)*tworesu.params['mu_0'].stderr)-er_vsys
	        ev20S2 = ((v_luz/l_SII_2)*tworesu.params['mu_20'].stderr)-er_vsys
	        esig2S2 = pix_to_v*np.sqrt(tworesu.values['sig_0']*tworesu.params['sig_0'].stderr)/(np.sqrt(tworesu.values['sig_0']**2-sig_inst**2))
	        esig20S2 = pix_to_v*np.sqrt(tworesu.values['sig_20']*tworesu.params['sig_20'].stderr)/(np.sqrt(tworesu.values['sig_20']**2-sig_inst**2))

	    textstr = '\n'.join((r'$V_{SII_{2}}$ = '+ '{:.2f} +- {:.2f}'.format(vS2,evS2),
			    r'$\sigma_{SII_{2}}$ = '+ '{:.2f} +- {:.2f}'.format(sigS2,esigS2),
			    r'$\frac{F_{SII_{2}}}{F_{SII_{1}}}$ = '+ '{:.3f}'.format(maxS2/maxS1)))
	    textstr2 = '\n'.join((r'$V_{SII_{2-1comp}}$ = '+ '{:.2f} +- {:.2f}'.format(v2S2,ev2S2),
			    r'$V_{SII_{2-2comp}}$ = '+ '{:.2f} +- {:.2f}'.format(v20S2,ev20S2),
			    r'$\sigma_{SII_{2-1comp}}$ = '+ '{:.2f} +- {:.2f}'.format(sig2S2,esig2S2),
			    r'$\sigma_{SII_{2-2comp}}$ = '+ '{:.2f} +- {:.2f}'.format(sig20S2,esig20S2),
			    r'$\frac{F_{SII_{2}}}{F_{SII_{1}}}$ = '+ '{:.3f}'.format(max2S2/max2S1)))
#			    r'$F_{SII_{1}}$ = '+ '{:.3f}'.format(max2S1)+' $10^{-14}$'))
	
	elif meth == 'O':
	    # one component
	    std_1 = np.std(data_cor[np.where(l_init<l11)[0][-1]-10:np.where(l_init>l12)[0][0]]-onefin_fit[np.where(l_init<l11)[0][-1]-10:np.where(l_init>l12)[0][0]])
	    std_2 = np.std(data_cor[np.where(l_init<l13)[0][-1]:np.where(l_init>l14)[0][0]+10]-onefin_fit[np.where(l_init<l13)[0][-1]:np.where(l_init>l14)[0][0]+10])
	    ep_1 = std_1/stadev
	    ep_2 = std_2/stadev
	    print('The condition for each line (in the same order as before) needs to be std_line < 3*std_cont --> for 1 component is... ')
	    print('	For the OI2 line: '+str(ep_2)+' < 3')
	    print('	For the OI1 line: '+str(ep_1)+' < 3')
	    # two components
	    std2_1 = np.std(data_cor[np.where(l_init<l11)[0][-1]-10:np.where(l_init>l12)[0][0]+10]-twofin_fit[np.where(l_init<l11)[0][-1]-10:np.where(l_init>l12)[0][0]+10])
	    std2_2 = np.std(data_cor[np.where(l_init<l13)[0][-1]-10:np.where(l_init>l14)[0][0]+10]-twofin_fit[np.where(l_init<l13)[0][-1]-10:np.where(l_init>l14)[0][0]+10])
	    ep2_1 = std2_1/stadev
	    ep2_2 = std2_2/stadev
	    print('The condition for each line (in the same order as before) needs to be std_line < 3*std_cont --> for 2 components is... ')
	    print('	For the OI2 line: '+str(ep2_2)+' < 3')
	    print('	For the OI1 line: '+str(ep2_1)+' < 3')
	
	    # We determine the maximum flux of the fit for all the lines, and the velocity and sigma components
	    maxS1 = onefin_fit[np.where(abs(oneresu.values['mu_0']-l)<0.27)[0][0]]
	    maxS2 = onefin_fit[np.where(abs(oneresu.values['mu_1']-l)<0.27)[0][0]]
	    max2S1 = twofin_fit[np.where(abs(tworesu.values['mu_0']-l)<0.27)[0][0]]
	    max2S2 = twofin_fit[np.where(abs(tworesu.values['mu_1']-l)<0.27)[0][0]]
	    # one component
	    vS2 = v_luz*((oneresu.values['mu_0']-l_OI_1)/l_OI_1)
	    sigS2 = pix_to_v*np.sqrt(oneresu.values['sig_0']**2-sig_inst**2)
	    # two comps
	    v2S2 = v_luz*((tworesu.values['mu_0']-l_OI_1)/l_OI_1)
	    v20S2 = v_luz*((tworesu.values['mu_20']-l_OI_1)/l_OI_1)
	    sig2S2 = pix_to_v*np.sqrt(tworesu.values['sig_0']**2-sig_inst**2)
	    sig20S2 = pix_to_v*np.sqrt(tworesu.values['sig_20']**2-sig_inst**2)

	    if oneresu.params['mu_0'].stderr is None:
	        print('Problem determining the errors!')
	        evS2, esigS2 = 0., 0.
	    else:
	        evS2 = ((v_luz/l_OI_1)*oneresu.params['mu_0'].stderr)-er_vsys
	        esigS2 = pix_to_v*np.sqrt(oneresu.values['sig_0']*oneresu.params['sig_0'].stderr)/(np.sqrt(oneresu.values['sig_0']**2-sig_inst**2))

	    if tworesu.params['mu_20'].stderr is None:
	        print('Problem determining the errors!')
	        ev20S2, ev2S2, esig2S2, esig20S2 = 0., 0., 0., 0.
	    else:
	        ev2S2 = ((v_luz/l_OI_1)*tworesu.params['mu_0'].stderr)-er_vsys
	        ev20S2 = ((v_luz/l_OI_1)*tworesu.params['mu_20'].stderr)-er_vsys
	        esig2S2 = pix_to_v*np.sqrt(tworesu.values['sig_0']*tworesu.params['sig_0'].stderr)/(np.sqrt(tworesu.values['sig_0']**2-sig_inst**2))
	        esig20S2 = pix_to_v*np.sqrt(tworesu.values['sig_20']*tworesu.params['sig_20'].stderr)/(np.sqrt(tworesu.values['sig_20']**2-sig_inst**2))

	    textstr = '\n'.join((r'$V_{OI_{1}}$ = '+ '{:.2f} +- {:.2f}'.format(vS2,evS2),
			    r'$\sigma_{OI_{1}}$ = '+ '{:.2f} +- {:.2f}'.format(sigS2,esigS2),
			    r'$\frac{F_{OI_{2}}}{F_{OI_{1}}}$ = '+ '{:.3f}'.format(maxS2/maxS1)))
	    textstr2 = '\n'.join((r'$V_{OI_{1-1comp}}$ = '+ '{:.2f} +- {:.2f}'.format(v2S2,ev2S2),
			    r'$V_{OI_{1-2comp}}$ = '+ '{:.2f} +- {:.2f}'.format(v20S2,ev20S2),
			    r'$\sigma_{OI_{1-1comp}}$ = '+ '{:.2f} +- {:.2f}'.format(sig2S2,esig2S2),
			    r'$\sigma_{OI_{1-2comp}}$ = '+ '{:.2f} +- {:.2f}'.format(sig20S2,esig20S2),
			    r'$\frac{F_{OI_{2}}}{F_{OI_{1}}}$ = '+ '{:.3f}'.format(max2S2/max2S1)))
	

	################################################ PLOT ######################################################
	plt.close()
	# MAIN plot
	fig1   = plt.figure(1,figsize=(10, 9))
	frame1 = fig1.add_axes((.1,.25,.85,.65)) 	     # left, bottom, width, height [fractions of the figure, from the bottom-left corner]
	plt.plot(l,data_cor,'k')			     # Initial data
	plt.plot(l,onefin_fit,'r-')
	plt.plot(l,(linresu.values['slope']*l+linresu.values['intc']),c='y',linestyle='-.',label='Linear fit')
	plt.plot(l,gaus1,'b-')
	plt.plot(l,gaus2,'b-',label='Narrow component')
	props = dict(boxstyle='round', facecolor='white', alpha=0.5)
	frame1.text(6350.,max(data_cor), textstr, fontsize=12,verticalalignment='top', bbox=props)
	plt.plot(l[std0:std1],data_cor[std0:std1],'g')	# Zone where the stddev is calculated
	
	frame1.set_xticklabels([]) 			# Remove x-tic labels for the first frame
	plt.ylabel(r'Flux (x10$^{-14} \mathrm{erg/s/cm^{2} / \AA}$)',fontsize=14)
	plt.tick_params(axis='both', labelsize=12)
	plt.xlim(l[0],l[-1])
	plt.legend(loc='best')
	
	# RESIDUAL plot
	frame2 = fig1.add_axes((.1,.1,.85,.15))
	plt.plot(l,data_cor-onefin_fit,c='k')		# Main
	plt.xlabel(r'Wavelength ($\AA$)',fontsize=14)
	plt.ylabel('Residuals',fontsize=14)
	plt.tick_params(axis='both', labelsize=12)
	plt.xlim(l[0],l[-1])
	plt.plot(l,np.zeros(len(l)),c='grey',linestyle='--')         	# Line around zero
	plt.plot(l,np.zeros(len(l))+2*stadev,c='grey',linestyle='--')	# 2 sigma upper limit
	plt.plot(l,np.zeros(len(l))-2*stadev,c='grey',linestyle='--') 	# 2 sigma lower limit
	plt.ylim(-(3*stadev)*2,(3*stadev)*2)
	
	plt.savefig(path+'adj_met'+str(meth)+'_ref_1comp.png')
	
	#######################################################################################
	# Two components in reference line
	# MAIN plot
	fig2   = plt.figure(2,figsize=(10, 9))
	frame3 = fig2.add_axes((.1,.25,.85,.65)) 	     # left, bottom, width, height [fractions of the figure, from the bottom-left corner]
	plt.plot(l,data_cor,'k')		     # Initial data
	plt.plot(l,twofin_fit,'r-')
	plt.plot(l,(linresu.values['slope']*l+linresu.values['intc']),c='y',linestyle='-.',label='Linear fit')
	plt.plot(l,gaus21,'b-')
	plt.plot(l,gaus22,'b-',label='Narrow component')
	plt.plot(l,gaus23,'m-')
	plt.plot(l,gaus24,'m-',label='Secondary component')
	props = dict(boxstyle='round', facecolor='white', alpha=0.5)
	frame3.text(6350.,max(data_cor), textstr2, fontsize=12,verticalalignment='top', bbox=props)
	plt.plot(l[std0:std1],data_cor[std0:std1],'g')	# Zone where the stddev is calculated
	
	frame3.set_xticklabels([]) 			# Remove x-tic labels for the first frame
	plt.ylabel(r'Flux (x10$^{-14} \mathrm{erg/s/cm^{2} / \AA}$)',fontsize=14)
	plt.tick_params(axis='both', labelsize=12)
	plt.xlim(l[0],l[-1])
	plt.legend(loc='best')
	
	# RESIDUAL plot
	frame4 = fig2.add_axes((.1,.1,.85,.15))
	plt.plot(l,data_cor-twofin_fit,c='k')		# Main
	plt.xlabel(r'Wavelength ($\AA$)',fontsize=14)
	plt.ylabel('Residuals',fontsize=14)
	plt.tick_params(axis='both', labelsize=12)
	plt.xlim(l[0],l[-1])
	plt.plot(l,np.zeros(len(l)),c='grey',linestyle='--')         	# Line around zero
	plt.plot(l,np.zeros(len(l))+2*stadev,c='grey',linestyle='--')	# 2 sigma upper limit
	plt.plot(l,np.zeros(len(l))-2*stadev,c='grey',linestyle='--') 	# 2 sigma lower limit
	plt.ylim(-(3*stadev)*2,(3*stadev)*2)
	
	plt.savefig(path+'adj_met'+str(meth)+'_ref_2comp.png')

	##############################################################################################################################################################################
	# We apply an F-test to see whether the presence of a second component in the lines is significant.
	# As the only possible method here is the S-method (there are no O-lines in this spectrum),
	# the test can only be applied to the SII lines (so the wavelength range is around these two lines)
	if oneresu.chisqr < tworesu.chisqr:
	    print('The probability cannot be calculated as the two-component fit does not improve the chi-square!')
	else: 
	    fvalue, pvalue = stats.f_oneway(data_cor[np.where(l_init>l3)[0][0]-20:np.where(l_init<l2)[0][-1]+20]-onefin_fit[np.where(l_init>l3)[0][0]-20:np.where(l_init<l2)[0][-1]+20],
						data_cor[np.where(l_init>l3)[0][0]-20:np.where(l_init<l2)[0][-1]+20]-twofin_fit[np.where(l_init>l3)[0][0]-20:np.where(l_init<l2)[0][-1]+20])
	    statist, pvalue2 = stats.levene(data_cor[np.where(l_init>l3)[0][0]-20:np.where(l_init<l2)[0][-1]+20]-onefin_fit[np.where(l_init>l3)[0][0]-20:np.where(l_init<l2)[0][-1]+20],
						data_cor[np.where(l_init>l3)[0][0]-20:np.where(l_init<l2)[0][-1]+20]-twofin_fit[np.where(l_init>l3)[0][0]-20:np.where(l_init<l2)[0][-1]+20])
	    pre_x = data_cor[np.where(l_init<l3)[0][-1]-20:np.where(l_init>l2)[0][0]+20]-onefin_fit[np.where(l_init<l3)[0][-1]-20:np.where(l_init>l2)[0][0]+20]
	    pre_y = data_cor[np.where(l_init<l3)[0][-1]-20:np.where(l_init>l2)[0][0]+20]-twofin_fit[np.where(l_init<l3)[0][-1]-20:np.where(l_init>l2)[0][0]+20]
	    tx, ty = stats.obrientransform(pre_x, pre_y)
	    fvalue1, pvalue1 = stats.f_oneway(tx,ty)
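	    # f_oneway on the O'Brien-transformed residuals compares the residual
	    # variances of the one- and two-component fits, complementing the Levene test.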
	    fstat = ftest(oneresu.chisqr,tworesu.chisqr,oneresu.nfree,tworesu.nfree)
	    print('')
	    print('The probability of a second component (one component vs two components) using the F-test is: '+str(pvalue))
	    print("The probability of a second component (one component vs two components) with the F-test (and O'Brien) is: "+str(pvalue1))
	    print('The probability of a second component (one component vs two components) using the Levene-test is: '+str(pvalue2))
	    print('The probability of a second component (one component vs two components) with the F-test of IDL is: '+str(fstat['p-value']))
	    print('')

	return ep_1, ep_2, ep2_1, ep2_2
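
The ftest helper called above is defined elsewhere in this project. A minimal sketch of what it plausibly computes, assuming it mirrors the classic nested-model F-test (cf. IDL's MPFTEST; the real helper may differ):

from scipy import stats

def ftest(chi1, chi2, nfree1, nfree2):
    # chi1/nfree1 belong to the simpler (one-component) fit, chi2/nfree2 to the
    # fit with extra parameters; nfree is the number of degrees of freedom.
    fval = ((chi1 - chi2) / (nfree1 - nfree2)) / (chi2 / nfree2)
    pval = stats.f.sf(fval, nfree1 - nfree2, nfree2)
    return {'F': fval, 'p-value': pval}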