import numpy as np
from numpy.testing import assert_array_almost_equal, assert_almost_equal

from scipy import stats


def test_obrientransform():
    # This is a regression test to check the np.var replacement.
    # I didn't separately verify the numbers.
    x1 = np.arange(5)
    result = np.array(
        [[5.41666667, 1.04166667, -0.41666667, 1.04166667, 5.41666667],
         [21.66666667, 4.16666667, -1.66666667, 4.16666667, 21.66666667]])
    assert_array_almost_equal(stats.obrientransform(x1, 2 * x1), result, decimal=8)
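# For context, a minimal sketch (not part of the original tests) of how
# stats.obrientransform is typically used: the transform maps each sample to
# values whose mean equals that sample's variance, so homogeneity of variances
# can be tested by feeding the transformed samples to an ordinary one-way
# ANOVA (this is exactly the pattern used in refer_plot further below).
# The two samples here are illustrative placeholders.
def _example_obrientransform_usage():
    a = np.array([1.0, 2.0, 4.0, 7.0, 11.0])
    b = np.array([3.0, 3.5, 4.0, 4.5, 5.0])
    ta, tb = stats.obrientransform(a, b)
    # np.mean(ta) equals np.var(a, ddof=1), and likewise for tb.
    f_value, p_value = stats.f_oneway(ta, tb)
    print('F = %g, p = %g' % (f_value, p_value))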
def test_obrientransform(self):
    for n in self.get_n():
        x, y, xm, ym = self.generate_xy_sample(n)
        r = stats.obrientransform(x)
        rm = stats.mstats.obrientransform(xm)
        assert_almost_equal(r.T, rm[0:len(x)])
# NOTE: this tool was written for Python 2 and an older SciPy: map() returns a
# list there (under Python 3 these calls would need list(map(...))), and several
# of the wrapped functions (stats.nanmean, stats.nanmedian, stats.nanstd,
# stats.signaltonoise, stats.itemfreq, stats.histogram, stats.histogram2,
# stats.threshold) have since been removed from SciPy.
import argparse

from scipy import stats


def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("-i", "--infile", required=True, help="Tabular file.")
    parser.add_argument("-o", "--outfile", required=True, help="Path to the output file.")
    parser.add_argument("--sample_one_cols", help="Comma-separated list of 1-based column indices for the first sample.")
    parser.add_argument("--sample_two_cols", help="Comma-separated list of 1-based column indices for the second sample.")
    parser.add_argument("--sample_cols", help="Column indices for multi-sample tests; separate samples with ';' and columns with ','.")
    parser.add_argument("--test_id", help="statistical test method")
    parser.add_argument(
        "--mwu_use_continuity",
        action="store_true",
        default=False,
        help="Whether a continuity correction (1/2.) should be taken into account.",
    )
    parser.add_argument(
        "--equal_var",
        action="store_true",
        default=False,
        help="If set, perform a standard independent 2 sample test that assumes equal population variances. If not set, perform Welch's t-test, which does not assume equal population variance.",
    )
    parser.add_argument("--reta", action="store_true", default=False, help="Whether or not to return the internally computed a values.")
    parser.add_argument("--fisher", action="store_true", default=False, help="If set, the Fisher definition is used.")
    parser.add_argument("--bias", action="store_true", default=False, help="If False, the calculations are corrected for statistical bias.")
    parser.add_argument("--inclusive1", action="store_true", default=False, help="If False, lower_limit will be ignored.")
    parser.add_argument("--inclusive2", action="store_true", default=False, help="If False, higher_limit will be ignored.")
    parser.add_argument("--inclusive", action="store_true", default=False, help="If False, limit will be ignored.")
    parser.add_argument(
        "--printextras",
        action="store_true",
        default=False,
        help="If True, a warning is raised when there are extra points, saying how many of those points there are.",
    )
    parser.add_argument(
        "--initial_lexsort",
        action="store_true",
        default=False,  # was the string "False", which is truthy
        help="Whether to use lexsort or quicksort as the sorting method for the initial sort of the inputs.",
    )
    parser.add_argument("--correction", action="store_true", default=False, help="continuity correction")
    parser.add_argument(
        "--axis",
        type=int,
        default=0,
        help="Axis can equal None (ravel array first), or an integer (the axis over which to operate on a and b).",
    )
    parser.add_argument(
        "--n",
        type=int,
        default=0,
        help="The number of trials. This is ignored if x gives both the number of successes and failures.",
    )
    parser.add_argument("--b", type=int, default=0, help="The number of bins to use for the histogram.")
    parser.add_argument("--N", type=int, default=0, help="Sample size used by kstest when cdf is given by name.")
    parser.add_argument("--ddof", type=int, default=0, help="Degrees of freedom correction.")
    parser.add_argument("--score", type=int, default=0, help="Score that is compared to the elements in a.")
    parser.add_argument("--m", type=float, default=0.0, help="limits")
    parser.add_argument("--mf", type=float, default=2.0, help="lower limit")
    parser.add_argument("--nf", type=float, default=99.9, help="higher limit")
    parser.add_argument(
        "--p",
        type=float,
        default=0.5,
        help="The hypothesized probability of success. 0 <= p <= 1. The default value is p = 0.5.",
    )
    parser.add_argument("--alpha", type=float, default=0.9, help="probability")
    parser.add_argument("--new", type=float, default=0.0, help="Value to put in place of values in a outside of bounds.")
    parser.add_argument(
        "--proportiontocut",
        type=float,
        default=0.0,
        help="Proportion (in range 0-1) of total data set to trim off each end.",
    )
    parser.add_argument(
        "--lambda_",
        type=float,
        default=1.0,
        help="lambda_ gives the power in the Cressie-Read power divergence statistic.",
    )
    parser.add_argument(
        "--imbda",
        type=float,
        default=0,
        help="If lmbda is not None, do the transformation for that value. If lmbda is None, find the lambda that maximizes the log-likelihood function and return it as the second output argument.",
    )
    parser.add_argument("--base", type=float, default=1.6, help="The logarithmic base to use, defaults to e.")
    parser.add_argument("--dtype", help="dtype")
    parser.add_argument("--med", help="method option for combine_pvalues")
    parser.add_argument("--cdf", help="distribution name used by kstest")
    parser.add_argument("--zero_method", help="zero_method options")
    parser.add_argument("--dist", help="dist options")
    parser.add_argument("--ties", help="ties options")
    parser.add_argument("--alternative", help="alternative options")
    parser.add_argument("--mode", help="mode options")
    parser.add_argument("--method", help="method options")
    parser.add_argument("--md", help="method option for rankdata")
    parser.add_argument("--center", help="center options")
    parser.add_argument("--kind", help="kind options")
    parser.add_argument("--tail", help="tail options")
    parser.add_argument("--interpolation", help="interpolation options")
    parser.add_argument("--statistic", help="statistic options")

    args = parser.parse_args()
    infile = args.infile
    outfile = open(args.outfile, "w+")
    test_id = args.test_id
    nf = args.nf
    mf = args.mf
    imbda = args.imbda
    inclusive1 = args.inclusive1
    inclusive2 = args.inclusive2
    sample0 = 0
    sample1 = 0
    sample2 = 0
    if args.sample_cols is not None:
        sample0 = 1
        barlett_samples = []
        for sample in args.sample_cols.split(";"):
            barlett_samples.append(map(int, sample.split(",")))
    if args.sample_one_cols is not None:
        sample1 = 1
        sample_one_cols = args.sample_one_cols.split(",")
    if args.sample_two_cols is not None:
        sample_two_cols = args.sample_two_cols.split(",")
        sample2 = 1
    for line in open(infile):
        sample_one = []
        sample_two = []
        cols = line.strip().split("\t")
        if sample0 == 1:
            # columns_to_values() is a helper defined elsewhere in this tool.
            b_samples = columns_to_values(barlett_samples, line)
        if sample1 == 1:
            for index in sample_one_cols:
                sample_one.append(cols[int(index) - 1])
        if sample2 == 1:
            for index in sample_two_cols:
                sample_two.append(cols[int(index) - 1])

        if test_id.strip() == "describe":
            size, min_max, mean, uv, bs, bk = stats.describe(map(float, sample_one))
            cols.extend([size, min_max, mean, uv, bs, bk])
        elif test_id.strip() == "mode":
            vals, counts = stats.mode(map(float, sample_one))
            cols.extend([vals, counts])
        elif test_id.strip() == "nanmean":
            m = stats.nanmean(map(float, sample_one))
            cols.append(m)
        elif test_id.strip() == "nanmedian":
            m = stats.nanmedian(map(float, sample_one))
            cols.append(m)
        elif test_id.strip() == "kurtosistest":
            z_value, p_value = stats.kurtosistest(map(float, sample_one))
            cols.extend([z_value, p_value])
        elif test_id.strip() == "variation":
            ra = stats.variation(map(float, sample_one))
            cols.append(ra)
        elif test_id.strip() == "itemfreq":
            freq = stats.itemfreq(map(float, sample_one))
            for row in freq:
                cols.append(",".join(map(str, row)))
        elif test_id.strip() == "boxcox_llf":
            llf = stats.boxcox_llf(imbda, map(float, sample_one))
            cols.append(llf)
        elif test_id.strip() == "tiecorrect":
            fa = stats.tiecorrect(map(float, sample_one))
            cols.append(fa)
        elif test_id.strip() == "rankdata":
            r = stats.rankdata(map(float, sample_one), method=args.md)
            cols.append(r)
        elif test_id.strip() == "nanstd":
            s = stats.nanstd(map(float, sample_one), bias=args.bias)
            cols.append(s)
        elif test_id.strip() == "anderson":
            A2, critical, sig = stats.anderson(map(float, sample_one), dist=args.dist)
            cols.append(A2)
            cols.extend(critical)
            cols.append(",")
            cols.extend(sig)
        elif test_id.strip() == "binom_test":
            p_value = stats.binom_test(map(float, sample_one), n=args.n, p=args.p)
            cols.append(p_value)
        elif test_id.strip() == "gmean":
            gm = stats.gmean(map(float, sample_one), dtype=args.dtype)
            cols.append(gm)
        elif test_id.strip() == "hmean":
            hm = stats.hmean(map(float, sample_one), dtype=args.dtype)
            cols.append(hm)
        elif test_id.strip() == "kurtosis":
            k = stats.kurtosis(map(float, sample_one), axis=args.axis, fisher=args.fisher, bias=args.bias)
            cols.append(k)
        elif test_id.strip() == "moment":
            # scipy's keyword is `moment`, not `n`
            n_moment = stats.moment(map(float, sample_one), moment=args.n)
            cols.append(n_moment)
        elif test_id.strip() == "normaltest":
            k2, p_value = stats.normaltest(map(float, sample_one))
            cols.extend([k2, p_value])
        elif test_id.strip() == "skew":
            skewness = stats.skew(map(float, sample_one), bias=args.bias)
            cols.append(skewness)
        elif test_id.strip() == "skewtest":
            z_value, p_value = stats.skewtest(map(float, sample_one))
            cols.extend([z_value, p_value])
        elif test_id.strip() == "sem":
            s = stats.sem(map(float, sample_one), ddof=args.ddof)
            cols.append(s)
        elif test_id.strip() == "zscore":
            z = stats.zscore(map(float, sample_one), ddof=args.ddof)
            cols.extend(z)
        elif test_id.strip() == "signaltonoise":
            s2n = stats.signaltonoise(map(float, sample_one), ddof=args.ddof)
            cols.append(s2n)
        elif test_id.strip() == "percentileofscore":
            p = stats.percentileofscore(map(float, sample_one), score=args.score, kind=args.kind)
            cols.append(p)
        elif test_id.strip() == "bayes_mvs":
            c_mean, c_var, c_std = stats.bayes_mvs(map(float, sample_one), alpha=args.alpha)
            cols.extend([c_mean, c_var, c_std])
        elif test_id.strip() == "sigmaclip":
            c, c_low, c_up = stats.sigmaclip(map(float, sample_one), low=args.m, high=args.n)
            cols.extend([c, c_low, c_up])
        elif test_id.strip() == "kstest":
            d, p_value = stats.kstest(map(float, sample_one), cdf=args.cdf, N=args.N,
                                      alternative=args.alternative, mode=args.mode)
            cols.extend([d, p_value])
        elif test_id.strip() == "chi2_contingency":
            chi2, p, dof, ex = stats.chi2_contingency(map(float, sample_one),
                                                      correction=args.correction, lambda_=args.lambda_)
            cols.extend([chi2, p, dof, ex])
        elif test_id.strip() == "tmean":
            if nf == 0 and mf == 0:
                mean = stats.tmean(map(float, sample_one))
            else:
                mean = stats.tmean(map(float, sample_one), (mf, nf), (inclusive1, inclusive2))
            cols.append(mean)
        elif test_id.strip() == "tmin":
            if mf == 0:
                minimum = stats.tmin(map(float, sample_one))
            else:
                minimum = stats.tmin(map(float, sample_one), lowerlimit=mf, inclusive=args.inclusive)
            cols.append(minimum)
        elif test_id.strip() == "tmax":
            if nf == 0:
                maximum = stats.tmax(map(float, sample_one))
            else:
                maximum = stats.tmax(map(float, sample_one), upperlimit=nf, inclusive=args.inclusive)
            cols.append(maximum)
        elif test_id.strip() == "tvar":
            if nf == 0 and mf == 0:
                var = stats.tvar(map(float, sample_one))
            else:
                var = stats.tvar(map(float, sample_one), (mf, nf), (inclusive1, inclusive2))
            cols.append(var)
        elif test_id.strip() == "tstd":
            if nf == 0 and mf == 0:
                std = stats.tstd(map(float, sample_one))
            else:
                std = stats.tstd(map(float, sample_one), (mf, nf), (inclusive1, inclusive2))
            cols.append(std)
        elif test_id.strip() == "tsem":
            if nf == 0 and mf == 0:
                s = stats.tsem(map(float, sample_one))
            else:
                s = stats.tsem(map(float, sample_one), (mf, nf), (inclusive1, inclusive2))
            cols.append(s)
        elif test_id.strip() == "scoreatpercentile":
            if nf == 0 and mf == 0:
                s = stats.scoreatpercentile(map(float, sample_one), map(float, sample_two),
                                            interpolation_method=args.interpolation)
            else:
                s = stats.scoreatpercentile(map(float, sample_one), map(float, sample_two), (mf, nf),
                                            interpolation_method=args.interpolation)
            cols.extend(s)
        elif test_id.strip() == "relfreq":
            if nf == 0 and mf == 0:
                rel, low_range, binsize, ex = stats.relfreq(map(float, sample_one), args.b)
            else:
                rel, low_range, binsize, ex = stats.relfreq(map(float, sample_one), args.b, (mf, nf))
            cols.extend(rel)
            cols.extend([low_range, binsize, ex])
        elif test_id.strip() == "binned_statistic":
            if nf == 0 and mf == 0:
                st, b_edge, b_n = stats.binned_statistic(map(float, sample_one), map(float, sample_two),
                                                         statistic=args.statistic, bins=args.b)
            else:
                st, b_edge, b_n = stats.binned_statistic(map(float, sample_one), map(float, sample_two),
                                                         statistic=args.statistic, bins=args.b, range=(mf, nf))
            cols.extend([st, b_edge, b_n])
        elif test_id.strip() == "threshold":
            if nf == 0 and mf == 0:
                o = stats.threshold(map(float, sample_one), newval=args.new)
            else:
                o = stats.threshold(map(float, sample_one), mf, nf, newval=args.new)
            cols.extend(o)
        elif test_id.strip() == "trimboth":
            o = stats.trimboth(map(float, sample_one), proportiontocut=args.proportiontocut)
            cols.extend(o)
        elif test_id.strip() == "trim1":
            t1 = stats.trim1(map(float, sample_one), proportiontocut=args.proportiontocut, tail=args.tail)
            cols.extend(t1)
        elif test_id.strip() == "histogram":
            if nf == 0 and mf == 0:
                hi, low_range, binsize, ex = stats.histogram(map(float, sample_one), args.b)
            else:
                hi, low_range, binsize, ex = stats.histogram(map(float, sample_one), args.b, (mf, nf))
            cols.extend([hi, low_range, binsize, ex])
        elif test_id.strip() == "cumfreq":
            if nf == 0 and mf == 0:
                cum, low_range, binsize, ex = stats.cumfreq(map(float, sample_one), args.b)
            else:
                cum, low_range, binsize, ex = stats.cumfreq(map(float, sample_one), args.b, (mf, nf))
            cols.extend([cum, low_range, binsize, ex])
        elif test_id.strip() == "boxcox_normmax":
            if nf == 0 and mf == 0:
                ma = stats.boxcox_normmax(map(float, sample_one))
            else:
                ma = stats.boxcox_normmax(map(float, sample_one), (mf, nf), method=args.method)
            cols.append(ma)
        elif test_id.strip() == "boxcox":
            if imbda == 0:
                box, ma, ci = stats.boxcox(map(float, sample_one), alpha=args.alpha)
                cols.extend([box, ma, ci])
            else:
                box = stats.boxcox(map(float, sample_one), imbda, alpha=args.alpha)
                cols.append(box)
        elif test_id.strip() == "histogram2":
            h2 = stats.histogram2(map(float, sample_one), map(float, sample_two))
            cols.extend(h2)
        elif test_id.strip() == "ranksums":
            z_statistic, p_value = stats.ranksums(map(float, sample_one), map(float, sample_two))
            cols.extend([z_statistic, p_value])
        elif test_id.strip() == "ttest_1samp":
            t, prob = stats.ttest_1samp(map(float, sample_one), map(float, sample_two))
            cols.extend(t)
            cols.extend(prob)
        elif test_id.strip() == "ansari":
            AB, p_value = stats.ansari(map(float, sample_one), map(float, sample_two))
            cols.extend([AB, p_value])
        elif test_id.strip() == "linregress":
            slope, intercept, r_value, p_value, stderr = stats.linregress(map(float, sample_one),
                                                                          map(float, sample_two))
            cols.extend([slope, intercept, r_value, p_value, stderr])
        elif test_id.strip() == "pearsonr":
            cor, p_value = stats.pearsonr(map(float, sample_one), map(float, sample_two))
            cols.extend([cor, p_value])
        elif test_id.strip() == "pointbiserialr":
            r, p_value = stats.pointbiserialr(map(float, sample_one), map(float, sample_two))
            cols.extend([r, p_value])
        elif test_id.strip() == "ks_2samp":
            d, p_value = stats.ks_2samp(map(float, sample_one), map(float, sample_two))
            cols.extend([d, p_value])
        elif test_id.strip() == "mannwhitneyu":
            mw_stats_u, p_value = stats.mannwhitneyu(map(float, sample_one), map(float, sample_two),
                                                     use_continuity=args.mwu_use_continuity)
            cols.extend([mw_stats_u, p_value])
        elif test_id.strip() == "zmap":
            z = stats.zmap(map(float, sample_one), map(float, sample_two), ddof=args.ddof)
            cols.extend(z)
        elif test_id.strip() == "ttest_ind":
            t_value, p_value = stats.ttest_ind(map(float, sample_one), map(float, sample_two),
                                               equal_var=args.equal_var)
            cols.extend([t_value, p_value])
        elif test_id.strip() == "ttest_rel":
            t, prob = stats.ttest_rel(map(float, sample_one), map(float, sample_two), axis=args.axis)
            cols.extend([t, prob])
        elif test_id.strip() == "mood":
            z, p_value = stats.mood(map(float, sample_one), map(float, sample_two), axis=args.axis)
            cols.extend([z, p_value])
        elif test_id.strip() == "shapiro":
            W, p_value, a = stats.shapiro(map(float, sample_one), map(float, sample_two), args.reta)
            cols.extend([W, p_value])
            cols.extend(a)
        elif test_id.strip() == "kendalltau":
            k, p_value = stats.kendalltau(map(float, sample_one), map(float, sample_two),
                                          initial_lexsort=args.initial_lexsort)
            cols.extend([k, p_value])
        elif test_id.strip() == "entropy":
            s = stats.entropy(map(float, sample_one), map(float, sample_two), base=args.base)
            cols.append(s)
        elif test_id.strip() == "spearmanr":
            if sample2 == 1:
                rho, p_value = stats.spearmanr(map(float, sample_one), map(float, sample_two))
            else:
                rho, p_value = stats.spearmanr(map(float, sample_one))
            cols.extend([rho, p_value])
        elif test_id.strip() == "wilcoxon":
            if sample2 == 1:
                T, p_value = stats.wilcoxon(map(float, sample_one), map(float, sample_two),
                                            zero_method=args.zero_method, correction=args.correction)
            else:
                T, p_value = stats.wilcoxon(map(float, sample_one),
                                            zero_method=args.zero_method, correction=args.correction)
            cols.extend([T, p_value])
        elif test_id.strip() == "chisquare":
            if sample2 == 1:
                rho, p_value = stats.chisquare(map(float, sample_one), map(float, sample_two), ddof=args.ddof)
            else:
                rho, p_value = stats.chisquare(map(float, sample_one), ddof=args.ddof)
            cols.extend([rho, p_value])
        elif test_id.strip() == "power_divergence":
            if sample2 == 1:
                stat, p_value = stats.power_divergence(map(float, sample_one), map(float, sample_two),
                                                       ddof=args.ddof, lambda_=args.lambda_)
            else:
                stat, p_value = stats.power_divergence(map(float, sample_one),
                                                       ddof=args.ddof, lambda_=args.lambda_)
            cols.extend([stat, p_value])
        elif test_id.strip() == "theilslopes":
            if sample2 == 1:
                mpe, met, lo, up = stats.theilslopes(map(float, sample_one), map(float, sample_two),
                                                     alpha=args.alpha)
            else:
                mpe, met, lo, up = stats.theilslopes(map(float, sample_one), alpha=args.alpha)
            cols.extend([mpe, met, lo, up])
        elif test_id.strip() == "combine_pvalues":
            if sample2 == 1:
                stat, p_value = stats.combine_pvalues(map(float, sample_one), method=args.med,
                                                      weights=map(float, sample_two))
            else:
                stat, p_value = stats.combine_pvalues(map(float, sample_one), method=args.med)
            cols.extend([stat, p_value])
        elif test_id.strip() == "obrientransform":
            ob = stats.obrientransform(*b_samples)
            for row in ob:
                cols.append(",".join(map(str, row)))
        elif test_id.strip() == "f_oneway":
            f_value, p_value = stats.f_oneway(*b_samples)
            cols.extend([f_value, p_value])
        elif test_id.strip() == "kruskal":
            h, p_value = stats.kruskal(*b_samples)
            cols.extend([h, p_value])
        elif test_id.strip() == "friedmanchisquare":
            fr, p_value = stats.friedmanchisquare(*b_samples)
            cols.extend([fr, p_value])
        elif test_id.strip() == "fligner":
            xsq, p_value = stats.fligner(center=args.center, proportiontocut=args.proportiontocut, *b_samples)
            cols.extend([xsq, p_value])
        elif test_id.strip() == "bartlett":
            T, p_value = stats.bartlett(*b_samples)
            cols.extend([T, p_value])
        elif test_id.strip() == "levene":
            w, p_value = stats.levene(center=args.center, proportiontocut=args.proportiontocut, *b_samples)
            cols.extend([w, p_value])
        elif test_id.strip() == "median_test":
            stat, p_value, m, table = stats.median_test(ties=args.ties, correction=args.correction,
                                                        lambda_=args.lambda_, *b_samples)
            cols.extend([stat, p_value, m, table])
            for row in table:
                cols.append(",".join(map(str, row)))
        outfile.write("%s\n" % "\t".join(map(str, cols)))
    outfile.close()
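# main() relies on a columns_to_values() helper that is defined elsewhere in
# this tool and not shown in this section. Below is a hedged reconstruction of
# what it plausibly does, based only on how it is called above (the real
# implementation may differ): for each group of 1-based column indices, pull
# the corresponding tab-separated fields out of the line as floats, yielding
# one sample per group, which is what the *b_samples calls expect.
def columns_to_values(column_groups, line):
    fields = line.strip().split("\t")
    samples = []
    for group in column_groups:
        samples.append([float(fields[int(i) - 1]) for i in group])
    return samples


# Illustrative invocation of the tool (the script name is hypothetical):
#   python statistics_tool.py -i in.tsv -o out.tsv \
#       --test_id obrientransform --sample_cols "1,2,3;4,5,6"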
import numpy as np
import matplotlib.pyplot as plt
from scipy import stats

# Ofuncts (the Gaussian/linear line models) and ftest (an F-test helper) are
# project-local and imported elsewhere in this code.


def refer_plot(path, data_head, l, l_init, data_cor, meth, linresu, oneresu, tworesu,
               l1, l2, l3, l4, l11, l12, l13, l14, std0, std1, z, erz):
    '''
    It gives the plots for one and two components in the reference lines SII and OI

    The parameters needed are:
    path:      Path to the data
    data_head: Header of the data (used to read the plate scale, PLATESC)
    l:         Wavelength range
    l_init:    Initial wavelength array, used to locate the line regions
    data_cor:  Flux for each wavelength
    meth:      Method to be applied (S/O)
    linresu:   Result of the linear fit of the spectra
    oneresu:   Result of the linear+gaussian fit for the reference lines with one component
    tworesu:   Result of the linear+gaussian fit for the reference lines with two components
    l1-l14:    Parts of the spectra where the lines are located
    std0/std1: Where the standard deviation of the continuum is calculated
    z/erz:     Redshift of the galaxy and its error
    '''
    # Rest values of the line wavelengths
    l_Halpha = 6562.801
    l_NII_1 = 6548.05
    l_NII_2 = 6583.45
    l_SII_1 = 6716.44
    l_SII_2 = 6730.82
    l_OI_1 = 6300.304
    l_OI_2 = 6363.776

    # Constants and STIS parameters
    v_luz = 299792.458  # km/s
    plate_scale = data_head['PLATESC']
    fwhm = 2 * np.sqrt(2 * np.log(2))  # times sigma
    pix_to_v = 47  # km/s
    if plate_scale == 0.05078:
        siginst = 1.1  # A if binning 1x1 // 2.2 if binning 1x2
        sig_inst = siginst / fwhm
        ang_to_pix = 0.554
        # pix_to_v = 25  # km/s
    elif plate_scale == 0.10156:
        siginst = 2.2
        sig_inst = siginst / fwhm
        ang_to_pix = 1.108
        # pix_to_v = 47  # km/s

    # Systemic velocity of the galaxy
    vsys = v_luz * z
    er_vsys = v_luz * erz

    # Parameters of the linear fit and the std of the continuum
    new_slop = linresu.values['slope']
    new_intc = linresu.values['intc']
    stadev = np.std(data_cor[std0:std1])

    ########################### PLOT and PRINT for the SII lines ###########################
    #
    # Now we create the individual gaussians in order to plot and print the results for only 1 component
    print(' RESULTS OF THE FIT: ')
    print('Linear fit equation: {:.5f}*x + {:.5f}'.format(linresu.values['slope'], linresu.values['intc']))
    print('')
    print("The rest of the results can be displayed all together with two/oneresu.params; the data can be accessed with two/oneresu.values['']")
    print('')
    print('The chi-square of the fit for 1 gaussian for the reference line is: {:.5f}'.format(oneresu.chisqr))
    print('The chi-square of the fit for 2 gaussians for the reference line is: {:.5f}'.format(tworesu.chisqr))
    print('')

    # Now we create and plot the individual gaussians of the fit
    gaus1 = Ofuncts.gaussian(l, oneresu.values['mu_0'], oneresu.values['sig_0'], oneresu.values['amp_0'])
    gaus2 = Ofuncts.gaussian(l, oneresu.values['mu_1'], oneresu.values['sig_1'], oneresu.values['amp_1'])
    gaus21 = Ofuncts.gaussian(l, tworesu.values['mu_0'], tworesu.values['sig_0'], tworesu.values['amp_0'])
    gaus22 = Ofuncts.gaussian(l, tworesu.values['mu_1'], tworesu.values['sig_1'], tworesu.values['amp_1'])
    gaus23 = Ofuncts.gaussian(l, tworesu.values['mu_20'], tworesu.values['sig_20'], tworesu.values['amp_20'])
    gaus24 = Ofuncts.gaussian(l, tworesu.values['mu_21'], tworesu.values['sig_21'], tworesu.values['amp_21'])
    onefin_fit = Ofuncts.twogaussian(l, new_slop, new_intc,
                                     oneresu.values['mu_0'], oneresu.values['sig_0'], oneresu.values['amp_0'],
                                     oneresu.values['mu_1'], oneresu.values['sig_1'], oneresu.values['amp_1'])
    twofin_fit = Ofuncts.funcSII2comp(l, new_slop, new_intc,
                                      tworesu.values['mu_0'], tworesu.values['sig_0'], tworesu.values['amp_0'],
                                      tworesu.values['mu_1'], tworesu.values['sig_1'], tworesu.values['amp_1'],
                                      tworesu.values['mu_20'], tworesu.values['sig_20'], tworesu.values['amp_20'],
                                      tworesu.values['mu_21'], tworesu.values['sig_21'], tworesu.values['amp_21'])

    if meth == 'S':
        # Indices bracketing the two SII line regions
        i1 = np.where(l_init < l1)[0][-1]
        i2 = np.where(l_init > l2)[0][0]
        i3 = np.where(l_init < l3)[0][-1]
        i4 = np.where(l_init > l4)[0][0]
        # one component
        std_2 = np.std(data_cor[i1:i2 + 10] - onefin_fit[i1:i2 + 10])
        std_1 = np.std(data_cor[i3 - 10:i4] - onefin_fit[i3 - 10:i4])
        ep_1 = std_1 / stadev
        ep_2 = std_2 / stadev
        print('The condition for each line (in the same order as before) needs to be std_line < 3*std_cont --> for 1 component is... ')
        print('  For the SII2 line: ' + str(ep_2) + ' < 3')
        print('  For the SII1 line: ' + str(ep_1) + ' < 3')
        # two components
        std2_2 = np.std(data_cor[i1:i2 + 10] - twofin_fit[i1:i2 + 10])
        std2_1 = np.std(data_cor[i3 - 10:i4] - twofin_fit[i3 - 10:i4])
        ep2_1 = std2_1 / stadev
        ep2_2 = std2_2 / stadev
        print('The condition for each line (in the same order as before) needs to be std_line < 3*std_cont --> for 2 components is... ')
        print('  For the SII2 line: ' + str(ep2_2) + ' < 3')
        print('  For the SII1 line: ' + str(ep2_1) + ' < 3')

        # We determine the maximum flux of the fit for all the lines, and the velocity and sigma components
        maxS1 = onefin_fit[np.where(abs(oneresu.values['mu_0'] - l) < 0.28)[0][0]]
        maxS2 = onefin_fit[np.where(abs(oneresu.values['mu_1'] - l) < 0.28)[0][0]]
        max2S1 = twofin_fit[np.where(abs(tworesu.values['mu_0'] - l) < 0.28)[0][0]]
        max2S2 = twofin_fit[np.where(abs(tworesu.values['mu_1'] - l) < 0.28)[0][0]]
        # one component
        vS2 = v_luz * ((oneresu.values['mu_0'] - l_SII_2) / l_SII_2)
        sigS2 = pix_to_v * np.sqrt(oneresu.values['sig_0']**2 - sig_inst**2)
        # two comps
        v2S2 = v_luz * ((tworesu.values['mu_0'] - l_SII_2) / l_SII_2)
        v20S2 = v_luz * ((tworesu.values['mu_20'] - l_SII_2) / l_SII_2)
        sig2S2 = pix_to_v * np.sqrt(tworesu.values['sig_0']**2 - sig_inst**2)
        sig20S2 = pix_to_v * np.sqrt(tworesu.values['sig_20']**2 - sig_inst**2)

        if oneresu.params['mu_0'].stderr is None:
            print('Problem determining the errors!')
            evS2, esigS2 = 0., 0.
        else:
            evS2 = ((v_luz / l_SII_2) * oneresu.params['mu_0'].stderr) - er_vsys
            esigS2 = pix_to_v * np.sqrt(oneresu.values['sig_0'] * oneresu.params['sig_0'].stderr) / (np.sqrt(oneresu.values['sig_0']**2 - sig_inst**2))
        if tworesu.params['mu_20'].stderr is None:
            print('Problem determining the errors!')
            ev20S2, ev2S2, esig2S2, esig20S2 = 0., 0., 0., 0.
        else:
            ev2S2 = ((v_luz / l_SII_2) * tworesu.params['mu_0'].stderr) - er_vsys
            ev20S2 = ((v_luz / l_SII_2) * tworesu.params['mu_20'].stderr) - er_vsys
            esig2S2 = pix_to_v * np.sqrt(tworesu.values['sig_0'] * tworesu.params['sig_0'].stderr) / (np.sqrt(tworesu.values['sig_0']**2 - sig_inst**2))
            esig20S2 = pix_to_v * np.sqrt(tworesu.values['sig_20'] * tworesu.params['sig_20'].stderr) / (np.sqrt(tworesu.values['sig_20']**2 - sig_inst**2))

        textstr = '\n'.join((r'$V_{SII_{2}}$ = ' + '{:.2f} +- {:.2f}'.format(vS2, evS2),
                             r'$\sigma_{SII_{2}}$ = ' + '{:.2f} +- {:.2f}'.format(sigS2, esigS2),
                             r'$\frac{F_{SII_{2}}}{F_{SII_{1}}}$ = ' + '{:.3f}'.format(maxS2 / maxS1)))
        textstr2 = '\n'.join((r'$V_{SII_{2-1comp}}$ = ' + '{:.2f} +- {:.2f}'.format(v2S2, ev2S2),
                              r'$V_{SII_{2-2comp}}$ = ' + '{:.2f} +- {:.2f}'.format(v20S2, ev20S2),
                              r'$\sigma_{SII_{2-1comp}}$ = ' + '{:.2f} +- {:.2f}'.format(sig2S2, esig2S2),
                              r'$\sigma_{SII_{2-2comp}}$ = ' + '{:.2f} +- {:.2f}'.format(sig20S2, esig20S2),
                              r'$\frac{F_{SII_{2}}}{F_{SII_{1}}}$ = ' + '{:.3f}'.format(max2S2 / max2S1)))
        # r'$F_{SII_{1}}$ = ' + '{:.3f}'.format(max2S1) + ' $10^{-14}$'))

    elif meth == 'O':
        # Indices bracketing the two OI line regions
        j1 = np.where(l_init < l11)[0][-1]
        j2 = np.where(l_init > l12)[0][0]
        j3 = np.where(l_init < l13)[0][-1]
        j4 = np.where(l_init > l14)[0][0]
        # one component
        std_1 = np.std(data_cor[j1 - 10:j2] - onefin_fit[j1 - 10:j2])
        std_2 = np.std(data_cor[j3:j4 + 10] - onefin_fit[j3:j4 + 10])
        ep_1 = std_1 / stadev
        ep_2 = std_2 / stadev
        print('The condition for each line (in the same order as before) needs to be std_line < 3*std_cont --> for 1 component is... ')
        print('  For the OI2 line: ' + str(ep_2) + ' < 3')
        print('  For the OI1 line: ' + str(ep_1) + ' < 3')
        # two components
        std2_1 = np.std(data_cor[j1 - 10:j2 + 10] - twofin_fit[j1 - 10:j2 + 10])
        std2_2 = np.std(data_cor[j3 - 10:j4 + 10] - twofin_fit[j3 - 10:j4 + 10])
        ep2_1 = std2_1 / stadev
        ep2_2 = std2_2 / stadev
        print('The condition for each line (in the same order as before) needs to be std_line < 3*std_cont --> for 2 components is... ')
        print('  For the OI2 line: ' + str(ep2_2) + ' < 3')
        print('  For the OI1 line: ' + str(ep2_1) + ' < 3')

        # We determine the maximum flux of the fit for all the lines, and the velocity and sigma components
        maxS1 = onefin_fit[np.where(abs(oneresu.values['mu_0'] - l) < 0.27)[0][0]]
        maxS2 = onefin_fit[np.where(abs(oneresu.values['mu_1'] - l) < 0.27)[0][0]]
        max2S1 = twofin_fit[np.where(abs(tworesu.values['mu_0'] - l) < 0.27)[0][0]]
        max2S2 = twofin_fit[np.where(abs(tworesu.values['mu_1'] - l) < 0.27)[0][0]]
        # one component
        vS2 = v_luz * ((oneresu.values['mu_0'] - l_OI_1) / l_OI_1)
        sigS2 = pix_to_v * np.sqrt(oneresu.values['sig_0']**2 - sig_inst**2)
        # two comps
        v2S2 = v_luz * ((tworesu.values['mu_0'] - l_OI_1) / l_OI_1)
        v20S2 = v_luz * ((tworesu.values['mu_20'] - l_OI_1) / l_OI_1)
        sig2S2 = pix_to_v * np.sqrt(tworesu.values['sig_0']**2 - sig_inst**2)
        sig20S2 = pix_to_v * np.sqrt(tworesu.values['sig_20']**2 - sig_inst**2)

        if oneresu.params['mu_0'].stderr is None:
            print('Problem determining the errors!')
            evS2, esigS2 = 0., 0.
        else:
            evS2 = ((v_luz / l_OI_1) * oneresu.params['mu_0'].stderr) - er_vsys
            esigS2 = pix_to_v * np.sqrt(oneresu.values['sig_0'] * oneresu.params['sig_0'].stderr) / (np.sqrt(oneresu.values['sig_0']**2 - sig_inst**2))
        if tworesu.params['mu_20'].stderr is None:
            print('Problem determining the errors!')
            ev20S2, ev2S2, esig2S2, esig20S2 = 0., 0., 0., 0.
        else:
            ev2S2 = ((v_luz / l_OI_1) * tworesu.params['mu_0'].stderr) - er_vsys
            ev20S2 = ((v_luz / l_OI_1) * tworesu.params['mu_20'].stderr) - er_vsys
            esig2S2 = pix_to_v * np.sqrt(tworesu.values['sig_0'] * tworesu.params['sig_0'].stderr) / (np.sqrt(tworesu.values['sig_0']**2 - sig_inst**2))
            esig20S2 = pix_to_v * np.sqrt(tworesu.values['sig_20'] * tworesu.params['sig_20'].stderr) / (np.sqrt(tworesu.values['sig_20']**2 - sig_inst**2))

        textstr = '\n'.join((r'$V_{OI_{1}}$ = ' + '{:.2f} +- {:.2f}'.format(vS2, evS2),
                             r'$\sigma_{OI_{1}}$ = ' + '{:.2f} +- {:.2f}'.format(sigS2, esigS2),
                             r'$\frac{F_{OI_{2}}}{F_{OI_{1}}}$ = ' + '{:.3f}'.format(maxS2 / maxS1)))
        # The flux ratio below was maxS2/maxS1 in the original, which repeats
        # the one-component ratio; the two-component maxima are used instead.
        textstr2 = '\n'.join((r'$V_{OI_{1-1comp}}$ = ' + '{:.2f} +- {:.2f}'.format(v2S2, ev2S2),
                              r'$V_{OI_{1-2comp}}$ = ' + '{:.2f} +- {:.2f}'.format(v20S2, ev20S2),
                              r'$\sigma_{OI_{1-1comp}}$ = ' + '{:.2f} +- {:.2f}'.format(sig2S2, esig2S2),
                              r'$\sigma_{OI_{1-2comp}}$ = ' + '{:.2f} +- {:.2f}'.format(sig20S2, esig20S2),
                              r'$\frac{F_{OI_{2}}}{F_{OI_{1}}}$ = ' + '{:.3f}'.format(max2S2 / max2S1)))

    ############################################ PLOT ############################################
    plt.close()
    # MAIN plot
    fig1 = plt.figure(1, figsize=(10, 9))
    frame1 = fig1.add_axes((.1, .25, .85, .65))  # xstart, ystart, xend, yend [fractions of the image frame, from bottom left corner]
    plt.plot(l, data_cor, 'k')  # Initial data
    plt.plot(l, onefin_fit, 'r-')
    plt.plot(l, (linresu.values['slope'] * l + linresu.values['intc']), c='y', linestyle='-.', label='Linear fit')
    plt.plot(l, gaus1, 'b-')
    plt.plot(l, gaus2, 'b-', label='Narrow component')
    props = dict(boxstyle='round', facecolor='white', alpha=0.5)
    frame1.text(6350., max(data_cor), textstr, fontsize=12, verticalalignment='top', bbox=props)
    plt.plot(l[std0:std1], data_cor[std0:std1], 'g')  # Zone where the stddev is calculated
    frame1.set_xticklabels([])  # Remove x-tic labels for the first frame
    plt.ylabel(r'Flux (x10$^{-14} \mathrm{erg/s/cm^{2} / \AA}$)', fontsize=14)
    plt.tick_params(axis='both', labelsize=12)
    plt.xlim(l[0], l[-1])
    plt.legend(loc='best')

    # RESIDUAL plot
    frame2 = fig1.add_axes((.1, .1, .85, .15))
    plt.plot(l, data_cor - onefin_fit, c='k')  # Main
    plt.xlabel(r'Wavelength ($\AA$)', fontsize=14)
    plt.ylabel('Residuals', fontsize=14)
    plt.tick_params(axis='both', labelsize=12)
    plt.xlim(l[0], l[-1])
    plt.plot(l, np.zeros(len(l)), c='grey', linestyle='--')  # Line around zero
    plt.plot(l, np.zeros(len(l)) + 2 * stadev, c='grey', linestyle='--')  # 2 sigma upper limit
    plt.plot(l, np.zeros(len(l)) - 2 * stadev, c='grey', linestyle='--')  # 2 sigma lower limit
    plt.ylim(-(3 * stadev) * 2, (3 * stadev) * 2)
    plt.savefig(path + 'adj_met' + str(meth) + '_ref_1comp.png')

    ##############################################################################################
    # Two components in reference line
    # MAIN plot
    fig2 = plt.figure(2, figsize=(10, 9))
    frame3 = fig2.add_axes((.1, .25, .85, .65))  # xstart, ystart, xend, yend [fractions of the image frame, from bottom left corner]
    plt.plot(l, data_cor, 'k')  # Initial data
    plt.plot(l, twofin_fit, 'r-')
    plt.plot(l, (linresu.values['slope'] * l + linresu.values['intc']), c='y', linestyle='-.', label='Linear fit')
    plt.plot(l, gaus21, 'b-')
    plt.plot(l, gaus22, 'b-', label='Narrow component')
    plt.plot(l, gaus23, 'm-')
    plt.plot(l, gaus24, 'm-', label='Secondary component')
    props = dict(boxstyle='round', facecolor='white', alpha=0.5)
    frame3.text(6350., max(data_cor), textstr2, fontsize=12, verticalalignment='top', bbox=props)
    plt.plot(l[std0:std1], data_cor[std0:std1], 'g')  # Zone where the stddev is calculated
    frame3.set_xticklabels([])  # Remove x-tic labels for the first frame
    plt.ylabel(r'Flux (x10$^{-14} \mathrm{erg/s/cm^{2} / \AA}$)', fontsize=14)
    plt.tick_params(axis='both', labelsize=12)
    plt.xlim(l[0], l[-1])
    plt.legend(loc='best')

    # RESIDUAL plot
    frame4 = fig2.add_axes((.1, .1, .85, .15))
    plt.plot(l, data_cor - twofin_fit, c='k')  # Main
    plt.xlabel(r'Wavelength ($\AA$)', fontsize=14)
    plt.ylabel('Residuals', fontsize=14)
    plt.tick_params(axis='both', labelsize=12)
    plt.xlim(l[0], l[-1])
    plt.plot(l, np.zeros(len(l)), c='grey', linestyle='--')  # Line around zero
    plt.plot(l, np.zeros(len(l)) + 2 * stadev, c='grey', linestyle='--')  # 2 sigma upper limit
    plt.plot(l, np.zeros(len(l)) - 2 * stadev, c='grey', linestyle='--')  # 2 sigma lower limit
    plt.ylim(-(3 * stadev) * 2, (3 * stadev) * 2)
    plt.savefig(path + 'adj_met' + str(meth) + '_ref_2comp.png')

    ##############################################################################################
    # We make an F-test to see whether the presence of a second component in the lines is
    # significant. As the only possible method here is the S-method (there are no O-lines in
    # this spectra), the test can only be applied to the SII lines (so the wavelength range
    # is around these two lines).
    if oneresu.chisqr < tworesu.chisqr:
        print('The F-test is not meaningful: the one-component fit already has a lower chi-square than the two-component fit!')
    else:
        istart = np.where(l_init > l3)[0][0] - 20
        iend = np.where(l_init < l2)[0][-1] + 20
        fvalue, pvalue = stats.f_oneway(data_cor[istart:iend] - onefin_fit[istart:iend],
                                        data_cor[istart:iend] - twofin_fit[istart:iend])
        statist, pvalue2 = stats.levene(data_cor[istart:iend] - onefin_fit[istart:iend],
                                        data_cor[istart:iend] - twofin_fit[istart:iend])
        jstart = np.where(l_init < l3)[0][-1] - 20
        jend = np.where(l_init > l2)[0][0] + 20
        pre_x = data_cor[jstart:jend] - onefin_fit[jstart:jend]
        pre_y = data_cor[jstart:jend] - twofin_fit[jstart:jend]
        tx, ty = stats.obrientransform(pre_x, pre_y)
        fvalue1, pvalue1 = stats.f_oneway(tx, ty)
        fstat = ftest(oneresu.chisqr, tworesu.chisqr, oneresu.nfree, tworesu.nfree)
        print('')
        print('The probability of a second component (one component vs two components) using the F-test is: ' + str(pvalue))
        print("The probability of a second component (one component vs two components) with the F-test (and O'Brien) is: " + str(pvalue1))
        print('The probability of a second component (one component vs two components) using the Levene-test is: ' + str(pvalue2))
        print('The probability of a second component (one component vs two components) with the F-test of IDL is: ' + str(fstat['p-value']))
        print('')

    return ep_1, ep_2, ep2_1, ep2_2
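# refer_plot() calls an ftest() helper that is not shown in this section. Based
# on its call signature and the 'p-value' key used above, it appears to be the
# classic F-test for nested fits (the "F-test of IDL" mentioned in the print
# statement). A minimal sketch under that assumption; the project's actual
# implementation may differ.
def ftest(chi1, chi2, nfree1, nfree2):
    # chi1/nfree1: chi-square and degrees of freedom of the simpler (1-component) fit;
    # chi2/nfree2: the same for the more complex (2-component) fit (nfree2 < nfree1).
    dof_diff = nfree1 - nfree2
    f_value = ((chi1 - chi2) / dof_diff) / (chi2 / nfree2)
    # Survival function of the F distribution gives the p-value.
    p_value = stats.f.sf(f_value, dof_diff, nfree2)
    return {'F': f_value, 'p-value': p_value}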