def AD_test(groups, outfile):
    jdelim = args.delimiter if args.delimiter is not None else ' '
    for i, u in enumerate(groups):
        for j, v in enumerate(groups):
            if j > i or (j == i and len(args.columns) == 1):
                break
            for x, us in enumerate(u.samples):
                for y, vs in enumerate(v.samples):
                    if len(vs) < args.ignore or len(us) < args.ignore:
                        continue
                    if j == i and y >= x:
                        break
                    if args.random is not None:
                        verdict = False
                        for k in range(args.random):
                            res = anderson_ksamp([random.sample(us, args.subsample),
                                                  random.sample(vs, args.subsample)])
                            if res[0] < res[1][0]:
                                verdict = True
                            outfile.write(jdelim.join(u.tup + v.tup + list(map(str, res))) + '\n')
                        outfile.write('Verdict:' + str(verdict) + '\n')
                    else:
                        res = anderson_ksamp([us, vs])
                        verdict = False
                        if res[0] < res[1][0]:
                            verdict = True
                        outfile.write(jdelim.join(u.tup + v.tup + list(map(str, res))) + '\n')
                        outfile.write('Verdict:' + str(verdict) + '\n')
def result_stat_tests(inn_samps, mcmc_samps, cnt, parnames):
    """ Record and print KS and AD test statistics """

    ks_mcmc_arr = []
    ks_inn_arr = []
    ad_mcmc_arr = []
    ad_inn_arr = []

    # iterate through each parameter
    for i in range(inn_samps.shape[1]):
        # get ideal Bayesian number. We want the 2-tailed p-value from the KS test FYI
        half = int(mcmc_samps.shape[0] / 2.0)
        ks_mcmc_result = ks_2samp(mcmc_samps[:half, i], mcmc_samps[half:, i])
        ad_mcmc_result = anderson_ksamp([mcmc_samps[:half, i], mcmc_samps[half:, i]])

        # get predicted vs. true number
        ks_inn_result = ks_2samp(inn_samps[:, i], mcmc_samps[:, i])
        ad_inn_result = anderson_ksamp([inn_samps[:, i], mcmc_samps[:, i]])

        #print('Test Case %d, Parameter(%s) k-s result: [Ideal(%.6f), Predicted(%.6f)]' % (int(cnt), parnames[i], np.array(ks_mcmc_result[1]), np.array(ks_inn_result[1])))
        #print('Test Case %d, Parameter(%s) A-D result: [Ideal(%.6f), Predicted(%.6f)]' % (int(cnt), parnames[i], np.array(ad_mcmc_result[0]), np.array(ad_inn_result[0])))

        # store result stats
        ks_mcmc_arr.append(ks_mcmc_result[1])
        ks_inn_arr.append(ks_inn_result[1])
        ad_mcmc_arr.append(ad_mcmc_result[0])
        ad_inn_arr.append(ad_inn_result[0])

    return ks_mcmc_arr, ks_inn_arr, ad_mcmc_arr, ad_inn_arr
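# Illustrative sketch (not part of the original code) of the self-consistency
# baseline used in result_stat_tests() above: one half of an MCMC chain is
# tested against the other half, which calibrates what "ideal" KS/AD values
# look like before a predicted sampler is compared against the chain.
import numpy as np
from scipy.stats import ks_2samp, anderson_ksamp

rng = np.random.default_rng(0)
mcmc_samps = rng.normal(size=(1000, 2))  # stand-in posterior samples, shape (n_samples, n_params)
half = mcmc_samps.shape[0] // 2
for i in range(mcmc_samps.shape[1]):
    ks_p = ks_2samp(mcmc_samps[:half, i], mcmc_samps[half:, i])[1]
    ad_stat = anderson_ksamp([mcmc_samps[:half, i], mcmc_samps[half:, i]])[0]
    print('param %d: KS p = %.3f, AD stat = %.3f' % (i, ks_p, ad_stat))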
def overlap_tests(pred_samp, lalinf_samp, true_vals, kernel_cnn, kernel_lalinf):
    """ Perform Anderson-Darling, K-S, and overlap tests to get quantifiable
    values for the accuracy of the GAN PE method.

    Parameters
    ----------
    pred_samp: numpy array
        predicted PE samples from CNN
    lalinf_samp: numpy array
        predicted PE samples from lalinference
    true_vals:
        true scalar point values for parameters to be estimated (taken from GW event paper)
    kernel_cnn: scipy kde instance
        gaussian kde of CNN results
    kernel_lalinf: scipy kde instance
        gaussian kde of lalinference results

    Returns
    -------
    ks_score:
        k-s test score
    ad_score:
        anderson-darling score
    beta_score:
        overlap score. used to determine goodness of CNN PE estimates
    """
    # do K-S test
    ks_mc_score = ks_2samp(pred_samp[:, 0].reshape(pred_samp[:, 0].shape[0],), lalinf_samp[0][:])
    ks_q_score = ks_2samp(pred_samp[:, 1].reshape(pred_samp[:, 1].shape[0],), lalinf_samp[1][:])
    ks_score = np.array([ks_mc_score, ks_q_score])

    # do Anderson-Darling test
    ad_mc_score = anderson_ksamp([pred_samp[:, 0].reshape(pred_samp[:, 0].shape[0],), lalinf_samp[0][:]])
    ad_q_score = anderson_ksamp([pred_samp[:, 1].reshape(pred_samp[:, 1].shape[0],), lalinf_samp[1][:]])
    ad_score = [ad_mc_score, ad_q_score]

    # compute overlap statistic
    comb_mc = np.concatenate((pred_samp[:, 0].reshape(pred_samp[:, 0].shape[0], 1),
                              lalinf_samp[0][:].reshape(lalinf_samp[0][:].shape[0], 1)))
    comb_q = np.concatenate((pred_samp[:, 1].reshape(pred_samp[:, 1].shape[0], 1),
                             lalinf_samp[1][:].reshape(lalinf_samp[1][:].shape[0], 1)))
    X, Y = np.mgrid[np.min(comb_mc):np.max(comb_mc):100j, np.min(comb_q):np.max(comb_q):100j]
    positions = np.vstack([X.ravel(), Y.ravel()])
    #cnn_pdf = np.reshape(kernel_cnn(positions).T, X.shape)
    #print(positions.shape, pred_samp.shape)
    cnn_pdf = kernel_cnn.pdf(positions)

    #X, Y = np.mgrid[np.min(lalinf_samp[0][:]):np.max(lalinf_samp[0][:]):100j, np.min(lalinf_samp[1][:]):np.max(lalinf_samp[1][:]):100j]
    positions = np.vstack([X.ravel(), Y.ravel()])
    #lalinf_pdf = np.reshape(kernel_lalinf(positions).T, X.shape)
    lalinf_pdf = kernel_lalinf.pdf(positions)

    beta_score = np.divide(np.sum(cnn_pdf * lalinf_pdf),
                           np.sqrt(np.sum(cnn_pdf**2) * np.sum(lalinf_pdf**2)))

    return ks_score, ad_score, beta_score
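# Hedged toy example (names here are illustrative, not from the original
# project): the overlap statistic computed in overlap_tests() above is the
# normalized inner product of two KDE evaluations on a common grid, so for
# samples from the same distribution it approaches 1.
import numpy as np
from scipy.stats import gaussian_kde

rng = np.random.default_rng(1)
samp_a = rng.normal(0, 1, size=(2, 500))   # 2-D samples, shape (n_dims, n_points)
samp_b = rng.normal(0, 1, size=(2, 500))
kde_a, kde_b = gaussian_kde(samp_a), gaussian_kde(samp_b)
X, Y = np.mgrid[-4:4:50j, -4:4:50j]
positions = np.vstack([X.ravel(), Y.ravel()])
pdf_a, pdf_b = kde_a.pdf(positions), kde_b.pdf(positions)
beta = np.sum(pdf_a * pdf_b) / np.sqrt(np.sum(pdf_a**2) * np.sum(pdf_b**2))
print(beta)   # close to 1 for samples drawn from the same distribution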
def ADtest_pm(msk_in, plx_data_full):
    pmRA_in, pmDEC_in = plx_data_full['pmRA'][msk_in], plx_data_full['pmDE'][msk_in]
    pmRA_out, pmDEC_out = plx_data_full['pmRA'][~msk_in], plx_data_full['pmDE'][~msk_in]
    return [
        list(anderson_ksamp([pmRA_in, pmRA_out])),
        list(anderson_ksamp([pmDEC_in, pmDEC_out]))
    ]
def _prob_ad(a, b):
    _, _, prob = anderson_ksamp([a, b])
    with np.errstate(divide='ignore'):
        lnprob = np.log(prob)
    if prob > 1:
        print()
        print(anderson_ksamp([a, b]))
        print(ks_2samp(a, b))
        print()
        print(a)
        print(b)
        print(prob, lnprob)
    return prob, lnprob
def one_dimensional_test(self, X_tr, X_te):
    p_vals = []
    # For each dimension we conduct a separate KS test
    for i in range(X_tr.shape[1]):
        feature_tr = X_tr[:, i]
        feature_te = X_te[:, i]

        t_val, p_val = None, None

        if self.ot == OnedimensionalTest.KS:
            # Compute KS statistic and p-value
            t_val, p_val = ks_2samp(feature_tr, feature_te)
        elif self.ot == OnedimensionalTest.AD:
            t_val, _, p_val = anderson_ksamp(
                [feature_tr.tolist(), feature_te.tolist()])
        p_vals.append(p_val)

    # Apply the Bonferroni correction to bound the family-wise error rate. This can be
    # done by picking the minimum p-value from all individual tests.
    p_vals = np.array(p_vals)
    p_val = min(np.min(p_vals), 1.0)

    return p_val, p_vals
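# Usage sketch (the alpha value below is illustrative, not from the original
# repo): with K per-feature p-values, comparing the minimum p-value against
# alpha / K is the Bonferroni-corrected decision rule implied by the comment
# in one_dimensional_test() above.
import numpy as np

alpha = 0.05
p_vals = np.array([0.30, 0.004, 0.75])      # per-feature p-values from the loop above
K = len(p_vals)
shift_detected = p_vals.min() < alpha / K   # reject if any corrected test rejects
print(shift_detected)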
def test_example2b(self):
    # Example data taken from an earlier technical report of
    # Scholz and Stephens
    t1 = [194, 15, 41, 29, 33, 181]
    t2 = [413, 14, 58, 37, 100, 65, 9, 169, 447, 184, 36, 201, 118]
    t3 = [34, 31, 18, 18, 67, 57, 62, 7, 22, 34]
    t4 = [90, 10, 60, 186, 61, 49, 14, 24, 56, 20, 79, 84, 44, 59, 29, 118,
          25, 156, 310, 76, 26, 44, 23, 62]
    t5 = [130, 208, 70, 101, 208]
    t6 = [74, 57, 48, 29, 502, 12, 70, 21, 29, 386, 59, 27]
    t7 = [55, 320, 56, 104, 220, 239, 47, 246, 176, 182, 33]
    t8 = [23, 261, 87, 7, 120, 14, 62, 47, 225, 71, 246, 21, 42, 20, 5, 12,
          120, 11, 3, 14, 71, 11, 14, 11, 16, 90, 1, 16, 52, 95]
    t9 = [97, 51, 11, 4, 141, 18, 142, 68, 77, 80, 1, 16, 106, 206, 82, 54,
          31, 216, 46, 111, 39, 63, 18, 191, 18, 163, 24]
    t10 = [50, 44, 102, 72, 22, 39, 3, 15, 197, 188, 79, 88, 46, 5, 5, 36,
           22, 139, 210, 97, 30, 23, 13, 14]
    t11 = [359, 9, 12, 270, 603, 3, 104, 2, 438]
    t12 = [50, 254, 5, 283, 35, 12]
    t13 = [487, 18, 100, 7, 98, 5, 85, 91, 43, 230, 3, 130]
    t14 = [102, 209, 14, 57, 54, 32, 67, 59, 134, 152, 27, 14, 230, 66,
           61, 34]

    with warnings.catch_warnings():
        warnings.filterwarnings('ignore', message='approximate p-value')
        Tk, tm, p = stats.anderson_ksamp((t1, t2, t3, t4, t5, t6, t7, t8,
                                          t9, t10, t11, t12, t13, t14),
                                         midrank=True)

    assert_almost_equal(Tk, 3.294, 3)
    assert_array_almost_equal([0.5990, 1.3269, 1.8052, 2.2486, 2.8009],
                              tm, 4)
    assert_almost_equal(p, 0.0041, 4)
def test_dist_fit(data, orig_data, method):
    if method == 'ad':
        stat_results = st.anderson_ksamp([orig_data, data])
    elif method == 'ks':
        stat_results = st.ks_2samp(orig_data, data)
    else:
        raise ValueError("method must be 'ad' or 'ks'")
    return stat_results
def get_ks_by_user_vector(self, matrix_id):
    output = self.output_json
    sort_vector = output.get('sort_vector', None)
    if not sort_vector:
        return False

    # descending sort order
    sort_order = numpy.argsort(sort_vector)[::-1]

    flcm = AnalysisDatasets.objects\
        .filter(analysis_id=self.id, count_matrix=matrix_id)\
        .select_related('count_matrix')\
        .first()

    n = len(sort_order)
    values = list(flcm.count_matrix.df['All bins'])
    quartiles = [[], [], [], []]
    for i, index in enumerate(sort_order):
        quartiles[math.floor(4 * i / n)].append(values[index])

    stat, cv, sig = stats.anderson_ksamp(quartiles)
    return {
        'statistic': stat,
        'critical_values': cv,
        'significance': sig,
    }
def ad(d1, d2, verbose=False):
    """
    Calculates the Anderson-Darling test statistic on 2 distributions.

    Can be used on continuous or discrete distributions. Any binning/bucketing
    of the distributions/samples should be done before passing them to this
    function.

    Anderson & Darling 1954

    Advantages:
    - Unlike the KS, the AD (like the ES) can be used on both continuous &
      discrete distributions.
    - Works well even when a distribution has fewer than 25 observations.
    - More powerful than KS, especially for differences in the tails of
      distributions.

    Args:
        d1 (np.array or pandas.core.series.Series): first sample
        d2 (np.array or pandas.core.series.Series): second sample
        verbose (bool): helpful interpretation msgs printed to stdout (default False)

    Returns:
        (float, float): AD test stat and p-value of rejecting the null hypothesis
        (that the two distributions are identical)
    """
    d1 = assure_numpy_array(d1)
    d2 = assure_numpy_array(d2)

    ad, critical_values, pvalue = stats.anderson_ksamp([d1, d2])

    return ad, pvalue
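# Usage example for ad() above (assumes assure_numpy_array() from the same
# module simply coerces its input to a numpy array): with a location shift
# between the samples, the reported p-value should be small.
import numpy as np

rng = np.random.default_rng(0)
d1 = rng.normal(0.0, 1.0, 500)
d2 = rng.normal(0.5, 1.0, 500)
stat, pval = ad(d1, d2)
print(stat, pval)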
def getAD(pdQuery, pdRef):
    arQuery = pdQuery.values.flatten().tolist()
    arRef = pdRef.values.flatten().tolist()
    tupAD = stats.anderson_ksamp([arQuery, arRef])
    return tupAD
def computeAD2Sample(data, mu, sd, seed):
    Nsample = len(data)
    np.random.seed(seed)
    otherdata = np.random.normal(mu, sd, Nsample)
    from scipy import stats
    res = stats.anderson_ksamp((data, otherdata))
    return [res.statistic, res.significance_level, res.critical_values.tolist()]
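# Usage sketch for computeAD2Sample() above: test whether `data` is consistent
# with a normal distribution of the given mean and sd by comparing it against
# a synthetic normal draw of equal size (the seed makes the draw reproducible).
import numpy as np

data = np.random.normal(10.0, 2.0, 300)
stat, pval, crit = computeAD2Sample(data, mu=10.0, sd=2.0, seed=42)
print(stat, pval)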
def main():
    if len(sys.argv) < 4:
        return 1
    _, list_a, list_b, significance = sys.argv[:4]
    list_a = json.loads(list_a)
    list_b = json.loads(list_b)
    significance = float(significance)
    shapiro_p_value = stats.shapiro(list_a)[1], stats.shapiro(list_b)[1]
    mann_whitney_p_value = stats.mannwhitneyu(list_a, list_b).pvalue
    anderson_p_value = stats.anderson_ksamp([list_a, list_b]).significance_level
    welch_p_value = stats.ttest_ind(list_a, list_b, equal_var=False)[1]
    results = {
        'first_sample': list_a,
        'second_sample': list_b,
        'shapiro_p_value': shapiro_p_value,
        'mann_p_value': mann_whitney_p_value,
        'anderson_p_value': anderson_p_value,
        'welch_p_value': welch_p_value,
    }
    # TODO(robertocn): It seems we haven't used the results of the Shapiro test
    # for normality. We should remove this along with Anderson-Darling and Welch's.
    if (results['shapiro_p_value'][0] < significance and
            results['shapiro_p_value'][1] < significance):
        results['normal-y'] = True
    else:
        results['normal-y'] = False
    results['significantly_different'] = bool(
        float(results['mann_p_value']) < float(significance))
    print(json.dumps(results))
    return 0
def p_value_scoring_object_AD(clf, X, y):
    """
    p_value_getter is a scoring callable that returns the negative p-value from
    the Anderson-Darling test on the prediction probabilities for the particle
    and antiparticle samples.
    """
    # Finding out the prediction probabilities
    prob_pred = clf.predict_proba(X)[:, 1]
    #print(prob_pred)

    # This can be deleted if not using Keras.
    # For Keras, turn categorical y back to normal y.
    if y.ndim == 2:
        if y.shape[0] != 1 and y.shape[1] != 1:
            # Then we have a categorical vector
            y = y[:, 1]

    # making sure the inputs are row vectors
    y = np.reshape(y, (1, y.shape[0]))
    prob_pred = np.reshape(prob_pred, (1, prob_pred.shape[0]))

    # Separate prob into particle and antiparticle samples
    prob_0 = prob_pred[np.logical_or.reduce([y == 0])]
    prob_1 = prob_pred[np.logical_or.reduce([y == 1])]

    #if __debug__:
    #    print("Plot")
    p_AD_stat = stats.anderson_ksamp([prob_0, prob_1])
    print(p_AD_stat)
    p_AD = -p_AD_stat[2]
    return p_AD
def mcanderson(x, y, err=None, xerr=None, yerr=None, resample=True,
               replace=True, nsamples=1000, debug=False):
    if err is not None:
        xerr = err
        yerr = err
    statistic = np.zeros(nsamples)
    pvalue = np.zeros(nsamples)
    for i in np.arange(nsamples):
        if resample:
            x_re = select(x, xerr, replace=replace)
            y_re = select(y, yerr, replace=replace)
        else:
            x_re = select(x, xerr, replace=replace, indices=np.arange(len(x)))
            y_re = select(y, yerr, replace=replace, indices=np.arange(len(y)))
        stat, _, per = stats.anderson_ksamp([x_re, y_re])
        statistic[i] = stat
        pvalue[i] = per
        if debug and (np.mod(i, 100) == 0):
            a = plt.hist(y_re)
            b = plt.hist(x_re)
            print(stat, per)
            plt.show()
    out = confidence.interval(statistic, interval=0.68)
    out2 = confidence.interval(pvalue, interval=0.68)
    return out, out2
def evaluate_fit(data1, data2):
    # slice the data, then evaluate fits for each slice
    S1 = data1[0]
    S2 = np.log10(data1[1])
    S1_2 = data2[0]
    S2_2 = data2[1]
    a = np.arange(0.5, 70.5, 1)
    b = a + 2
    stats = []
    for i in range(len(b)):
        MIN = a[i]
        MAX = b[i]
        s1 = MIN + .5
        S2a = S2[(MIN < S1) & (S1 < MAX)]
        S2b = S2_2[(MIN < S1_2) & (S1_2 < MAX)]
        if len(S2a) != 0 and len(S2b) != 0:
            print("S1=" + str(s1))
            stat = anderson_ksamp([S2a, S2b])
            print(stat)
            stats.append(stat[0])
            if len(S2a) > len(S2b):
                S2a = S2a[:len(S2b)]
            else:
                S2b = S2b[:len(S2a)]
            # plt.hist(S2a, bins=100, alpha=0.5)
            # plt.hist(S2b, bins=100, alpha=0.5, color="r")
            # plt.show()
            # if stat[2] < 0.05:
            #     print(len(S2a), len(S2b))
            #     bins = np.linspace(3, 5, 100)
            #     plt.hist(S2a, bins, alpha=0.5, label='x')
            #     plt.hist(S2b, bins, alpha=0.5, label='y')
            #     plt.show()
    average_significance = np.average(stats)
    return average_significance
def py_adtest(mat, lv):
    ds = set(lv)
    pv = []
    for i in np.arange(mat.shape[0]):
        pv.append(
            anderson_ksamp([mat[i, np.array(lv) == l] for l in ds]).significance_level)
    return pv
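# Usage sketch for py_adtest() above: rows of `mat` are features and `lv`
# assigns each column to a group, so one AD p-value comes back per row.
import numpy as np

mat = np.random.normal(size=(3, 40))
lv = [0] * 20 + [1] * 20
print(py_adtest(mat, lv))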
def htests(data1, data2):
    d, pvalue = stats.ks_2samp(data1, data2)
    print(' KS test: ', pvalue)
    statistic, criticalv, significance = stats.anderson_ksamp([data1, data2])
    print(' AD test: ', significance)
    statistic, pvalue = stats.ranksums(data1, data2)
    print(' Wilcoxon test: ', pvalue)
    return
def compare_dists(a, b):
    import warnings
    # anderson_ksamp emits a UserWarning when the p-value is outside the
    # tabulated range; suppress it here rather than trying to catch it as an
    # exception (warnings are not raised, and a bare try/except would leave
    # `stat` unbound).
    with warnings.catch_warnings():
        warnings.simplefilter('ignore', UserWarning)
        stat = stats.anderson_ksamp([a, b])[0]
    n = len(a) + len(b)
    stat = stat / ((n * n) / (n - 1))  # normalize for n
    stat = stat / 0.507 + 0.1  # normalize to ~(0, 1)
    return stat
def testCompletedInvertedCumulatives(data, method='AndersonDarling', offset=None, plot=False):
    """Test if data sets have the same number / intensity distribution by adding
    zero intensity counts to the smaller sized data sets and performing a
    distribution comparison test on the reversed cumulative distribution"""

    # idea: fill up data points to the same numbers at the high intensity values and use KS test
    # cf. work in progress on thoroughly testing the differences in histograms

    # fill up the low count data
    n = numpy.array([x.size for x in data])
    nm = n.max()
    m = numpy.array([x.max() for x in data])
    mm = m.max()
    k = n.size
    #print nm, mm, k

    if offset is None:
        # assume data starts at 0 !
        offset = mm / nm  # ideally for all statistics this should be mm + eps to have as little influence as possible

    datac = [x.copy() for x in data]
    for i in range(m.size):
        if n[i] < nm:
            datac[i] = numpy.concatenate(
                (-datac[i], numpy.ones(nm - n[i], dtype=datac[i].dtype) * (offset)))
            # + 10E-5 * numpy.random.rand(nm - n[i])))
        else:
            datac[i] = -datac[i]

    # test by plotting
    if plot is True:
        import matplotlib.pyplot as plt
        for i in range(m.size):
            datac[i].sort()
            plt.step(datac[i], numpy.arange(datac[i].size))

    # perform the tests
    if method == 'KolmogorovSmirnov' or method == 'KS':
        if k == 2:
            (s, p) = stats.ks_2samp(datac[0], datac[1])
        else:
            raise RuntimeError('KolmogorovSmirnov only for 2 samples not %d' % k)
    elif method == 'CramervonMises' or method == 'CM':
        if k == 2:
            (s, p) = stats2.testCramerVonMises2Sample(datac[0], datac[1])
        else:
            raise RuntimeError('CramervonMises only for 2 samples not %d' % k)
    elif method == 'AndersonDarling' or method == 'AD':
        (s, a, p) = stats.anderson_ksamp(datac)

    return (p, s)
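# Toy demonstration (not from the original module) of the padding idea in
# testCompletedInvertedCumulatives() above: the smaller sample is filled with a
# constant offset so both reversed cumulative distributions have equal length
# before the comparison test runs.
import numpy
from scipy import stats

a = numpy.random.exponential(1.0, 200)
b = numpy.random.exponential(1.0, 150)
p, s = testCompletedInvertedCumulatives([a, b], method='AD')
print(p, s)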
def compute_ad_distance(self):
    """ Compute the distance using the Anderson-Darling test. """
    D, _, p = anderson_ksamp([self.PDF1.data, self.PDF2.data])
    self.ad_distance = D
    self.ad_pval = p
def ad_test(self, data1, data2):
    # Run the k-sample AD test to get the statistic, critical values, and p-value
    ad = anderson_ksamp([data1, data2])
    statistic = ad.statistic
    cv = ad.critical_values
    pvalue = ad.significance_level
    #return statistic, cv, pvalue
    return pvalue
def calcAnderson_ksamp(self, twoSamples):
    self.logger.debug('IN calcAnderson_ksamp: Test Seq Len: %i' % len(twoSamples['testSeq']))
    self.logger.debug('IN calcAnderson_ksamp: Grnd Truth Seq Len: %i' % len(twoSamples['grndTruthSeq']))

    sampleArrayList = []
    #sampleArrayList.append(twoSamples['grndTruthSeq'])
    sampleArrayList.append(twoSamples['testSeq'])
    sampleArrayList.append(twoSamples['grndTruthSeq'])

    anderson_kstat, critical_val, significance = anderson_ksamp(sampleArrayList)
    return anderson_kstat
def work(self, input_items, output_items):
    in0 = input_items[0]
    out = output_items[0]
    #print in0.shape
    x = in0.reshape(self.N)
    #print x.shape[0]
    #print self.buf
    #print x.imag
    #print np.append(x, self.buf).shape
    # anderson_ksamp takes a single sequence of sample arrays
    [D1, z, p1] = stats.anderson_ksamp([x.imag, self.buf.imag])
    [D2, z, p2] = stats.anderson_ksamp([x.real, self.buf.real])
    if p1 < 0.05 and p2 < 0.05:
        print('Not similar, p is ', p1, ' ', p2, 'at sample', self.nitems_read(0))
        self.ctr = self.ctr + 1
        print(self.ctr)
    self.buf = np.copy(x)
    out[:] = in0
    return len(output_items[0])
def testAD(pdUCETSSDistances, pdRandomTSSDistances):
    # Get two lists of values
    # Feed into AD test
    arUCETSSbpDistances = pdUCETSSDistances['Distance_bp'].values.tolist()
    arRandomTSSbpDistances = pdRandomTSSDistances['Distance_bp'].values.tolist()
    floatStat, critical, approxP = stats.anderson_ksamp(
        [arUCETSSbpDistances, arRandomTSSbpDistances])
    return floatStat, critical, approxP
def adaptive_avg(self, array1, array2, ml_x, ml_y):
    # Adaptively multi-looked amplitude
    new_array1 = []
    new_array2 = []
    # Subset with centre pixel and neighbouring pixels
    # subset1 = np.zeros((array1.shape[0], ml_x, ml_y))
    # subset2 = np.zeros((array1.shape[0], ml_x, ml_y))
    cx = 0
    cy = 0
    for i in range(0, array1.shape[1], ml_x):
        cx += 1
        for j in range(0, array1.shape[2], ml_y):
            cy += 1
            # Create a (no_of_images x ml_x x ml_y) subset
            subset1 = array1[:, i:i + ml_x, j:j + ml_y]
            subset2 = array2[:, i:i + ml_x, j:j + ml_y]
            ind_h0 = []
            ind_h1 = []
            for ii in range(ml_x):
                for jj in range(ml_y):
                    # Don't check centre pixel with centre pixel
                    if not (ii == ml_x // 2 and jj == ml_y // 2):
                        # Statistical similarity test
                        resultAnderson = st.anderson_ksamp([
                            subset2[:-1, ii, jj],
                            subset2[:-1, ml_x // 2, ml_y // 2]
                        ])
                        # pixels are from the same distribution with a significance level of 1%
                        if resultAnderson.significance_level < 0.01:
                            # print("Pixel [%i, %i] is similar to centre pixel" % (ii, jj))
                            ind_h0.append([ii, jj])
                        else:
                            # print("Pixel [%i, %i] is not similar to centre pixel" % (ii, jj))
                            ind_h1.append([ii, jj])
            if len(ind_h0) > len(ind_h1):
                new_array1.append(
                    np.mean(self.avg_withcentre(array1, ind_h0, subset1, ml_x, ml_y), axis=1))
                new_array2.append(
                    np.mean(self.avg_withcentre(array2, ind_h0, subset2, ml_x, ml_y), axis=1))
            else:
                new_array1.append(np.mean(subset1, axis=(1, 2)))
                new_array2.append(np.mean(subset2, axis=(1, 2)))
        print('Line', i + 1)
    new_array1 = np.transpose(np.array(new_array1))
    new_array2 = np.transpose(np.array(new_array2))
    final_array1 = np.reshape(new_array1, (array1.shape[0], cx, cy // cx))
    final_array2 = np.reshape(new_array2, (array1.shape[0], cx, cy // cx))
    return final_array1, final_array2
def binary_dist_test(a, b, test='auc'):
    """
    Wrapper for difference-of-distribution tests for univariate observations
    from two classes.

    Parameters
    -----------
    a, b: array-like
        The observation values in each class.

    test: str (['ad', 'mw', 'ks', 't']), callable
        Which test to use.
        'ad': Anderson-Darling (general test for differing distributions)
        'ks': Kolmogorov-Smirnov (general test for differing distributions)
        't': t-test for difference in locations.
        'mw': Mann-Whitney U test for difference in locations (reports AUC statistic)
        If callable, should take two array-like arguments.

    Output
    ------
    stat, pval

    stat: float
        The test statistic, such that larger values mean bigger differences.

    pval: float
        The p-value.
    """
    if test == 't':
        stat, pval = ttest_ind(a, b, equal_var=False)
        stat = abs(stat)

    elif test in ['auc', 'mw', 'mannwhitneyu']:
        result = binary_mann_whitney_u(a, b)
        pval = result['pval']  # makes two-sided
        stat = result['auc']

    elif test == 'ks':
        stat, pval = ks_2samp(a, b)

    elif test == 'ad':
        stat, _, pval = anderson_ksamp([a, b])

    elif callable(test):
        stat, pval = test(a, b)

    else:
        raise ValueError('test = {} is not an acceptable value.'.format(test))

    return stat, pval
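# Usage sketch: running the same two class-conditional samples through several
# of the tests supported by binary_dist_test() above (the 'mw'/'auc' branch is
# skipped here because it needs the module's binary_mann_whitney_u helper).
import numpy as np

rng = np.random.default_rng(0)
a = rng.normal(0.0, 1.0, 200)
b = rng.normal(0.3, 1.0, 200)
for test in ['t', 'ks', 'ad']:
    stat, pval = binary_dist_test(a, b, test=test)
    print(test, stat, pval)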
def _anderson_compare_pops(first_pop_matrix, second_pop_matrix, name=None):
    """Helper function used to execute Anderson-Darling test. See anderson_de. """
    AD_stats = dict()
    p_stats = dict()
    for gene_id in first_pop_matrix.columns:
        AD, _, p = anderson_ksamp(
            [first_pop_matrix[gene_id], second_pop_matrix[gene_id]])
        AD_stats[gene_id] = AD
        p_stats[gene_id] = p
    return pd.Series(AD_stats, name=name), pd.Series(p_stats, name=name)
def get_pvalue(full_sample, subsample, N_loops, bool):
    """Computes the statistical probability value of a selected sample of halos
    being drawn from the full distribution of halos in the simulation, using
    either the Kolmogorov-Smirnov or the Anderson-Darling test.
    """
    if bool:
        stat, _ = stats.kstest(subsample, full_sample)
    else:
        stat, _, _ = stats.anderson_ksamp([subsample, full_sample])

    count = 0
    for i in range(N_loops):
        num_points = len(subsample)
        ran_sample = np.random.choice(full_sample, num_points, replace=True)
        if bool:
            stat_emp, _ = stats.kstest(ran_sample, full_sample)
        else:
            stat_emp, _, _ = stats.anderson_ksamp([ran_sample, full_sample])
        if stat_emp > stat:
            count += 1

    updated_pval = count / N_loops
    return updated_pval
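# Minimal sketch of the empirical p-value idea in get_pvalue() above: the
# observed statistic is ranked against statistics from random subsamples of
# the full distribution drawn with replacement, so a true subsample should
# yield a p-value that is roughly uniform on [0, 1].
import numpy as np

full_sample = np.random.normal(0, 1, 5000)
subsample = np.random.choice(full_sample, 200, replace=True)
print(get_pvalue(full_sample, subsample, N_loops=100, bool=True))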
def anderson(self, attrs1, attrs2):
    """
    k-sample Anderson test from `~scipy.stats.anderson_ksamp`.

    Parameters
    ----------
    attrs1 : list of attributes
        List of conditions in first sample

    attrs2 : list of attributes
        List of conditions in second sample

    Returns
    -------
    sig : float
        significance level (see `~scipy.stats.anderson_ksamp`)

    Examples
    --------
    >>> import numpy as np
    >>> import batman
    >>> from salter import LightCurve
    >>> # Create example transiting planet properties
    >>> params = batman.TransitParams()
    >>> params.t0 = 0.5
    >>> params.rp = 0.1
    >>> params.per = 1
    >>> params.duration = 0.3
    >>> params.inc = 90
    >>> params.w = 90
    >>> params.ecc = 0
    >>> params.a = 10
    >>> params.limb_dark = 'quadratic'
    >>> params.u = [0.2, 0.1]
    >>> # Create example transit light curves:
    >>> transits = [LightCurve(times=i + np.linspace(0, 1, 500),
    ...                        fluxes=np.random.randn(500),
    ...                        params=params) for i in range(10)]
    >>> r = Residuals(transits, params)
    >>> # How significant is the difference between the distributions of the
    >>> # fluxes in and out-of-transit?
    >>> r.anderson('out_of_transit', 'in_transit')
    1.1428634099527666
    >>> # How significant is the difference between the distributions of the
    >>> # in-transit fluxes before and after midtransit?
    >>> r.anderson(['in_transit', 'before_midtransit'],
    ...            ['in_transit', 'after_midtransit'])
    0.2792395871784852
    """
    sample1, sample2 = self._and_reduce(attrs1, attrs2)
    try:
        return anderson_ksamp([sample1, sample2]).significance_level
    except OverflowError:
        return 0
def __init__(self, data):
    self.dist = stats.genextreme
    self.distname = "genextreme"
    self.data = data
    self.p = self.dist.fit(self.data)
    self.frozen = self.dist(self.p[0], loc=self.p[1], scale=self.p[2])
    self.pdf = lambda x: self.frozen.pdf(x)
    self.sample = self.frozen.rvs(len(self.data))
    self.sample2 = self.frozen.rvs(100000)
    self.moments = self.frozen.stats(moments="mvsk")
    self.MAPP = fmin(lambda x: -self.pdf(x), self.moments[0], disp=0)[0]
    try:
        self.ad = stats.anderson_ksamp([self.sample, self.data])[0]
    except Exception:
        self.ad = np.inf
def test_result_attributes(self):
    # Example data from Scholz & Stephens (1987), originally
    # published in Lehmann (1995, Nonparametrics, Statistical
    # Methods Based on Ranks, p. 309)
    # Pass a mixture of lists and arrays
    t1 = [38.7, 41.5, 43.8, 44.5, 45.5, 46.0, 47.7, 58.0]
    t2 = np.array([39.2, 39.3, 39.7, 41.4, 41.8, 42.9, 43.3, 45.8])
    t3 = np.array([34.0, 35.0, 39.0, 40.0, 43.0, 43.0, 44.0, 45.0])
    t4 = np.array([34.0, 34.8, 34.8, 35.4, 37.2, 37.8, 41.2, 42.8])

    with warnings.catch_warnings():
        warnings.filterwarnings('ignore', message='approximate p-value')
        res = stats.anderson_ksamp((t1, t2, t3, t4), midrank=False)

    attributes = ('statistic', 'critical_values', 'significance_level')
    check_named_results(res, attributes)
def __init__(self, data, dist, distname):
    self.dist = dist
    self.distname = distname
    self.data = data
    self.p = self.dist.fit(self.data)
    self.pdf = lambda x: self.dist.pdf(x, *self.p[:-2], loc=self.p[-2], scale=self.p[-1])
    self.sample = stats.norm.rvs(self.p[0], size=len(self.data), scale=self.p[-1])
    self.moments = self.dist.stats(*self.p, moments="mvsk")
    self.MAPP = fmin(lambda x: -self.pdf(x), self.moments[0], disp=0)[0]
    try:
        self.ad = stats.anderson_ksamp([self.sample, self.data])[0]
    except Exception:
        self.ad = np.inf
def test_example1b(self):
    # Example data from Scholz & Stephens (1987), originally
    # published in Lehmann (1995, Nonparametrics, Statistical
    # Methods Based on Ranks, p. 309)
    # Pass arrays
    t1 = np.array([38.7, 41.5, 43.8, 44.5, 45.5, 46.0, 47.7, 58.0])
    t2 = np.array([39.2, 39.3, 39.7, 41.4, 41.8, 42.9, 43.3, 45.8])
    t3 = np.array([34.0, 35.0, 39.0, 40.0, 43.0, 43.0, 44.0, 45.0])
    t4 = np.array([34.0, 34.8, 34.8, 35.4, 37.2, 37.8, 41.2, 42.8])

    with warnings.catch_warnings():
        warnings.filterwarnings('ignore', message='approximate p-value')
        Tk, tm, p = stats.anderson_ksamp((t1, t2, t3, t4), midrank=True)

    assert_almost_equal(Tk, 4.480, 3)
    assert_array_almost_equal([0.4985, 1.3237, 1.9158, 2.4930, 3.2459],
                              tm, 4)
    assert_almost_equal(p, 0.0020, 4)
def get_unsorted_ks(self, matrix_id):
    flcm = AnalysisDatasets.objects\
        .filter(analysis_id=self.id, count_matrix=matrix_id)\
        .select_related('count_matrix')\
        .first()

    n = len(flcm.count_matrix.df['All bins'])
    quartiles = [[], [], [], []]
    for i, value in enumerate(flcm.count_matrix.df['All bins']):
        quartiles[math.floor(4 * i / n)].append(value)

    stat, cv, sig = stats.anderson_ksamp(quartiles)
    return {
        'statistic': stat,
        'critical_values': cv,
        'significance': sig,
    }
def get_ks(self, vector_id, matrix_id):
    if not self.output:
        return False

    output = self.output_json
    sort_order = output['sort_orders'].get(vector_id)

    flcm = AnalysisDatasets.objects\
        .filter(analysis_id=self.id, count_matrix=matrix_id)\
        .select_related('count_matrix')\
        .first()

    n = len(sort_order)
    values = list(flcm.count_matrix.df['All bins'])
    quartiles = [[], [], [], []]
    for i, index in enumerate(sort_order):
        quartiles[math.floor(4 * i / n)].append(values[index])

    stat, cv, sig = stats.anderson_ksamp(quartiles)
    return {
        'statistic': stat,
        'critical_values': cv,
        'significance': sig,
    }
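# Sketch of the quartile idea shared by the three methods above: an ordered
# value list is cut into four contiguous bins, and anderson_ksamp asks whether
# the bins could share a common distribution (they should not, for sorted data).
import math
import numpy as np
from scipy import stats

values = sorted(np.random.normal(size=100), reverse=True)
n = len(values)
quartiles = [[], [], [], []]
for i, v in enumerate(values):
    quartiles[math.floor(4 * i / n)].append(v)
stat, cv, sig = stats.anderson_ksamp(quartiles)
print(stat, sig)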
def main():
    # assuming 'theFile' contains one name per line, read the file
    if getpass.getuser() == 'David':
        pickleFilename = '/Users/David/Research_Documents/inclination/git_inclination/pilot_paper_code/pilotData2.p'
#         resultsFilename = '/Users/David/Research_Documents/inclination/git_inclination/LG_correlation_combined5_3.csv'
#         resultsFilename = '/Users/David/Research_Documents/inclination/git_inclination/LG_correlation_combined5_8_edit2.csv'
        resultsFilename = '/Users/David/Research_Documents/inclination/git_inclination/LG_correlation_combined5_11_25cut_edit4.csv'
        saveDirectory = '/Users/David/Research_Documents/inclination/git_inclination/pilot_paper_code/plots5/'
    elif getpass.getuser() == 'frenchd':
        pickleFilename = '/usr/users/frenchd/inclination/git_inclination/pilot_paper_code/pilotData2.p'
#         resultsFilename = '/usr/users/frenchd/inclination/git_inclination/LG_correlation_combined5_3.csv'
#         resultsFilename = '/usr/users/frenchd/inclination/git_inclination/LG_correlation_combined5_8_edit2.csv'
        resultsFilename = '/usr/users/frenchd/inclination/git_inclination/LG_correlation_combined5_11_25cut_edit4.csv'
        saveDirectory = '/usr/users/frenchd/inclination/git_inclination/pilot_paper_code/plots5/'
    else:
        print 'Could not determine username. Exiting.'
        sys.exit()

    # use the old pickle file to get the full galaxy dataset info
    pickleFile = open(pickleFilename, 'rU')
    fullDict = pickle.load(pickleFile)
    pickleFile.close()

    # save each plot?
    save = False

    results = open(resultsFilename, 'rU')
    reader = csv.DictReader(results)

    virInclude = False
    cusInclude = False
    finalInclude = True

    maxEnv = 3000
    minL = 0.001

    # if match, then the includes in the file have to MATCH the includes above. e.g., if
    # virInclude = False, cusInclude = True, finalInclude = False, then only systems
    # matching those three would be included.
    # Otherwise, all cusInclude = True would be included
    # regardless of the others
    match = False

    # all the lists to be used for associated lines
    nameList = []
    lyaVList = []
    lyaWList = []
    lyaErrList = []
    naList = []
    bList = []
    impactList = []
    azList = []
    incList = []
    fancyIncList = []
    cosIncList = []
    cosFancyIncList = []
    paList = []
    vcorrList = []
    majList = []
    difList = []
    envList = []
    morphList = []
    m15List = []
    virList = []
    likeList = []
    likem15List = []
    AGNnameList = []

    # for ambiguous lines
    lyaVAmbList = []
    lyaWAmbList = []
    envAmbList = []
    ambAGNnameList = []

    for l in reader:
        include_vir = eval(l['include_vir'])
        include_cus = eval(l['include_custom'])
        include = eval(l['include'])

        go = False
        if match:
            if virInclude == include_vir and cusInclude == include_cus:
                go = True
            else:
                go = False
        else:
            if virInclude and include_vir:
                go = True
            elif cusInclude and include_cus:
                go = True
            elif finalInclude and include:
                go = True
            else:
                go = False

        if go:
            AGNname = l['AGNname']
            AGNra_dec = eval(l['degreesJ2000RA_DecAGN'])
            galaxyRA_Dec = eval(l['degreesJ2000RA_DecGalaxy'])
            lyaV = l['Lya_v']
            lyaW = l['Lya_W'].partition('pm')[0]
            lyaW_err = l['Lya_W'].partition('pm')[2]
            env = l['environment']
            galaxyName = l['galaxyName']
            impact = l['impactParameter (kpc)']
            galaxyDist = l['distGalaxy (Mpc)']
            pa = l['positionAngle (deg)']
            RC3pa = l['RC3pa (deg)']
            morph = l['final_morphology']
            vcorr = l['vcorrGalaxy (km/s)']
            maj = l['majorAxis (kpc)']
            minor = l['minorAxis (kpc)']
            inc = l['inclination (deg)']
            az = l['azimuth (deg)']
            b = l['b'].partition('pm')[0]
            b_err = l['b'].partition('pm')[2]
            na = eval(l['Na'].partition(' pm ')[0])
#             print "l['Na'].partition(' pm ')[2] : ", l['Na'].partition(' pm ')
            na_err = eval(l['Na'].partition(' pm ')[2])
            likelihood = l['likelihood']
            likelihoodm15 = l['likelihood_1.5']
            virialRadius = l['virialRadius']
            m15 = l['d^1.5']
            vel_diff = l['vel_diff']

            if isNumber(inc):
                cosInc = cos(float(inc) * pi / 180.)
                if isNumber(maj) and isNumber(minor):
                    q0 = 0.2
                    fancyInc = calculateFancyInclination(maj, minor, q0)
                    cosFancyInc = cos(fancyInc * pi / 180)
                else:
                    fancyInc = -99
                    cosFancyInc = -99
            else:
                cosInc = -99
                inc = -99
                fancyInc = -99
                cosFancyInc = -99

            if isNumber(pa):
                pa = float(pa)
            elif isNumber(RC3pa):
                pa = float(RC3pa)
            else:
                pa = -99

            if isNumber(az):
                az = float(az)
            else:
                az = -99

            if isNumber(maj):
                maj = float(maj)
                virialRadius = float(virialRadius)
            else:
                maj = -99
                virialRadius = -99

            # all the lists to be used for associated lines
            if float(env) <= maxEnv and float(likelihood) >= minL:
                nameList.append(galaxyName)
                AGNnameList.append(AGNname)
                lyaVList.append(float(lyaV))
                lyaWList.append(float(lyaW))
                lyaErrList.append(float(lyaW_err))
                naList.append(na)
                bList.append(float(b))
                impactList.append(float(impact))
                azList.append(az)
                incList.append(float(inc))
                fancyIncList.append(fancyInc)
                cosIncList.append(cosInc)
                cosFancyIncList.append(cosFancyInc)
                paList.append(pa)
                vcorrList.append(vcorr)
                majList.append(maj)
                difList.append(float(vel_diff))
                envList.append(float(env))
                morphList.append(morph)
                m15List.append(m15)
                virList.append(virialRadius)
                likeList.append(likelihood)
                likem15List.append(likelihoodm15)
        else:
            lyaV = l['Lya_v']
            lyaW = l['Lya_W'].partition('pm')[0]
            lyaW_err = l['Lya_W'].partition('pm')[2]
            env = l['environment']
            AGNname = l['AGNname']

            lyaVAmbList.append(float(lyaV))
            lyaWAmbList.append(float(lyaW))
            envAmbList.append(float(env))
            ambAGNnameList.append(AGNname)

    results.close()

    # lists for the full galaxy dataset
    allPA = fullDict['allPA']
    allInclinations = fullDict['allInclinations']
    allCosInclinations = fullDict['allCosInclinations']
    allFancyInclinations = fullDict['allFancyInclinations']
    allCosFancyInclinations = fullDict['allCosFancyInclinations']

    total = 0
    totalNo = 0
    totalYes = 0
    totalIsolated = 0
    totalGroup = 0

    ########################################################################################
    #########################################################################################

    # print all the things
    #
    # absorber info lists
    blues = []
    reds = []
    blueAbs = []
    redAbs = []
    blueW = []
    redW = []
    blueB = []
    redB = []
    blueErr = []
    redErr = []
    blueV = []
    redV = []
    blueImpact = []
    redImpact = []

    # galaxy info lists
    blueInc = []
    redInc = []
    blueFancyInc = []
    redFancyInc = []
    blueAz = []
    redAz = []
    bluePA = []
    redPA = []
    blueVcorr = []
    redVcorr = []
    blueEnv = []
    redEnv = []
    blueVir = []
    redVir = []
    blueLike = []
    redLike = []

    # ambiguous stuff
    void = []
    ambig = []
    for v, w, e in zip(lyaVAmbList, lyaWAmbList, envAmbList):
        if e == 0:
            void.append(w)
        else:
            ambig.append(w)

    # for targets
    finalTargets = {}
    for a in AGNnameList:
        if finalTargets.has_key(a):
            i = finalTargets[a]
            i += 1
            finalTargets[a] = i
        else:
            finalTargets[a] = 1

    # for ambiguous targets
    ambTargets = {}
    for a in ambAGNnameList:
        if ambTargets.has_key(a):
            i = ambTargets[a]
            i += 1
            ambTargets[a] = i
        else:
            ambTargets[a] = 1

    # for absorbers
    for d, w, e, v, i, b in zip(difList, lyaWList, lyaErrList, lyaVList, impactList, bList):
        if d >= 0:
            blues.append(float(d))
            blueW.append(float(w))
            blueErr.append(float(e))
            blueV.append(float(v))
            blueImpact.append(float(i))
            blueAbs.append(abs(d))
            blueB.append(float(b))
        else:
            reds.append(float(d))
            redW.append(float(w))
            redErr.append(float(e))
            redV.append(float(v))
            redImpact.append(float(i))
            redAbs.append(abs(d))
            redB.append(float(b))

    ##########################################################################################

    blueSpiralInc = []
    redSpiralInc = []
    spiralIncList = []

    # for spirals only
    for d, inc in zip(difList, fancyIncList):
        spiralIncList.append(float(inc))
        if d >= 0:
            blueSpiralInc.append(float(inc))
        else:
            redSpiralInc.append(float(inc))

    # compile a list of only spiral galaxy inclinations from the full galaxy table
    if getpass.getuser() == 'David':
        galaxyFile = open('/Users/David/Research_Documents/gt/NewGalaxyTable5.csv', 'rU')
    else:
        print 'Not on laptop, exiting'
        sys.exit()

    reader = csv.DictReader(galaxyFile)

    allDiameters = []
    incGT25diam = []
    allSpiralIncList = []

    q0 = 0.2
    for i in reader:
        major, minor = eval(i['linDiameters (kpc)'])
        morph = i['morphology'].lower()
        if bfind(morph, 's'):
            if not bfind(morph, 'sph') and not bfind(morph, 's0'):
                if isNumber(major):
                    if isNumber(minor):
                        if float(major) > float(minor):
                            fInc = calculateFancyInclination(major, minor, q0)
                            allSpiralIncList.append(fInc)
                            if float(major) >= 25.0:
                                incGT25diam.append(fInc)

    galaxyFile.close()

    ##########################################################################################

    nameDict = {}
    # for galaxies
    for d, inc, finc, az, pa, vcorr, e, vir, l, name in zip(difList, incList, fancyIncList,
                                                            azList, paList, vcorrList,
                                                            envList, virList, likeList,
                                                            nameList):
        if nameDict.has_key(name):
            i = nameDict[name]
            i += 1
            nameDict[name] = i
        else:
            nameDict[name] = 1

        if d >= 0:
            if inc != -99:
                blueInc.append(float(inc))
            if finc != -99:
                blueFancyInc.append(float(finc))
            if az != -99:
                blueAz.append(float(az))
            if pa != -99:
                bluePA.append(float(pa))
            if vcorr != -99:
                blueVcorr.append(float(vcorr))
            blueEnv.append(float(e))
            if vir != -99:
                blueVir.append(float(vir))
            if l != -99:
                blueLike.append(float(l))
        else:
            if inc != -99:
                redInc.append(float(inc))
            if finc != -99:
                redFancyInc.append(float(finc))
            if az != -99:
                redAz.append(float(az))
            if pa != -99:
                redPA.append(float(pa))
            if vcorr != -99:
                redVcorr.append(float(vcorr))
            redEnv.append(float(e))
            if vir != -99:
                redVir.append(float(vir))
            if l != -99:
                redLike.append(float(l))

    galaxyNames = nameDict.keys()

    # how many absorbers above vs below vel_cut?
    redVelCount200 = 0
    redVelCount100 = 0
    blueVelCount200 = 0
    blueVelCount100 = 0

    for b in blues:
        if b >= 200:
            blueVelCount200 += 1
        if b >= 100:
            blueVelCount100 += 1

    for r in reds:
        if abs(r) >= 200:
            redVelCount200 += 1
        if abs(r) >= 100:
            redVelCount100 += 1

    assocFancyInc = blueFancyInc + redFancyInc

    print
    print '------------------------ Pilot Data ------------------------------'
    print
    print ' FOR THE FOLLOWING INCLUDE SET:'
    print ' Virial radius include = ', virInclude
    print ' Custom include = ', cusInclude
    print ' Final include = ', finalInclude
    print ' Match = ', match
    print
    print 'total number of lines: ', len(lyaWList) + len(lyaWAmbList)
    print 'total number of unique galaxies matched: ', len(galaxyNames)
    print 'total number of associated lines: ', len(difList)
    print 'total number of ambiguous lines: ', len(ambig)
    print 'total number of void lines: ', len(void)
    print '# of redshifted lines: ', len(reds)
    print '# of blueshifted lines: ', len(blues)
    print
    print
    print ' ASSOCIATED TARGETS '
    print
    print 'final target number: ', len(finalTargets.keys())
    for i in finalTargets.keys():
        print i
    print
    print
    print ' AMBIGUOUS TARGETS '
    print
    print 'final ambiguous number: ', len(ambTargets.keys())
    for i in ambTargets.keys():
        print i
    print
    print
    print '----------------------- Absorber info ----------------------------'
    print
    print 'avg blueshifted EW: ', mean(blueW)
    print 'median blueshifted EW: ', median(blueW)
    print 'avg blue err: ', mean(blueErr)
    print 'median blue err: ', median(blueErr)
    print
    print 'std(blue EW): ', std(blueW)
    print 'stats.sem(blue EW): ', stats.sem(blueW)
    print 'stats.describe(blue EW): ', stats.describe(blueW)
    print
    print 'avg blueshifted vel_diff: ', mean(blues)
    print 'median blueshifted vel_diff: ', median(blues)
    print 'std(blueshifted vel_diff): ', std(blues)
    print 'stats.sem(blue vel_dif): ', stats.sem(blues)
    print 'stats.describe(blue vel_dif): ', stats.describe(blues)
    print
    print '% blueshifted which have vel_diff >= 200 km/s: {0}'.format(float(blueVelCount200) / len(blues))
    print 'total number with abs(vel_diff) >= 200 km/s: {0}'.format(blueVelCount200)
    print '% blueshifted which have vel_diff >= 100 km/s: {0}'.format(float(blueVelCount100) / len(blues))
    print 'total number with abs(vel_diff) >= 100 km/s: {0}'.format(blueVelCount100)
    print
    print 'avg blue velocity: ', mean(blueV)
    print 'median blue velocity: ', median(blueV)
    print 'std(blue Velocity): ', std(blueV)
    print 'avg blue impact: ', mean(blueImpact)
    print 'median blue impact: ', median(blueImpact)
    print 'stats.sem(blue impact): ', stats.sem(blueImpact)
    print 'stats.describe(blue impact): ', stats.describe(blueImpact)
    print
    print 'avg redshifted EW: ', mean(redW)
    print 'median redshifted EW: ', median(redW)
    print 'avg red err: ', mean(redErr)
    print 'median red err: ', median(redErr)
    print
    print 'std(red EW): ', std(redW)
    print 'stats.sem(red EW): ', stats.sem(redW)
    print 'stats.describe(red EW): ', stats.describe(redW)
    print
    print 'avg redshifted vel_diff: ', mean(reds)
    print 'median redshifted vel_diff: ', median(reds)
    print 'std(redshifted vel_dif): ', std(reds)
    print 'stats.sem(red vel_dif): ', stats.sem(reds)
    print 'stats.describe(red vel_dif): ', stats.describe(reds)
    print
    print '% redshifted which have abs(vel_diff) >= 200 km/s: {0}'.format(float(redVelCount200) / len(reds))
    print 'total number with abs(vel_diff) >= 200 km/s: {0}'.format(redVelCount200)
    print '% redshifted which have abs(vel_diff) >= 100 km/s: {0}'.format(float(redVelCount100) / len(reds))
    print 'total number with abs(vel_diff) >= 100 km/s: {0}'.format(redVelCount100)
    print
    print 'avg red velocity: ', mean(redV)
    print 'median red velocity: ', median(redV)
    print
    print 'avg red impact: ', mean(redImpact)
    print 'median red impact: ', median(redImpact)
    print 'stats.sem(red impact): ', stats.sem(redImpact)
    print 'stats.describe(red impact): ', stats.describe(redImpact)
    print 'std(red impact): ', std(redImpact)
    print
    print '----------------------- Galaxy info ----------------------------'
    print

    # regular inclinations
    incCut = 50

    totalBlueInc = len(blueInc)
    totalRedInc = len(redInc)

    blueIncCount = 0
    for i in blueInc:
        if i >= incCut:
            blueIncCount += 1

    redIncCount = 0
    for i in redInc:
        if i >= incCut:
            redIncCount += 1

    totalInc = len(allInclinations)
    totalCount = 0
    for i in allInclinations:
        if i >= incCut:
            totalCount += 1

    # fancy inclinations
    totalBlueFancyInc = len(blueFancyInc)
    totalRedFancyInc = len(redFancyInc)

    blueFancyIncCount = 0
    for i in blueFancyInc:
        if i >= incCut:
            blueFancyIncCount += 1

    redFancyIncCount = 0
    for i in redFancyInc:
        if i >= incCut:
            redFancyIncCount += 1

    combinedCount = redFancyIncCount + blueFancyIncCount
    totalCombinedCount = totalRedFancyInc + totalBlueFancyInc

    totalFancyInc = len(allFancyInclinations)
    totalFancyCount = 0
    for i in allFancyInclinations:
        if i >= incCut:
            totalFancyCount += 1

    print
    print ' INCLINATIONS: '
    print
    print 'Blue: {0} % of associated galaxies have >={1}% inclination'.format(float(blueIncCount) / float(totalBlueInc), incCut)
    print 'Red: {0} % of associated galaxies have >={1}% inclination'.format(float(redIncCount) / float(totalRedInc), incCut)
    print 'All: {0} % of ALL galaxies have >={1}% inclination'.format(float(totalCount) / float(totalInc), incCut)
    print
    print ' FANCY INCLINATIONS: '
    print
    print 'Blue: {0} % of associated galaxies have >={1}% fancy inclination'.format(float(blueFancyIncCount) / float(totalBlueFancyInc), incCut)
    print 'Red: {0} % of associated galaxies have >={1}% fancy inclination'.format(float(redFancyIncCount) / float(totalRedFancyInc), incCut)
    print 'All: {0} % of ALL galaxies have >={1}% fancy inclination'.format(float(totalFancyCount) / float(totalFancyInc), incCut)
    print 'Combined: {0} % of associated galaxies have >= {1} fancy inclination'.format(float(combinedCount) / float(totalCombinedCount), incCut)
    print
    print 'Average all fancy inclination: ', mean(allFancyInclinations)
    print 'stats.sem(all): ', stats.sem(allFancyInclinations)
    print
    print 'avg blue inclination: ', mean(blueInc)
    print 'median blue inclination: ', median(blueInc)
    print 'avg blue fancy inclination: ', mean(blueFancyInc)
    print 'median blue fancy inclination: ', median(blueFancyInc)
    print
    print 'avg red inclination: ', mean(redInc)
    print 'median red inclination: ', median(redInc)
    print 'avg red fancy inclination: ', mean(redFancyInc)
    print 'median red fancy inclination: ', median(redFancyInc)
    print
    print 'mean associated: ', mean(assocFancyInc)
    print 'stats.sem(associated): ', stats.sem(assocFancyInc)
    print 'stats.describe(associated): ', stats.describe(assocFancyInc)
    print 'stats.sem(blue): ', stats.sem(blueFancyInc)
    print 'stats.describe(blue): ', stats.describe(blueFancyInc)
    print
    print 'stats.sem(red): ', stats.sem(redFancyInc)
    print 'stats.describe(red): ', stats.describe(redFancyInc)
    print
    print " AZIMUTHS and PA: "
    print
    print 'avg blue azimuth: ', mean(blueAz)
    print 'median blue azimuth: ', median(blueAz)
    print 'stats.sem(blue az): ', stats.sem(blueAz)
    print 'stats.describe(blue az): ', stats.describe(blueAz)
    print
    print 'avg red azimuth: ', mean(redAz)
    print 'median red azimuth: ', median(redAz)
    print 'stats.sem(red az): ', stats.sem(redAz)
    print 'stats.describe(red az): ', stats.describe(redAz)
    print
    print 'avg blue PA: ', mean(bluePA)
    print 'median blue PA: ', median(bluePA)
    print
    print 'avg red PA: ', mean(redPA)
    print 'median red PA: ', median(redPA)
    print
    print ' VCORR : '
    print
    print 'avg blue vcorr: ', mean(blueVcorr)
    print 'median blue vcorr: ', median(blueVcorr)
    print
    print 'avg red vcorr: ', mean(redVcorr)
    print 'median red vcorr: ', median(redVcorr)
    print
    print ' ENVIRONMENT: '
    print
    print 'avg blue environment: ', mean(blueEnv)
    print 'median blue environment: ', median(blueEnv)
    print
    print 'avg red environment: ', mean(redEnv)
    print 'median red environment: ', median(redEnv)
    print
    print ' R_vir: '
    print
    print 'avg blue R_vir: ', mean(blueVir)
    print 'median blue R_vir: ', median(blueVir)
    print 'stats.sem(blue R_vir): ', stats.sem(blueVir)
    print 'stats.describe(blue R_vir): ', stats.describe(blueVir)
    print
    print 'avg red R_vir: ', mean(redVir)
    print 'median red R_vir: ', median(redVir)
    print 'stats.sem(red R_vir): ', stats.sem(redVir)
    print 'stats.describe(red R_vir): ', stats.describe(redVir)
    print
    print ' LIKELIHOOD: '
    print
    print 'avg blue likelihood: ', mean(blueLike)
    print 'median blue likelihood: ', median(blueLike)
    print
    print 'avg red likelihood: ', mean(redLike)
    print 'median red likelihood: ', median(redLike)
    print
    print
    print '-------------------- Distribution analysis ----------------------'
    print
    print
    print ' FANCY INCLINATIONS: '

    # perform the K-S and AD tests for inclination
    ans1 = stats.ks_2samp(blueFancyInc, redFancyInc)
    ans1a = stats.anderson_ksamp([blueFancyInc, redFancyInc])

    print 'KS for blue vs red fancy inclinations: ', ans1
    print 'AD for blue vs red fancy inclinations: ', ans1a

    ans2 = stats.ks_2samp(blueFancyInc, allFancyInclinations)
    print 'KS for blue vs all fancy inclinations: ', ans2

    ans3 = stats.ks_2samp(redFancyInc, allFancyInclinations)
    print 'KS for red vs all fancy inclinations: ', ans3
    print

    z_statrb, p_valrb = stats.ranksums(blueFancyInc, redFancyInc)
    z_statall, p_valall = stats.ranksums(assocFancyInc, allFancyInclinations)
    print 'ranksum red vs blue p-value: ', p_valrb
    print 'ranksum associated vs all: ', p_valall

#     ans4 = stats.ks_2samp(assocFancyInc, allFancyInclinations)
#     ans4a = stats.anderson_ksamp([assocFancyInc, allFancyInclinations])
#
#     print 'KS for all associated vs all fancy inclinations: ', ans4
#     print 'AD for all associated vs all fancy inclinations: ', ans4a
#     print

#     ans5 = stats.ks_2samp(spiralIncList, allSpiralIncList)
#     ans5a = stats.anderson_ksamp([spiralIncList, allSpiralIncList])
#
#     print 'KS for all spiral associated vs all spiral fancy inclinations: ', ans5
#     print 'AD for all spiral associated vs all spiral fancy inclinations: ', ans5a

    print
    print ' INCLINATIONS: '
    print

    # perform the K-S and AD tests for inclination
    ans1 = stats.ks_2samp(blueInc, redInc)
    ans1a = stats.anderson_ksamp([blueInc, redInc])

    print 'KS for blue vs red inclinations: ', ans1
    print 'AD for blue vs red inclinations: ', ans1a

    ans2 = stats.ks_2samp(blueInc, allInclinations)
    print 'KS for blue vs all inclinations: ', ans2

    ans3 = stats.ks_2samp(redInc, allInclinations)
    print 'KS for red vs all inclinations: ', ans3

    assocInc = blueInc + redInc
    ans4 = stats.ks_2samp(assocInc, allInclinations)
    print 'KS for associated vs all inclinations: ', ans4
    print
    print ' EW Distributions: '
    print

    # perform the K-S and AD tests for EW
    ans1 = stats.ks_2samp(blueW, redW)
    ans1a = stats.anderson_ksamp([blueW, redW])
    print 'KS for blue vs red EW: ', ans1
    print 'AD for blue vs red EW: ', ans1a
    print
    print ' Impact parameter Distributions: '
    print

    # perform the K-S and AD tests for impact parameter
    ans1 = stats.ks_2samp(blueImpact, redImpact)
    ans1a = stats.anderson_ksamp([blueImpact, redImpact])
    print 'KS for blue vs red impact parameters: ', ans1
    print 'AD for blue vs red impact parameters: ', ans1a
    print
    print ' \Delta v Distributions: '
    print

    # perform the K-S and AD tests for \Delta v
    ans1 = stats.ks_2samp(blueAbs, redAbs)
    ans1a = stats.anderson_ksamp([blueAbs, redAbs])
    print 'KS for blue vs red \Delta v: ', ans1
    print 'AD for blue vs red \Delta v: ', ans1a
    print
    print ' Azimuth Distributions: '
    print

    # perform the K-S and AD tests for azimuth
    ans1 = stats.ks_2samp(blueAz, redAz)
    ans1a = stats.anderson_ksamp([blueAz, redAz])
    print 'KS for blue vs red azimuth: ', ans1
    print 'AD for blue vs red azimuth: ', ans1a
    print

    # now against a flat distribution
    flatRed = arange(0, 90, 1)
    flatBlue = arange(0, 90, 1)

    ans1 = stats.ks_2samp(blueAz, flatBlue)
    ans1a = stats.anderson_ksamp([blueAz, flatBlue])
    print 'KS for blue vs flat azimuth: ', ans1
    print 'AD for blue vs flat azimuth: ', ans1a
    print

    ans1 = stats.ks_2samp(redAz, flatRed)
    ans1a = stats.anderson_ksamp([redAz, flatRed])
    print 'KS for red vs flat azimuth: ', ans1
    print 'AD for red vs flat azimuth: ', ans1a
    print
    print
    print ' Environment Distributions: '
    print

    # perform the K-S and AD tests for environment
    ans1 = stats.ks_2samp(blueEnv, redEnv)
    ans1a = stats.anderson_ksamp([blueEnv, redEnv])
    print 'KS for blue vs red environment: ', ans1
    print 'AD for blue vs red environment: ', ans1a
    print
    print ' R_vir Distributions: '
    print

    # perform the K-S and AD tests for R_vir
    ans1 = stats.ks_2samp(blueVir, redVir)
    ans1a = stats.anderson_ksamp([blueVir, redVir])
    print 'KS for blue vs red R_vir: ', ans1
    print 'AD for blue vs red R_vir: ', ans1a
    print
    print ' Doppler parameter Distributions: '
    print

    # perform the K-S and AD tests for Doppler parameter
    ans1 = stats.ks_2samp(blueB, redB)
    ans1a = stats.anderson_ksamp([blueB, redB])
    print 'KS for blue vs red doppler parameter: ', ans1
    print 'AD for blue vs red doppler parameter: ', ans1a
    print
    print ' Likelihood Distributions: '
    print

    # perform the K-S and AD tests for likelihood
    ans1 = stats.ks_2samp(blueLike, redLike)
    ans1a = stats.anderson_ksamp([blueLike, redLike])
    print 'KS for blue vs red likelihood: ', ans1
    print 'AD for blue vs red likelihood: ', ans1a
    print
    print ' COMPLETED. '
def best_fit(opts, R_proj, r_scale_ini, grid, bg_density_ini=0.0, fit_bg=True,
             fpb=1, weights=None):
    if fit_bg:
        print ' Fitting the scale radius and the background for', len(R_proj), 'members.'
    else:
        print ' Fitting the scale radius only for', len(R_proj), 'members.'
    print ' Initial estimates of parameters: ', r_scale_ini, bg_density_ini

    if weights is None:
        weights = np.ones(len(R_proj))

    # Sort data
    R_proj = R_proj[np.argsort(R_proj)]
    weights = weights[np.argsort(R_proj)]

    # Get number density for various points along R_proj
    num_points = fpb * int(np.sqrt(len(R_proj)))
    if num_points < 5:
        print ' Only ', len(R_proj), 'data - not enough for the profiles'
    rd, dp, edp = get_points(R_proj, num_points, weights)

    # Find best values for scale radius and background density
    if not fit_bg:
        bg_bounds = (bg_density_ini, bg_density_ini)
    else:
        bg_bounds = (0.001, None)

    # Get fit to profile
    x_fit = np.arange(0.001, 5.0, 0.005)
    if opts.model == 'beta':
        print ' Using beta model. [Beta = ' + str(opts.beta) + ']'
        r_scale_best, bg_density_best = minimize(bm_proj_maxlik_bg,
                                                 [r_scale_ini, bg_density_ini],
                                                 args=(R_proj, opts.beta, ),
                                                 method='SLSQP',
                                                 options={'disp': False},
                                                 bounds=((0.001, None), bg_bounds)).x
        y_fit = bm_num_density(R_proj, r_scale_best, bg_density_best, opts.beta) * \
            np.array(map(partial(bm_proj_sd, alpha=opts.beta), (x_fit / r_scale_best))) + \
            bg_density_best * np.sum(weights) / len(R_proj)
    else:
        print ' Using NFW profile.'
        r_scale_best, bg_density_best = minimize(nfw_proj_maxlik_bg,
                                                 [r_scale_ini, bg_density_ini],
                                                 args=(R_proj, ),
                                                 method='SLSQP',
                                                 options={'disp': False},
                                                 bounds=((0.001, None), bg_bounds)).x
        y_fit = nfw_num_density(R_proj, r_scale_best, bg_density_best) * \
            np.array(map(nfw_proj_sd, (x_fit / r_scale_best))) + \
            bg_density_best * np.sum(weights) / len(R_proj)

    # Evaluate chi^2
    if fit_bg:
        npfree = 2
    else:
        npfree = 1
    chi2_param = chi2_gof(np.interp(rd, x_fit, y_fit), dp, edp, npfree)

    # Evaluate K-S test
    ks_param = ks_2samp(np.interp(rd, x_fit, y_fit), dp)

    # Evaluate A-D test
    ad_param = anderson_ksamp([np.interp(rd, x_fit, y_fit), dp])

    if opts.confidence:
        # Evaluate confidence intervals for r_s
        cf_limits = confidence(opts, r_scale_best, bg_density_best, R_proj, grid, npfree)

    # Print results
    print ''
    print ' Best-fit r_s:', r_scale_best
    if opts.confidence:
        print ' 1-sigma interval: ', cf_limits[0], cf_limits[1]
    print ' Best-fit background density:', bg_density_best, 'gals/Mpc^2'
    print ''
    print ' Chi^2 of the fit is', chi2_param[0], 'for', len(edp) - npfree, 'd.o.f.'
    print ' Probability of the fit is', chi2_param[1], '[rejected if > 0.99]'
    print ''
    print ' KS test results:', ks_param[0], ks_param[1]
    print ' AD test results:', ad_param[0], ad_param[2]

    return [r_scale_best, bg_density_best], [rd, dp, edp], [x_fit, y_fit], chi2_param
def _test_impl(self, data1: t.List[Number], data2: t.List[Number]) -> float:
    # scipy's approximate significance level is extrapolated outside the
    # tabulated range and can exceed 1, so cap it at 1
    return min(st.anderson_ksamp([data1, data2])[-1], 1)
def classifier_eval(mode, keras_mode, args):
    ##############################################################################
    # Setting parameters
    #
    name = args[0]
    sample1_name = args[1]
    sample2_name = args[2]
    shuffling_seed = args[3]

    # mode =0 if you want evaluation of a model, =1 if grid hyperparameter search,
    # =2 if spearmint hyperparameter search
    comp_file_list = args[4]
    print(comp_file_list)
    cv_n_iter = args[5]
    clf = args[6]
    C_range = args[7]
    gamma_range = args[8]

    if len(args) > 9:
        # AD mode =1 : Anderson-Darling test used instead of Kolmogorov-Smirnov
        # AD mode =2 : Visualisation of the decision boundary
        # AD mode anything else: use KS and no visualisation
        AD_mode = args[9]
    else:
        AD_mode = 0

    if mode == 0:
        # For standard evaluation
        score_list = []
        print("standard evaluation mode")
    elif mode == 1:
        # For grid search
        print("grid hyperparameter search mode")
        param_grid = dict(gamma=gamma_range, C=C_range)
    elif mode == 2:
        # For spearmint hyperparameter search
        score_list = []
        print("spearmint hyperparameter search mode")
    else:
        print("No valid mode chosen")
        return 1

    ##############################################################################
    # Load and prepare data set
    #
    # dataset for grid search

    for comp_file_0, comp_file_1 in comp_file_list:
        print("Operating of files :" + comp_file_0 + " " + comp_file_1)

        # extracts data from the files
        features_0 = np.loadtxt(comp_file_0, dtype='d')
        features_1 = np.loadtxt(comp_file_1, dtype='d')

        # determine how many data points are in each sample
        no_0 = features_0.shape[0]
        no_1 = features_1.shape[0]
        no_tot = no_0 + no_1

        # Give all samples in file 0 the label 0 and in file 1 the feature 1
        label_0 = np.zeros((no_0, 1))
        label_1 = np.ones((no_1, 1))

        # Create an array containing samples and features.
        data_0 = np.c_[features_0, label_0]
        data_1 = np.c_[features_1, label_1]
        data = np.r_[data_0, data_1]

        np.random.shuffle(data)

        X = data[:, :-1]
        y = data[:, -1]
        print("X : ", X)
        print("y : ", y)

        atest_size = 0.2
        if cv_n_iter == 1:
            train_range = range(int(math.floor(no_tot * (1 - atest_size))))
            test_range = range(int(math.ceil(no_tot * (1 - atest_size))), no_tot)
            #print("train_range : ", train_range)
            #print("test_range : ", test_range)
            acv = Counter(train_range, test_range)
            #print(acv)
        else:
            acv = StratifiedShuffleSplit(y, n_iter=cv_n_iter, test_size=atest_size,
                                         random_state=42)

        print("Finished with setting up samples")

        # It is usually a good idea to scale the data for SVM training.
        # We are cheating a bit in this example in scaling all of the data,
        # instead of fitting the transformation on the training set and
        # just applying it on the test set.
        if AD_mode != 2:
            scaler = StandardScaler()
            X = scaler.fit_transform(X)

        if mode == 1:
            ##############################################################################
            # Grid Search
            #
            # Train classifiers
            #
            # For an initial search, a logarithmic grid with basis
            # 10 is often helpful. Using a basis of 2, a finer
            # tuning can be achieved but at a much higher cost.
if AD_mode==1: grid = GridSearchCV(clf, scoring=p_value_scoring_object.p_value_scoring_object_AD ,param_grid=param_grid, cv=acv) else: grid = GridSearchCV(clf, scoring=p_value_scoring_object.p_value_scoring_object ,param_grid=param_grid, cv=acv) grid.fit(X, y) print("The best parameters are %s with a score of %0.2f" % (grid.best_params_, grid.best_score_)) # Now we need to fit a classifier for all parameters in the 2d version # (we use a smaller set of parameters here because it takes a while to train) C_2d_range = [1e-2, 1, 1e2] gamma_2d_range = [1e-1, 1, 1e1] classifiers = [] for C in C_2d_range: for gamma in gamma_2d_range: clf = SVC(C=C, gamma=gamma) clf.fit(X_2d, y_2d) classifiers.append((C, gamma, clf)) ############################################################################## # visualization # # draw visualization of parameter effects plt.figure(figsize=(8, 6)) xx, yy = np.meshgrid(np.linspace(-3, 3, 200), np.linspace(-3, 3, 200)) for (k, (C, gamma, clf)) in enumerate(classifiers): # evaluate decision function in a grid Z = clf.decision_function(np.c_[xx.ravel(), yy.ravel()]) Z = Z.reshape(xx.shape) # visualize decision function for these parameters plt.subplot(len(C_2d_range), len(gamma_2d_range), k + 1) plt.title("gamma=10^%d, C=10^%d" % (np.log10(gamma), np.log10(C)),size='medium') # visualize parameter's effect on decision function plt.pcolormesh(xx, yy, -Z, cmap=plt.cm.RdBu) plt.scatter(X_2d[:, 0], X_2d[:, 1], c=y_2d, cmap=plt.cm.RdBu_r) plt.xticks(()) plt.yticks(()) plt.axis('tight') plt.savefig('prediction_comparison.png') # plot the scores of the grid # grid_scores_ contains parameter settings and scores # We extract just the scores scores = [x[1] for x in grid.grid_scores_] scores = np.array(scores).reshape(len(C_range), len(gamma_range)) # Draw heatmap of the validation accuracy as a function of gamma and C # # The score are encoded as colors with the hot colormap which varies from dark # red to bright yellow. As the most interesting scores are all located in the # 0.92 to 0.97 range we use a custom normalizer to set the mid-point to 0.92 so # as to make it easier to visualize the small variations of score values in the # interesting range while not brutally collapsing all the low score values to # the same color. 
plt.figure(figsize=(8, 6)) plt.subplots_adjust(left=.2, right=0.95, bottom=0.15, top=0.95) plt.imshow(scores, interpolation='nearest', cmap=plt.cm.hot, norm=MidpointNormalize(vmin=-1.0, midpoint=-0.0001)) plt.xlabel('gamma') plt.ylabel('C') plt.colorbar() plt.xticks(np.arange(len(gamma_range)), gamma_range, rotation=45) plt.yticks(np.arange(len(C_range)), C_range) plt.title('Validation accuracy') plt.savefig('Heat_map.png') else: if keras_mode==1: from keras.models import Sequential from keras.layers.core import Dense, Activation from keras.layers import Dropout from keras.utils import np_utils, generic_utils dimof_input = X.shape[1] dimof_output =2 y = np_utils.to_categorical(y, dimof_output) print("dimof_input : ",dimof_input, "dimof_output : ", dimof_output) #y = np_utils.to_categorical(y, dimof_output) scores = [] counter = 1 for train_index, test_index in acv: print("Cross validation run ", counter) X_train, X_test = X[train_index], X[test_index] y_train, y_test = y[train_index], y[test_index] print("X_train : ",X_train) print("y_train : ",y_train) batch_size = 1 dimof_middle = args[10] dropout = 0.5 countof_epoch = 5 n_hidden_layers = args[11] model = Sequential() model.add(Dense(input_dim=dimof_input, output_dim=dimof_middle, init="glorot_uniform",activation='tanh')) model.add(Dropout(dropout)) for n in range(n_hidden_layers): model.add(Dense(input_dim=dimof_middle, output_dim=dimof_middle, init="glorot_uniform",activation='tanh')) model.add(Dropout(dropout)) model.add(Dense(input_dim=dimof_middle, output_dim=dimof_output, init="glorot_uniform",activation='sigmoid')) #Compiling (might take longer) model.compile(loss='categorical_crossentropy', optimizer='sgd') model.fit(X_train, y_train,show_accuracy=True,batch_size=batch_size, nb_epoch=countof_epoch, verbose=0) prob_pred = model.predict_proba(X_test) print("prob_pred : ", prob_pred) assert (not (np.isnan(np.sum(prob_pred)))) # for y is 2D change dimof_output =2, add y = np_utils.to_categorical(y, dimof_output) and change the following line prob_pred = np.array([sublist[0] for sublist in prob_pred]) y_test = np.array([sublist[0] for sublist in y_test]) print("y_test : ", y_test) print("prob_pred : ", prob_pred) #Just like in p_value_scoring_strategy.py y_test = np.reshape(y_test,(1,y_test.shape[0])) prob_pred = np.reshape(prob_pred,(1,prob_pred.shape[0])) prob_0 = prob_pred[np.logical_or.reduce([y_test==0])] prob_1 = prob_pred[np.logical_or.reduce([y_test==1])] if __debug__: print("Plot") if AD_mode==1: p_AD_stat=stats.anderson_ksamp([prob_0,prob_1]) print(p_AD_stat) scores.append(p_AD_stat[2]) else: p_KS=stats.ks_2samp(prob_0,prob_1) print(p_KS) scores.append(p_KS[1]) counter +=1 else: if keras_mode==2: X, y = Xy_to_keras_Xy(X,y) if AD_mode==1: scores = (-1)*cross_validation.cross_val_score(clf,X,y,cv=acv,scoring=p_value_scoring_object.p_value_scoring_object_AD) elif AD_mode==2: print("X[:,0].min() , ", X[:,0].min(), "X[:,0].max() : ", X[:,0].max()) scores = (-1)*cross_validation.cross_val_score(clf,X,y,cv=acv,scoring=p_value_scoring_object.p_value_scoring_object_visualisation) import os os.rename("visualisation.png",name+"_visualisation.png") else: scores = (-1)*cross_validation.cross_val_score(clf,X,y,cv=acv,scoring=p_value_scoring_object.p_value_scoring_object) print("scores : ",scores) score_list.append(np.mean(scores)) if mode==2: return np.mean(scores) ############################################################################################################################################################ 
    ############################################################################################################################################################
    ############################################################### Evaluation of results ######################################################################
    ############################################################################################################################################################
    if mode == 0:
        # The score list has been computed; write it out and plot the distribution.
        print(score_list)
        with open(name + "_p_values", 'w') as p_value_file:
            for item in score_list:
                p_value_file.write(str(item) + '\n')
        histo_plot_pvalue(score_list, 50, "p value", "Frequency", "p value distribution", name)
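p_value_scoring_object is imported from a companion module and is not shown here. A sketch of the idea behind it, consistent with the (-1)*cross_val_score calls above (the scorer returns a negated p-value so that GridSearchCV's "greater is better" convention favors small p-values, i.e. classifiers that separate the two samples):

import numpy as np
from scipy import stats

def p_value_score(estimator, X, y):
    # Hypothetical scorer: KS p-value between the predicted class-1
    # probabilities of the two true classes. A small p-value means the
    # classifier can tell the samples apart; negate it so maximizing
    # the score minimizes the p-value.
    prob = estimator.predict_proba(X)[:, 1]
    p_ks = stats.ks_2samp(prob[y == 0], prob[y == 1])[1]
    return -p_ks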
def main(): if getpass.getuser() == 'David': pickleFilename = '/Users/David/Research_Documents/inclination/git_inclination/pilot_paper_code/pilotData2.p' resultsFilename = '/Users/David/Research_Documents/inclination/git_inclination/LG_correlation_combined5_3.csv' saveDirectory = '/Users/David/Research_Documents/inclination/git_inclination/pilot_paper_code/plots/' elif getpass.getuser() == 'frenchd': pickleFilename = '/usr/users/frenchd/inclination/git_inclination/pilot_paper_code/pilotData2.p' resultsFilename = '/usr/users/frenchd/inclination/git_inclination/LG_correlation_combined5_3.csv' saveDirectory = '/usr/users/frenchd/inclination/git_inclination/pilot_paper_code/plots/' else: print 'Could not determine username. Exiting.' sys.exit() pickleFile = open(pickleFilename,'rU') fullDict = pickle.load(pickleFile) pickleFile.close() # save each plot? save = False results = open(resultsFilename,'rU') reader = csv.DictReader(results) virInclude = False cusInclude = False finalInclude = True # if match, then the includes in the file have to MATCH the includes above. e.g., if # virInclude = False, cusInclude = True, finalInclude = False, then only systems # matching those three would be included. Otherwise, all cusInclude = True would be included # regardless of the others match = False # all the lists to be used for associated lines lyaVList = [] lyaWList = [] naList = [] bList = [] impactList = [] azList = [] incList = [] fancyIncList = [] cosIncList = [] cosFancyIncList = [] paList = [] vcorrList = [] majList = [] difList = [] envList = [] morphList = [] m15List = [] virList = [] likeList = [] likem15List = [] for l in reader: include_vir = eval(l['include_vir']) include_cus = eval(l['include_custom']) include = eval(l['include']) go = False if match: if virInclude == include_vir and cusInclude == include_cus: go = True else: go = False else: if virInclude and include_vir: go = True elif cusInclude and include_cus: go = True elif finalInclude and include: go = True else: go = False if go: AGNra_dec = eval(l['degreesJ2000RA_DecAGN']) galaxyRA_Dec = eval(l['degreesJ2000RA_DecGalaxy']) lyaV = l['Lya_v'] lyaW = l['Lya_W'].partition('pm')[0] lyaW_err = l['Lya_W'].partition('pm')[2] env = l['environment'] galaxyName = l['galaxyName'] impact = l['impactParameter (kpc)'] galaxyDist = l['distGalaxy (Mpc)'] pa = l['positionAngle (deg)'] RC3pa = l['RC3pa (deg)'] morph = l['morphology'] vcorr = l['vcorrGalaxy (km/s)'] maj = l['majorAxis (kpc)'] min = l['minorAxis (kpc)'] inc = l['inclination (deg)'] az = l['azimuth (deg)'] b = l['b'].partition('pm')[0] b_err = l['b'].partition('pm')[2] na = eval(l['Na'].partition(' pm ')[0]) print "l['Na'].partition(' pm ')[2] : ",l['Na'].partition(' pm ') na_err = eval(l['Na'].partition(' pm ')[2]) likelihood = l['likelihood'] likelihoodm15 = l['likelihood_1.5'] virialRadius = l['virialRadius'] m15 = l['d^1.5'] vel_diff = l['vel_diff'] if isNumber(RC3pa) and not isNumber(pa): pa = RC3pa if isNumber(inc): cosInc = cos(float(inc) * pi/180.) 
if isNumber(maj) and isNumber(min): q0 = 0.2 fancyInc = calculateFancyInclination(maj,min,q0) cosFancyInc = cos(fancyInc * pi/180) else: fancyInc = -99 cosFancyInc = -99 else: cosInc = -99 inc = -99 fancyInc = -99 cosFancyInc = -99 # all the lists to be used for associated lines lyaVList.append(float(lyaV)) lyaWList.append(float(lyaW)) naList.append(na) bList.append(float(b)) impactList.append(float(impact)) azList.append(az) incList.append(float(inc)) fancyIncList.append(fancyInc) cosIncList.append(cosInc) cosFancyIncList.append(cosFancyInc) paList.append(pa) vcorrList.append(vcorr) majList.append(maj) difList.append(float(vel_diff)) envList.append(float(env)) morphList.append(morph) m15List.append(m15) virList.append(virialRadius) likeList.append(likelihood) likem15List.append(likelihoodm15) results.close() ########################################################################################## ########################################################################################## # lists for the full galaxy dataset allPA = fullDict['allPA'] allInclinations = fullDict['allInclinations'] allCosInclinations = fullDict['allCosInclinations'] allFancyInclinations = fullDict['allFancyInclinations'] allCosFancyInclinations = fullDict['allCosFancyInclinations'] total = 0 totalNo = 0 totalYes = 0 totalIsolated = 0 totalGroup = 0 ######################################################################################## ######################################################################################## # plot histograms of the cos(inclinations) for both associated galaxies and the # full galaxy data set, combining both redshifted and blueshifted plot_dist_cosInc = False if plot_dist_cosInc: ''' Here's an example: n1 = 200 n2 = 300 rvs1 = stats.norm.rvs(size=n1, loc=0., scale=1) rvs2 = stats.norm.rvs(size=n2, loc=0.5, scale=1.5) ans = stats.ks_2samp(rvs1, rvs2) print 'ans: ',ans print ''' # define the datasets rvs1all = cosIncList rvs1 = [] rvs2 = allCosInclinations # remove -99 'no-data' values for i in rvs1all: if float(i) >=0: rvs1.append(i) # do the K-S test and print the results ans = stats.ks_2samp(rvs1, rvs2) print 'KS for cosIncList vs all: ',ans # plot the distributions fig = figure() bins = 15 ax1 = fig.add_subplot(211) plot1 = hist(rvs1,bins=bins) xlim(0,1) ax2 = fig.add_subplot(212) plot1 = hist(rvs2,bins=bins) xlim(0,1) show() ######################################################################################## ######################################################################################## # plot histograms of the inclinations for both associated galaxies and the # full galaxy data set, combining both redshifted and blueshifted plot_dist_inc = False if plot_dist_inc: # define the datasets rvs1all = incList rvs1 = [] rvs2 = allInclinations # remove -99 'no-data' values for i in rvs1all: if float(i) >=0: rvs1.append(i) # perform the K-S test ans = stats.ks_2samp(rvs1, rvs2) print 'KS for incList vs all: ',ans # plot the distributions fig = figure() bins = 15 ax1 = fig.add_subplot(211) plot1 = hist(rvs1,bins=bins) ax2 = fig.add_subplot(212) plot1 = hist(rvs2,bins=bins) show() ######################################################################################## ######################################################################################## # plot histograms of the inclinations for both associated galaxies and the # full galaxy data set, combining both redshifted and blueshifted plot_dist_fancyInc = False if plot_dist_fancyInc: # define the datasets rvs1all = 
fancyIncList rvs1 = [] rvs2 = allFancyInclinations # remove -99 'no-data' values for i in rvs1all: if float(i) >=0: rvs1.append(i) # perform the K-S test ans = stats.ks_2samp(rvs1, rvs2) print 'KS for fancyIncList vs all: ',ans # plot the distributions fig = figure() bins = 15 ax1 = fig.add_subplot(211) plot1 = hist(rvs1,bins=bins) ax2 = fig.add_subplot(212) plot1 = hist(rvs2,bins=bins) show() ######################################################################################## ######################################################################################## # plot histograms of the inclinations for both associated galaxies and the # full galaxy data set, combining both redshifted and blueshifted plot_dist_fancyCosInc = False if plot_dist_fancyCosInc: # define the datasets rvs1all = cosFancyIncList rvs1 = [] rvs2 = allCosFancyInclinations # remove -99 'no-data' values for i in rvs1all: if float(i) >=0: rvs1.append(i) # perform the K-S test ans = stats.ks_2samp(rvs1, rvs2) print 'KS for cosFancyIncList vs all: ',ans # plot the distributions fig = figure() bins = 15 ax1 = fig.add_subplot(211) plot1 = hist(rvs1,bins=bins) ax2 = fig.add_subplot(212) plot1 = hist(rvs2,bins=bins) show() ######################################################################################## ######################################################################################## # plot histograms of the inclinations for both associated galaxies and the # full galaxy data set, combining both redshifted and blueshifted plot_dist_fancyCosInc_red_blue = False if plot_dist_fancyCosInc_red_blue: blues = [] reds = [] all = allCosFancyInclinations # remove null "-99" values and split into red and blue groups for i,d in zip(cosFancyIncList,difList): # check for != -99 if i>=0: # d = vel_galaxy - vel_absorber --> positive = blue shifted absorber (closer to us) if d>=0: blues.append(i) if d<0: reds.append(i) # perform the K-S test ans1 = stats.ks_2samp(blues, reds) ans1a = stats.anderson_ksamp([blues,reds]) print 'KS for blue vs red: ',ans1 print 'AD for blue vs red: ',ans1a ans2 = stats.ks_2samp(blues, all) print 'KS for blue vs all: ',ans2 ans3 = stats.ks_2samp(reds, all) print 'KS for red vs all: ',ans3 # plot the distributions fig = figure() bins = 15 ax1 = fig.add_subplot(311) plot1 = hist(blues,bins=bins) title('blueshifted Cos(fancy_inc)') ax2 = fig.add_subplot(312) plot2 = hist(reds,bins=bins) title('redshifted Cos(fancy_inc)') ax3 = fig.add_subplot(313) plot3 = hist(all,bins=bins) title('Full galaxy table Cos(fancy_inc)') show() ######################################################################################## ######################################################################################## # plot histograms of the inclinations for both associated galaxies and the # full galaxy data set, combining both redshifted and blueshifted plot_dist_cosInc_red_blue = False if plot_dist_cosInc_red_blue: blues = [] reds = [] all = allCosInclinations # remove null "-99" values and split into red and blue groups for i,d in zip(cosIncList,difList): # check for != -99 if i>=0: # d = vel_galaxy - vel_absorber --> positive = blue shifted absorber (closer to us) if d>=0: blues.append(i) if d<0: reds.append(i) # perform the K-S test ans1 = stats.ks_2samp(blues, reds) ans1a = stats.anderson_ksamp([blues,reds]) print 'KS for blue vs red: ',ans1 print 'AD for blue vs red: ',ans1a ans2 = stats.ks_2samp(blues, all) print 'KS for blue vs all: ',ans2 ans3 = stats.ks_2samp(reds, all) print 'KS for red vs all: ',ans3 # 
plot the distributions fig = figure(figsize=(8,8)) subplots_adjust(left=None, bottom=None, right=None, top=None, wspace=None, hspace=0.4) bins = 15 ax1 = fig.add_subplot(311) plot1 = hist(blues,bins=bins) title('blueshifted Cos(inc)') ax2 = fig.add_subplot(312) plot2 = hist(reds,bins=bins) title('redshifted Cos(inc)') ax3 = fig.add_subplot(313) plot3 = hist(all,bins=bins) title('Full galaxy table Cos(inc)') show() ######################################################################################## ######################################################################################## # plot histograms of the inclinations for both associated galaxies and the # full galaxy data set, combining both redshifted and blueshifted plot_dist_fancy_inc_red_blue = True if plot_dist_fancy_inc_red_blue: blues = [] reds = [] all = allFancyInclinations # remove null "-99" values and split into red and blue groups for i,d in zip(fancyIncList,difList): # check for != -99 if i>=0: # d = vel_galaxy - vel_absorber --> positive = blue shifted absorber (closer to us) if d>=0: blues.append(i) if d<0: reds.append(i) # perform the K-S test ans1 = stats.ks_2samp(blues, reds) ans1a = stats.anderson_ksamp([blues,reds]) print 'KS for blue vs red: ',ans1 print 'AD for blue vs red: ',ans1a ans2 = stats.ks_2samp(blues, all) print 'KS for blue vs all: ',ans2 ans3 = stats.ks_2samp(reds, all) print 'KS for red vs all: ',ans3 # plot the distributions fig = figure(figsize=(8,8)) subplots_adjust(left=None, bottom=None, right=None, top=None, wspace=None, hspace=0.4) bins = 15 ax1 = fig.add_subplot(311) plot1 = hist(blues,bins=bins) title('blueshifted fancy_inc') ax2 = fig.add_subplot(312) plot2 = hist(reds,bins=bins) title('redshifted fancy_inc') ax3 = fig.add_subplot(313) plot3 = hist(all,bins=bins) title('Full galaxy table fancy_inc') show()
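The filter-then-test pattern above repeats once per quantity. A helper sketch, under the same -99 sentinel convention this script uses (not part of the original):

def compare_red_blue(values, diffs, label):
    # Split 'values' by velocity-difference sign (d >= 0 -> blueshifted
    # absorber, d < 0 -> redshifted), drop -99 'no-data' sentinels, and
    # run both two-sample tests.
    b_vals = [v for v, d in zip(values, diffs) if float(v) >= 0 and d >= 0]
    r_vals = [v for v, d in zip(values, diffs) if float(v) >= 0 and d < 0]
    print 'KS for blue vs red ' + label + ': ', stats.ks_2samp(b_vals, r_vals)
    print 'AD for blue vs red ' + label + ': ', stats.anderson_ksamp([b_vals, r_vals])
    return b_vals, r_vals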
def ADTest(x, y):
    # Return the approximate significance level of the k-sample
    # Anderson-Darling test, or -1 if the test cannot be computed
    # (e.g. too few distinct observations).
    try:
        return anderson_ksamp([x, y])[2]
    except Exception as e:
        print e
        return -1
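A minimal usage sketch for ADTest with synthetic data (numpy assumed imported as np):

x = np.random.normal(0.0, 1.0, 300)
y = np.random.normal(0.5, 1.0, 300)
print ADTest(x, y)             # small value: the samples differ
print ADTest([1, 1], [1, 1])   # -1: anderson_ksamp raises on degenerate input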
def main(): if getpass.getuser() == 'David': pickleFilename = '/Users/David/Research_Documents/inclination/pilotData.p' saveDirectory = '/Users/David/Research_Documents/inclination/pilot_paper/figures' elif getpass.getuser() == 'frenchd': pickleFilename = '/usr/users/frenchd/inclination/pilotData.p' saveDirectory = '/usr/users/frenchd/inclination/pilot_paper/figures' else: print 'Could not determine username. Exiting.' sys.exit() pickleFile = open(pickleFilename,'rU') fullDict = pickle.load(pickleFile) pickleFile.close() # save each plot? save = False # overall structure: fullDict is a dictionary with all the lines and their data in it # separated into 'associated' and 'ambiguous' as the two keys. Associated contains # all the lists of data for lines associated with a galaxy. Ambiguous contains all # the lists of data for lines not unambiguously associated (could be many galaxies # or none) ########################################################################################## ########################################################################################## # all the lists to be used for associated lines lyaVList = fullDict['lyaVList'] lyaWList = fullDict['lyaWList'] lyaErrorList = fullDict['lyaErrorList'] naList = fullDict['naList'] bList = fullDict['bList'] impactList = fullDict['impactList'] azList = fullDict['azList'] newAzList = fullDict['newAzList'] incList = fullDict['incList'] fancyIncList = fullDict['fancyIncList'] cosIncList = fullDict['cosIncList'] fancyCosIncList = fullDict['fancyCosIncList'] paList = fullDict['paList'] vcorrList = fullDict['vcorrList'] majList = fullDict['majList'] difList = fullDict['difList'] envList = fullDict['envList'] morphList = fullDict['morphList'] galaxyNameList = fullDict['galaxyNameList'] lyaV_blue = [] lyaV_red = [] lyaW_blue = [] lyaW_red = [] lyaErr_blue = [] lyaErr_red = [] na_blue = [] na_red = [] b_blue = [] b_red = [] impact_blue = [] impact_red = [] az_blue = [] az_red = [] newAz_blue = [] newAz_red = [] inc_blue = [] inc_red = [] fancyInc_blue = [] fancyInc_red = [] cosInc_blue = [] cosInc_red = [] fancyCosInc_blue = [] fancyCosInc_red = [] pa_blue = [] pa_red = [] vcorr_blue = [] vcorr_red = [] maj_blue = [] maj_red = [] dif_blue = [] dif_red = [] env_blue = [] env_red = [] morph_blue = [] morph_red = [] c = -1 for d in difList: c +=1 if d > 0: # blueshifted absorption lyaV_blue.append(lyaVList[c]) lyaW_blue.append(lyaWList[c]) lyaErr_blue.append(lyaErrorList[c]) na_blue.append(naList[c]) b_blue.append(bList[c]) impact_blue.append(impactList[c]) az_blue.append(azList[c]) newAz_blue.append(newAzList[c]) inc_blue.append(incList[c]) fancyInc_blue.append(fancyIncList[c]) cosInc_blue.append(cosIncList[c]) fancyCosInc_blue.append(fancyCosIncList[c]) pa_blue.append(paList[c]) vcorr_blue.append(vcorrList[c]) maj_blue.append(majList[c]) dif_blue.append(difList[c]) env_blue.append(envList[c]) morph_blue.append(morphList[c]) else: # redshifted absorption lyaV_red.append(lyaVList[c]) lyaW_red.append(lyaWList[c]) lyaErr_red.append(lyaErrorList[c]) na_red.append(naList[c]) b_red.append(bList[c]) impact_red.append(impactList[c]) az_red.append(azList[c]) newAz_red.append(newAzList[c]) inc_red.append(incList[c]) fancyInc_red.append(fancyIncList[c]) cosInc_red.append(cosIncList[c]) fancyCosInc_red.append(fancyCosIncList[c]) pa_red.append(paList[c]) vcorr_red.append(vcorrList[c]) maj_red.append(majList[c]) dif_red.append(difList[c]) env_red.append(envList[c]) morph_red.append(morphList[c]) 
########################################################################################## ########################################################################################## # all the lists to be used for ambiguous lines lyaVListAmb = fullDict['lyaVListAmb'] lyaWListAmb = fullDict['lyaWListAmb'] lyaErrorListAmb = fullDict['lyaErrorListAmb'] naListAmb = fullDict['naListAmb'] bListAmb = fullDict['bListAmb'] impactListAmb = fullDict['impactListAmb'] azListAmb = fullDict['azListAmb'] newAzListAmb = fullDict['newAzListAmb'] incListAmb = fullDict['incListAmb'] fancyIncListAmb = fullDict['fancyIncListAmb'] cosIncListAmb = fullDict['cosIncListAmb'] fancyCosIncListAmb = fullDict['fancyCosIncListAmb'] paListAmb = fullDict['paListAmb'] vcorrListAmb = fullDict['vcorrListAmb'] majListAmb = fullDict['majListAmb'] difListAmb = fullDict['difListAmb'] envListAmb = fullDict['envListAmb'] morphListAmb = fullDict['morphListAmb'] galaxyNameListAmb = fullDict['galaxyNameListAmb'] lyaV_blueAmb = [] lyaV_redAmb = [] lyaW_blueAmb = [] lyaW_redAmb = [] lyaErr_blueAmb = [] lyaErr_redAmb = [] na_blueAmb = [] na_redAmb = [] b_blueAmb = [] b_redAmb = [] impact_blueAmb = [] impact_redAmb = [] az_blueAmb = [] az_redAmb = [] newAz_blueAmb = [] newAz_redAmb = [] inc_blueAmb = [] inc_redAmb = [] fancyInc_blueAmb = [] fancyInc_redAmb = [] cosInc_blueAmb = [] cosInc_redAmb = [] fancyCosInc_blueAmb = [] fancyCosInc_redAmb = [] pa_blueAmb = [] pa_redAmb = [] vcorr_blueAmb = [] vcorr_redAmb = [] maj_blueAmb = [] maj_redAmb = [] dif_blueAmb = [] dif_redAmb = [] env_blueAmb = [] env_redAmb = [] morph_blueAmb = [] morph_redAmb = [] c = -1 for d in difListAmb: c +=1 if d > 0: # blueshifted absorption lyaV_blueAmb.append(lyaVListAmb[c]) lyaW_blueAmb.append(lyaWListAmb[c]) lyaErr_blueAmb.append(lyaErrorListAmb[c]) na_blueAmb.append(naListAmb[c]) b_blueAmb.append(bListAmb[c]) impact_blueAmb.append(impactListAmb[c]) az_blueAmb.append(azListAmb[c]) newAz_blueAmb.append(newAzListAmb[c]) inc_blueAmb.append(incListAmb[c]) fancyInc_blueAmb.append(fancyIncListAmb[c]) cosInc_blueAmb.append(cosIncListAmb[c]) fancyCosInc_blueAmb.append(fancyCosIncListAmb[c]) pa_blueAmb.append(paListAmb[c]) vcorr_blueAmb.append(vcorrListAmb[c]) maj_blueAmb.append(majListAmb[c]) dif_blueAmb.append(difListAmb[c]) env_blueAmb.append(envListAmb[c]) morph_blueAmb.append(morphListAmb[c]) else: # redshifted absorption lyaV_redAmb.append(lyaVListAmb[c]) lyaW_redAmb.append(lyaWListAmb[c]) lyaErr_redAmb.append(lyaErrorListAmb[c]) na_redAmb.append(naListAmb[c]) b_redAmb.append(bListAmb[c]) impact_redAmb.append(impactListAmb[c]) az_redAmb.append(azListAmb[c]) newAz_redAmb.append(newAzListAmb[c]) inc_redAmb.append(incListAmb[c]) fancyInc_redAmb.append(fancyIncListAmb[c]) cosInc_redAmb.append(cosIncListAmb[c]) fancyCosInc_redAmb.append(fancyCosIncListAmb[c]) pa_redAmb.append(paListAmb[c]) vcorr_redAmb.append(vcorrListAmb[c]) maj_redAmb.append(majListAmb[c]) dif_redAmb.append(difListAmb[c]) env_redAmb.append(envListAmb[c]) morph_redAmb.append(morphListAmb[c]) ########################################################################################## ########################################################################################## # lists for the full galaxy dataset allPA = fullDict['allPA'] allInclinations = fullDict['allInclinations'] allCosInclinations = fullDict['allCosInclinations'] allFancyInclinations = fullDict['allFancyInclinations'] allFancyCosInclinations = fullDict['allCosFancyInclinations'] total = 0 totalNo = 0 totalYes = 0 totalIsolated = 0 
totalGroup = 0 ######################################################################################## ######################################################################################## # plot histograms of the cos(inclinations) for both associated galaxies and the # full galaxy data set, combining both redshifted and blueshifted plot_dist_cosInc = False if plot_dist_cosInc: ''' Here's an example: n1 = 200 n2 = 300 rvs1 = stats.norm.rvs(size=n1, loc=0., scale=1) rvs2 = stats.norm.rvs(size=n2, loc=0.5, scale=1.5) ans = stats.ks_2samp(rvs1, rvs2) print 'ans: ',ans print ''' # define the datasets rvs1all = cosIncList rvs1 = [] rvs2 = allCosInclinations # remove -99 'no-data' values for i in rvs1all: if float(i) >=0: rvs1.append(i) # do the K-S test and print the results ans = stats.ks_2samp(rvs1, rvs2) print 'real ans: ',ans # plot the distributions fig = figure() bins = 15 ax1 = fig.add_subplot(211) plot1 = hist(rvs1,bins=bins) xlim(0,1) ax2 = fig.add_subplot(212) plot1 = hist(rvs2,bins=bins) xlim(0,1) show() ######################################################################################## ######################################################################################## # plot histograms of the inclinations for both associated galaxies and the # full galaxy data set, combining both redshifted and blueshifted plot_dist_inc = False if plot_dist_inc: # define the datasets rvs1all = incList rvs1 = [] rvs2 = allInclinations # remove -99 'no-data' values for i in rvs1all: if float(i) >=0: rvs1.append(i) # perform the K-S test ans = stats.ks_2samp(rvs1, rvs2) print 'real ans: ',ans # plot the distributions fig = figure() bins = 15 ax1 = fig.add_subplot(211) plot1 = hist(rvs1,bins=bins) ax2 = fig.add_subplot(212) plot1 = hist(rvs2,bins=bins) show() ######################################################################################## ######################################################################################## # plot histograms of the inclinations for both associated galaxies and the # full galaxy data set, combining both redshifted and blueshifted plot_dist_fancyInc = False if plot_dist_fancyInc: # define the datasets rvs1all = fancyIncList rvs1 = [] rvs2 = allFancyInclinations # remove -99 'no-data' values for i in rvs1all: if float(i) >=0: rvs1.append(i) # perform the K-S test ans = stats.ks_2samp(rvs1, rvs2) print 'real ans: ',ans # plot the distributions fig = figure() bins = 15 ax1 = fig.add_subplot(211) plot1 = hist(rvs1,bins=bins) ax2 = fig.add_subplot(212) plot1 = hist(rvs2,bins=bins) show() ######################################################################################## ######################################################################################## # plot histograms of the inclinations for both associated galaxies and the # full galaxy data set, combining both redshifted and blueshifted plot_dist_fancyCosInc = False if plot_dist_fancyCosInc: # define the datasets rvs1all = fancyCosIncList rvs1 = [] rvs2 = allFancyCosInclinations # remove -99 'no-data' values for i in rvs1all: if float(i) >=0: rvs1.append(i) # perform the K-S test ans = stats.ks_2samp(rvs1, rvs2) print 'real ans: ',ans # plot the distributions fig = figure() bins = 15 ax1 = fig.add_subplot(211) plot1 = hist(rvs1,bins=bins) ax2 = fig.add_subplot(212) plot1 = hist(rvs2,bins=bins) show() ######################################################################################## ######################################################################################## # plot 
histograms of the inclinations for both associated galaxies and the # full galaxy data set, combining both redshifted and blueshifted plot_dist_fancyCosInc_red_blue = False if plot_dist_fancyCosInc_red_blue: # define the datasets rvs1all = fancyCosInc_blue rvs1 = [] rvs2all = fancyCosInc_red rvs2 = [] rvs3 = allFancyCosInclinations # remove -99 'no-data' values for i in rvs1all: if float(i) >=0: rvs1.append(i) for k in rvs2all: if float(k) >=0: rvs2.append(k) # perform the K-S test ans1 = stats.ks_2samp(rvs1, rvs2) print 'blue vs red: ',ans1 ans2 = stats.ks_2samp(rvs1, rvs3) print 'blue vs all: ',ans2 ans3 = stats.ks_2samp(rvs2, rvs3) print 'red vs all: ',ans3 # plot the distributions fig = figure() bins = 15 ax1 = fig.add_subplot(311) plot1 = hist(rvs1,bins=bins) title('blueshifted Cos(fancy_inc)') ax2 = fig.add_subplot(312) plot2 = hist(rvs2,bins=bins) title('redshifted Cos(fancy_inc)') ax3 = fig.add_subplot(313) plot3 = hist(rvs3,bins=bins) title('Full galaxy table Cos(fancy_inc)') show() ######################################################################################## ######################################################################################## # plot histograms of the inclinations for both associated galaxies and the # full galaxy data set, combining both redshifted and blueshifted plot_dist_cosInc_red_blue = False if plot_dist_cosInc_red_blue: # define the datasets rvs1all = cosInc_blue rvs1 = [] rvs2all = cosInc_red rvs2 = [] rvs3 = allCosInclinations # remove -99 'no-data' values for i in rvs1all: if float(i) >=0: rvs1.append(i) for k in rvs2all: if float(k) >=0: rvs2.append(k) # perform the K-S test ans1 = stats.ks_2samp(rvs1, rvs2) print 'blue vs red, KS: ',ans1 ans1a = stats.anderson_ksamp([rvs1,rvs2]) print 'blue vs red, A-D:', ans1a print ans2 = stats.ks_2samp(rvs1, rvs3) print 'blue vs all, KS: ',ans2 ans2a = stats.anderson_ksamp([rvs1,rvs3]) print 'blue vs all, A-D: ',ans2a print ans3 = stats.ks_2samp(rvs2, rvs3) print 'red vs all, KS: ',ans3 ans3a = stats.anderson_ksamp([rvs2,rvs3]) print 'red vs all, A-D: ',ans3a print # plot the distributions fig = figure(figsize=(8,8)) subplots_adjust(left=None, bottom=None, right=None, top=None, wspace=None, hspace=0.4) bins = 15 ax1 = fig.add_subplot(311) plot1 = hist(rvs1,bins=bins) title('blueshifted Cos(inc)') ax2 = fig.add_subplot(312) plot2 = hist(rvs2,bins=bins) title('redshifted Cos(inc)') ax3 = fig.add_subplot(313) plot3 = hist(rvs3,bins=bins) title('Full galaxy table Cos(inc)') show() # fig = figure() # # subplots_adjust(hspace=0.200) # ax = fig.add_subplot(211) # bins = [0,.10,.20,.30,.40,.50,.60,.70,.80,.90] # subplots_adjust(left=None, bottom=None, right=None, top=None, wspace=None, hspace=0.4) # # plot1 = hist(cosIncList,bins=bins,histtype='bar') # title('Absorber-associated galaxies cos(inclination)') # xlabel('Inclination (deg)') # ylabel('Number') # # ax = fig.add_subplot(212) # bins = [0,.10,.20,.30,.40,.50,.60,.70,.80,.90] # plot1 = hist(allCosInclinations,bins=bins,histtype='bar') # title('Full galaxy sample cos(inclination)') # xlabel('Inclination (deg)') # ylabel('Number') # # tight_layout() # # if save: # savefig('{0}/inc_dist.pdf'.format(saveDirectory),format='pdf') # else: # show() ######################################################################################## ######################################################################################## # plot histograms of the azimuths of the red vs blue shifted absorber samples # conduct KS and AD tests of these distributions # 
plot_dist_az_red_blue = False if plot_dist_az_red_blue: # define the datasets rvs1all = newAz_blue rvs1 = [] rvs2all = newAz_red rvs2 = [] # remove -99 'no-data' values for i in rvs1all: if float(i) >=0: rvs1.append(i) for k in rvs2all: if float(k) >=0: rvs2.append(k) # perform the K-S test ans1 = stats.ks_2samp(rvs1, rvs2) print 'blue vs red, KS: ',ans1 ans1a = stats.anderson_ksamp([rvs1,rvs2]) print 'blue vs red, A-D:', ans1a print # plot the distributions fig = figure(figsize=(8,8)) subplots_adjust(left=None, bottom=None, right=None, top=None, wspace=None, hspace=0.4) bins = 15 ax1 = fig.add_subplot(211) plot1 = hist(rvs1,bins=bins) title('blueshifted Azimuths') ax2 = fig.add_subplot(212) plot2 = hist(rvs2,bins=bins) title('redshifted Azimuths') show() ######################################################################################## ######################################################################################## # plot histograms of the azimuths of the red vs blue shifted absorber samples # conduct KS and AD tests of these distributions # plot_dist_ew_red_blue = False if plot_dist_ew_red_blue: # define the datasets rvs1all = lyaW_blue rvs1 = [] rvs2all = lyaW_red rvs2 = [] # remove -99 'no-data' values for i in rvs1all: if float(i) >=0: rvs1.append(i) for k in rvs2all: if float(k) >=0: rvs2.append(k) # perform the K-S test ans1 = stats.ks_2samp(rvs1, rvs2) print 'blue vs red, KS: ',ans1 ans1a = stats.anderson_ksamp([rvs1,rvs2]) print 'blue vs red, A-D:', ans1a print # plot the distributions fig = figure(figsize=(8,8)) subplots_adjust(left=None, bottom=None, right=None, top=None, wspace=None, hspace=0.4) bins = 15 ax1 = fig.add_subplot(211) plot1 = hist(rvs1,bins=bins) title('blueshifted LyaW') ax2 = fig.add_subplot(212) plot2 = hist(rvs2,bins=bins) title('redshifted LyaW') show() ######################################################################################## ######################################################################################## # plot histograms of the impact parameters of the red vs blue shifted absorber samples # conduct KS and AD tests of these distributions # plot_dist_impact_red_blue = False if plot_dist_impact_red_blue: # define the datasets rvs1all = impact_blue rvs1 = [] rvs2all = impact_red rvs2 = [] # remove -99 'no-data' values for i in rvs1all: if float(i) >=0: rvs1.append(i) for k in rvs2all: if float(k) >=0: rvs2.append(k) # perform the K-S test ans1 = stats.ks_2samp(rvs1, rvs2) print 'blue vs red, KS: ',ans1 ans1a = stats.anderson_ksamp([rvs1,rvs2]) print 'blue vs red, A-D:', ans1a print # plot the distributions fig = figure(figsize=(8,8)) subplots_adjust(left=None, bottom=None, right=None, top=None, wspace=None, hspace=0.4) bins = 15 ax1 = fig.add_subplot(211) plot1 = hist(rvs1,bins=bins) title('blueshifted impact parameter (kpc)') ax2 = fig.add_subplot(212) plot2 = hist(rvs2,bins=bins) title('redshifted impact parameter (kpc)') show() ######################################################################################## ######################################################################################## # plot histograms of the b-parameter of the red vs blue shifted absorber samples # conduct KS and AD tests of these distributions # plot_dist_b_red_blue = False if plot_dist_b_red_blue: # define the datasets rvs1all = b_blue rvs1 = [] rvs2all = b_red rvs2 = [] # remove -99 'no-data' values for i in rvs1all: if float(i) >=0: rvs1.append(i) for k in rvs2all: if float(k) >=0: rvs2.append(k) # perform the K-S test ans1 = 
stats.ks_2samp(rvs1, rvs2) print 'blue vs red, KS: ',ans1 ans1a = stats.anderson_ksamp([rvs1,rvs2]) print 'blue vs red, A-D:', ans1a print # plot the distributions fig = figure(figsize=(8,8)) subplots_adjust(left=None, bottom=None, right=None, top=None, wspace=None, hspace=0.4) bins = 15 ax1 = fig.add_subplot(211) plot1 = hist(rvs1,bins=bins) title('blueshifted Doppler b-parameter') ax2 = fig.add_subplot(212) plot2 = hist(rvs2,bins=bins) title('redshifted Doppler b-parameter') show() ######################################################################################## ######################################################################################## # plot histograms of the major axis of the red vs blue shifted absorber samples # conduct KS and AD tests of these distributions # plot_dist_maj_red_blue = False if plot_dist_maj_red_blue: # define the datasets rvs1all = maj_blue rvs1 = [] rvs2all = maj_red rvs2 = [] # remove -99 'no-data' values for i in rvs1all: if float(i) >=0: rvs1.append(i) for k in rvs2all: if float(k) >=0: rvs2.append(k) # perform the K-S test ans1 = stats.ks_2samp(rvs1, rvs2) print 'blue vs red, KS: ',ans1 ans1a = stats.anderson_ksamp([rvs1,rvs2]) print 'blue vs red, A-D:', ans1a print # plot the distributions fig = figure(figsize=(8,8)) subplots_adjust(left=None, bottom=None, right=None, top=None, wspace=None, hspace=0.4) bins = 15 ax1 = fig.add_subplot(211) plot1 = hist(rvs1,bins=bins) title('blueshifted major axis (kpc)') ax2 = fig.add_subplot(212) plot2 = hist(rvs2,bins=bins) title('redshifted major axis (kpc)') show() ######################################################################################## ######################################################################################## # plot histograms of the morphologies of the red vs blue shifted absorber samples # plot_dist_morphs = True if plot_dist_morphs: # define the datasets rvs1all = morph_blue rvs1 = [] rvs2all = morph_red rvs2 = [] print 'blue absorbers morphology: ', for b in morph_blue: print b print print print 'red absorbers morphology: ', for r in morph_red: print r
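The morphology columns are strings, so ks_2samp and anderson_ksamp do not apply to them directly, which is presumably why this last block only prints the values. As a sketch only, a chi-squared contingency test is one way to compare the two morphology samples (morph_blue and morph_red are the lists defined above):

from collections import Counter
from scipy import stats

# Tabulate morphology strings and compare the blue and red samples with
# a contingency test instead of the numeric two-sample tests.
blue_counts = Counter(morph_blue)
red_counts = Counter(morph_red)
categories = sorted(set(blue_counts) | set(red_counts))
table = [[blue_counts[c] for c in categories],
         [red_counts[c] for c in categories]]
chi2_stat, p, dof, expected = stats.chi2_contingency(table)
print 'chi2 contingency for blue vs red morphology: ', chi2_stat, p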
y = w * normpdf(x, m, np.sqrt(c))[0]
ax.plot(x, y * (bins[1] - bins[0]), "--", c=colors[k])
ax.arrow(float(m), 0, 0, 0.12 * ylims[j],
         head_width=0.02 * (xlims[j][1] - xlims[j][0]),
         head_length=0.05 * ylims[j], fc=colors[k], ec=colors[k])
# ax.axvline(m, c=colors[k], ls="--", lw=1.5)
if l == 0:
    line = [r"\multirow{{{1}}}{{*}}{{{0}}}".format(parameters[j], len(d.best.means_)),
            tablab[k], len(v[cond]), round(m, 2), round(np.sqrt(c), 2), round(w, 2)]
else:
    line = [" ", " ", round(m, 2), round(np.sqrt(c), 2), round(w, 2)]
if j in [0, 2]:
    ax.set_ylabel(r"Fraction of total")
ax.set_ylim(0, ylims[j])
# print " & ".join([str(xx) for xx in line]) + "\\\\"
# print "\multicolumn{6}{c}{- - - - - -}\\\\"
vs.append(sp10[j])

print len(vs[0]), len(vs[1]), len(vs[2])
print "NE + SYM: ", ks_2samp(vs[0], vs[1])
print "NE + VIRGO/FORNAX: ", ks_2samp(vs[0], vs[2])
print "SYM + VIRGO/FORNAX: ", ks_2samp(vs[1], vs[2])
print "NE + SYM: ", anderson_ksamp((vs[0], vs[1]))
print "NE + VIRGO/FORNAX: ", anderson_ksamp((vs[0], vs[2]))
print "SYM + VIRGO/FORNAX: ", anderson_ksamp((vs[1], vs[2]))
print

plt.pause(0.001)
plt.savefig(os.path.join(os.getcwd(), "figs/hist_outer.png"))
# plt.show()
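The three pairwise KS and AD comparisons above can be written as a loop. A sketch, assuming vs holds the three samples in the order NE, SYM, VIRGO/FORNAX:

from itertools import combinations
from scipy.stats import ks_2samp, anderson_ksamp

names = ["NE", "SYM", "VIRGO/FORNAX"]
# Run both two-sample tests on every pair of groups.
for (i, a), (j, b) in combinations(enumerate(vs), 2):
    print "%s + %s KS: " % (names[i], names[j]), ks_2samp(a, b)
    print "%s + %s AD: " % (names[i], names[j]), anderson_ksamp((a, b))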
def time_anderson_ksamp(self):
    with warnings.catch_warnings():
        warnings.simplefilter('ignore', UserWarning)
        anderson_ksamp(self.rand)
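This reads like an airspeed-velocity style benchmark method; the warning filter matters because anderson_ksamp emits a UserWarning whenever the returned significance level is clipped to its tabulated range. A self-contained sketch with an assumed setup fixture (the real benchmark's fixture is not shown here):

import warnings
import numpy as np
from scipy.stats import anderson_ksamp

class TimeAndersonKSamp(object):
    def setup(self):
        # Assumed fixture: a few small random samples bound to self.rand.
        rng = np.random.RandomState(12345678)
        self.rand = [rng.normal(loc=loc, size=30) for loc in (0.0, 0.1, 0.2)]

    def time_anderson_ksamp(self):
        with warnings.catch_warnings():
            warnings.simplefilter('ignore', UserWarning)
            anderson_ksamp(self.rand)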
def main(): # assuming 'theFile' contains one name per line, read the file if getpass.getuser() == 'frenchd': # pickleFilename = '/Users/frenchd/Research/inclination/git_inclination/pilot_paper_code/pilotData2.p' # resultsFilename = '/Users/frenchd/inclination/git_inclination/LG_correlation_combined5_11_25cut_edit4.csv' # saveDirectory = '/Users/frenchd/Research/inclination/git_inclination/pilot_paper_code/plots6/' # WS09data = '/Users/frenchd/Research/inclination/git_inclination/WS2009_lya_data.tsv' # pickleFilename = '/Users/frenchd/Research/inclination/git_inclination/rotation_paper/pickleSALT.p' # saveDirectory = '/Users/frenchd/Research/inclination/git_inclination/rotation_paper/figures/' # pickleFilename = '/Users/frenchd/Research/inclination/git_inclination/picklePilot_plusSALTcut.p' pickleFilename = '/Users/frenchd/Research/inclination/git_inclination/picklePilot_plusSALT_14.p' gtPickleFilename = '/Users/frenchd/Research/inclination/git_inclination/pickleGT.p' saveDirectory = '/Users/frenchd/Research/inclination/git_inclination/plotting_code/figs/' else: print 'Could not determine username. Exiting.' sys.exit() # use the old pickle file to get the full galaxy dataset info pickleFile = open(pickleFilename,'rU') fullDict = pickle.load(pickleFile) pickleFile.close() # for the whole galaxy table: gtPickleFile = open(gtPickleFilename,'rU') gtDict = pickle.load(gtPickleFile) gtPickleFile.close() # save each plot? save = False # results = open(resultsFilename,'rU') # reader = csv.DictReader(results) # WS = open(WS09data,'rU') # WSreader = csv.DictReader(WS,delimiter=';') virInclude = False cusInclude = False finalInclude = 1 maxEnv = 3000 minL = 0.001 maxEnv = 100 # if match, then the includes in the file have to MATCH the includes above. e.g., if # virInclude = False, cusInclude = True, finalInclude = False, then only systems # matching those three would be included. 
Otherwise, all cusInclude = True would be included # regardless of the others match = False # all the lists to be used for associated lines raList = [] decList = [] lyaVList = [] lyaWList = [] lyaErrList = [] naList = [] bList = [] impactList = [] azList = [] incList = [] fancyIncList = [] cosIncList = [] cosFancyIncList = [] paList = [] vcorrList = [] majList = [] difList = [] envList = [] morphList = [] m15List = [] virList = [] likeList = [] likem15List = [] nameList = [] # for ambiguous lines lyaVAmbList = [] lyaWAmbList = [] envAmbList = [] ambAGNnameList = [] # for include = 2 lines lyaV_2List = [] lyaW_2List = [] env_2List = [] vir_2List = [] impact_2List = [] like_2List = [] # for include = 3 lines lyaV_3List = [] lyaW_3List = [] env_3List = [] vir_3List = [] impact_3List = [] like_3List = [] # for all lines with a galaxy within 500 kpc lyaV_nearestList = [] lyaW_nearestList = [] env_nearestList = [] impact_nearestList = [] diam_nearestList = [] vir_nearestList = [] cus_nearestList = [] # for all lyaV_all = [] lyaW_all = [] agnName_all = [] env_all = [] AGNnameList = [] # WS lists # WSvcorr = [] # WSdiam = [] # WSimpact =[] # WSew = [] # WSvel = [] # WSlya = [] # WSvel_dif = [] # WSvir = [] # WSlike = [] # # l_min = 0.001 # # for w in WSreader: # vcorr = w['HV'] # diam = w['Diam'] # rho = w['rho'] # ew = w['EWLya'] # vel = w['LyaVel'] # lya = w['Lya'] # # if lya == 'Lya ' and isNumber(diam) and isNumber(ew) and isNumber(rho): # if float(rho) <=500.0: # # this is a single galaxy association # vir = calculateVirialRadius(float(diam)) # # vel_dif = float(vcorr) - float(vel) # # # try this "sphere of influence" value instead # m15 = float(diam)**1.5 # # # first for the virial radius # likelihood = math.exp(-(float(rho)/vir)**2) * math.exp(-(vel_dif/200.)**2) # # if vir>= float(rho): # likelihood = likelihood*2 # # # then for the second 'virial like' m15 radius # likelihoodm15 = math.exp(-(float(rho)/m15)**2) * math.exp(-(vel_dif/200.)**2) # # if m15>= float(rho): # likelihoodm15 = likelihoodm15*2 # # if likelihood <= likelihoodm15: # likelihood = likelihoodm15 # # WSlike.append(likelihood) # # # l_min=0 # # if likelihood >= l_min: # # WSvcorr.append(float(vcorr)) # WSdiam.append(float(diam)) # WSvir.append(vir) # WSimpact.append(float(rho)) # WSew.append(float(ew)) # WSvel.append(float(vel)) # WSlya.append(lya) # WSvel_dif.append(vel_dif) targetNameL= fullDict['targetName'] galaxyNameL = fullDict['galaxyName'] environmentL = fullDict['environment'] RA_agnL = fullDict['RA_agn'] Dec_agnL = fullDict['Dec_agn'] RA_galL = fullDict['RA_gal'] Dec_galL = fullDict['Dec_gal'] likelihoodL = fullDict['likelihood'] likelihood_cusL = fullDict['likelihood_cus'] virialRadiusL = fullDict['virialRadius'] cusL = fullDict['cus'] impactParameterL = fullDict['impact'] vcorrL = fullDict['vcorr'] radialVelocityL = fullDict['radialVelocity'] vel_diffL = fullDict['vel_diff'] distGalaxyL = fullDict['distGalaxy'] majorAxisL = fullDict['majorAxis'] minorAxisL = fullDict['minorAxis'] inclinationL = fullDict['inclination'] positionAngleL = fullDict['PA'] azimuthL = fullDict['azimuth'] RC3flagL = fullDict['RC3flag'] RC3typeL = fullDict['RC3type'] RC3incL = fullDict['RC3inc'] RC3paL = fullDict['RC3pa'] final_morphologyL = fullDict['final_morphology'] includeL = fullDict['include'] include_virL = fullDict['include_vir'] include_customL = fullDict['include_custom'] Lya_vL = fullDict['Lya_v'] vlimitsL = fullDict['vlimits'] Lya_WL = fullDict['Lya_W'] NaL = fullDict['Na'] bL = fullDict['b'] identifiedL = 
fullDict['identified'] sourceL = fullDict['source'] print 'initial len(Lya_vL): ',len(Lya_vL) print i = -1 for include,include_vir,include_cus in zip(includeL,include_virL,include_customL): i+=1 go = False if match: if virInclude == include_vir and cusInclude == include_cus: go = True else: go = False else: if virInclude and include_vir: go = True elif cusInclude and include_cus: go = True elif finalInclude and include: go = True else: go = False galaxyName = galaxyNameL[i] targetName = targetNameL[i] RA_agn = RA_agnL[i] Dec_agn = Dec_agnL[i] RA_gal = RA_galL[i] Dec_gal = Dec_galL[i] lyaV = Lya_vL[i] lyaW = Lya_WL[i] lyaW_err = lyaW*0.1 env = environmentL[i] impact = impactParameterL[i] galaxyDist = distGalaxyL[i] pa = positionAngleL[i] RC3pa = RC3paL[i] morph = final_morphologyL[i] vcorr = vcorrL[i] maj = majorAxisL[i] minor = minorAxisL[i] inc = inclinationL[i] az = azimuthL[i] b = bL[i] b_err = b*0.1 na = NaL[i] na_err = na*0.1 likelihood = likelihoodL[i] likelihoodm15 = likelihood_cusL[i] virialRadius = virialRadiusL[i] m15 = cusL[i] vel_diff = vel_diffL[i] source = sourceL[i] lyaV_all.append(float(lyaV)) lyaW_all.append(float(lyaW)) env_all.append(int(env)) AGNnameList.append(targetName) # for ambiguous lines if include == 0: lyaVAmbList.append(float(lyaV)) lyaWAmbList.append(float(lyaW)) envAmbList.append(float(env)) ambAGNnameList.append(targetName) print 'include = ', include if include == 2: print 'include2 = ',include # for include = 2 lines lyaV_2List.append(float(lyaV)) lyaW_2List.append(float(lyaW)) env_2List.append(float(env)) vir_2List.append(float(virialRadius)) impact_2List.append(float(impact)) like_2List.append(float(likelihood)) if include == 3: # for include = 3 lines lyaV_3List.append(float(lyaV)) lyaW_3List.append(float(lyaW)) env_3List.append(float(env)) vir_3List.append(float(virialRadius)) impact_3List.append(float(impact)) like_3List.append(float(likelihood)) # for all absorbers with a galaxy within 500kpc if isNumber(impact): lyaV_nearestList.append(float(lyaV)) lyaW_nearestList.append(float(lyaW)) env_nearestList.append(float(env)) impact_nearestList.append(float(impact)) diam_nearestList.append(float(maj)) nameList.append(galaxyName) vir_nearestList.append(float(virialRadius)) cus_nearestList.append(float(m15)) # if go and source == 'salt': # if go and source == 'pilot': if go and env <=maxEnv: # if go: if isNumber(RC3pa) and not isNumber(pa): pa = RC3pa if isNumber(inc): cosInc = cos(float(inc) * pi/180.) 
if isNumber(maj) and isNumber(minor): q0 = 0.2 fancyInc = calculateFancyInclination(maj,minor,q0) cosFancyInc = cos(fancyInc * pi/180) else: fancyInc = -99 cosFancyInc = -99 else: cosInc = -99 inc = -99 fancyInc = -99 cosFancyInc = -99 # all the lists to be used for associated lines if float(env) <= maxEnv and float(likelihood) >= minL: raList.append(RA_gal) decList.append(Dec_gal) lyaVList.append(float(lyaV)) lyaWList.append(float(lyaW)) lyaErrList.append(float(lyaW_err)) naList.append(na) bList.append(float(b)) impactList.append(float(impact)) print az azList.append(float(az)) incList.append(float(inc)) fancyIncList.append(fancyInc) cosIncList.append(cosInc) cosFancyIncList.append(cosFancyInc) paList.append(pa) vcorrList.append(vcorr) majList.append(maj) difList.append(float(vel_diff)) envList.append(float(env)) morphList.append(morph) m15List.append(m15) virList.append(virialRadius) likeList.append(likelihood) likem15List.append(likelihoodm15) nameList.append(galaxyName) # lists for the full galaxy dataset majorAxisL = gtDict['majorAxis'] incL = gtDict['inc'] adjustedIncL = gtDict['adjustedInc'] paL = gtDict['PA'] BmagL = gtDict['Bmag'] Bmag_sdssL = gtDict['Bmag_sdss'] RID_medianL = gtDict['RID_median'] RID_meanL = gtDict['RID_mean'] RID_stdL = gtDict['RID_std'] VhelL = gtDict['Vhel'] RAdegL = gtDict['RAdeg'] DEdegL = gtDict['DEdeg'] NameL= gtDict['Name'] allPA = paL allInclinations = [] allCosInclinations = [] # print 'type: ',type(incL) for i in incL: if i != -99: i = float(i) allInclinations.append(i) i2 = pi/180. * i cosi2 = cos(i) allCosInclinations.append(cosi2) allFancyInclinations = [] allCosFancyCosInclinations = [] for i in adjustedIncL: if i != -99: i = float(i) allFancyInclinations.append(i) i2 = pi/180. * i cosi2 = cos(i) allCosFancyCosInclinations.append(cosi2) allDiameter = majorAxisL print 'finished with this shit' total = 0 totalNo = 0 totalYes = 0 totalIsolated = 0 totalGroup = 0 ######################################################################################## ######################################################################################### # print all the things # # absorber info lists blues = [] reds = [] blueAbs = [] redAbs = [] blueW = [] redW = [] blueB = [] redB = [] blueErr = [] redErr = [] blueV = [] redV = [] blueImpact = [] redImpact = [] # galaxy info lists blueInc = [] redInc = [] blueFancyInc = [] redFancyInc = [] blueAz = [] redAz = [] bluePA = [] redPA = [] blueVcorr = [] redVcorr = [] blueEnv = [] redEnv = [] blueVir = [] redVir = [] blueLike = [] redLike = [] # ambiguous stuff void = [] ambig = [] for v,w,e in zip(lyaVAmbList,lyaWAmbList,envAmbList): if e == 0: void.append(w) else: ambig.append(w) # for targets finalTargets = {} for a in AGNnameList: if finalTargets.has_key(a): i = finalTargets[a] i+=1 finalTargets[a] = i else: finalTargets[a] = 1 # for ambiguous targets ambTargets = {} for a in ambAGNnameList: if ambTargets.has_key(a): i = ambTargets[a] i+=1 ambTargets[a] = i else: ambTargets[a] = 1 # for absorbers for d,w,e,v,i,b in zip(difList,lyaWList,lyaErrList,lyaVList,impactList,bList): if d>=0: blues.append(float(d)) blueW.append(float(w)) blueErr.append(float(e)) blueV.append(float(v)) blueImpact.append(float(i)) blueAbs.append(abs(d)) blueB.append(float(b)) else: reds.append(float(d)) redW.append(float(w)) redErr.append(float(e)) redV.append(float(v)) redImpact.append(float(i)) redAbs.append(abs(d)) redB.append(float(b)) ########################################################################################## blueSpiralInc = 
[] redSpiralInc = [] spiralIncList = [] # for spirals only for d,inc in zip(difList,fancyIncList): spiralIncList.append(float(inc)) if d>=0: blueSpiralInc.append(float(inc)) else: redSpiralInc.append(float(inc)) # compile a list of only spiral galaxy inclinations from the full galaxy table if getpass.getuser() == 'frenchd': # galaxyFile = open('/Users/David/Research_Documents/gt/NewGalaxyTable5.csv','rU') galaxyFile = open('/Users/frenchd/Research/gt/FinalGalaxyTable12_filtered.csv','rU') else: print 'Not on laptop, exiting' sys.exit() reader = csv.DictReader(galaxyFile) allDiameters = [] incGT25diam = [] allSpiralIncList = [] q0 = 0.2 for i in reader: # major,minor = eval(i['linDiameters (kpc)']) major = eval(i['MajDiam']) adjustedInc = eval(i['adjustedInc']) morph = i['MType'].lower() if bfind(morph,'s'): if not bfind(morph,'sph') and not bfind(morph,'s0'): if major != -99.99: allSpiralIncList.append(adjustedInc) if float(major) >=25.0: incGT25diam.append(adjustedInc) galaxyFile.close() ########################################################################################## nameDict = {} # for galaxies for d,inc,finc,az,pa,vcorr,e,vir,l,name in zip(difList,incList,fancyIncList,azList,paList,vcorrList,envList,virList, likeList,nameList): if nameDict.has_key(name): i = nameDict[name] i+=1 nameDict[name] = i else: nameDict[name] = 1 if d>=0: if inc !=-99: blueInc.append(float(inc)) if finc !=-99: blueFancyInc.append(float(finc)) if az !=-99: blueAz.append(float(az)) if pa !=-99: bluePA.append(float(pa)) if vcorr !=-99: blueVcorr.append(float(vcorr)) blueEnv.append(float(e)) if vir !=-99: blueVir.append(float(vir)) if l !=-99: blueLike.append(float(l)) else: if inc !=-99: redInc.append(float(inc)) if finc !=-99: redFancyInc.append(float(finc)) if az !=-99: redAz.append(float(az)) if pa !=-99: redPA.append(float(pa)) if vcorr !=-99: redVcorr.append(float(vcorr)) redEnv.append(float(e)) if vir !=-99: redVir.append(float(vir)) if l !=-99: redLike.append(float(l)) galaxyNames = nameDict.keys() # how many absorbers above vs below vel_cut? 
redVelCount200 = 0 redVelCount100 = 0 blueVelCount200 = 0 blueVelCount100 = 0 for b in blues: if b >=200: blueVelCount200 +=1 if b >= 100: blueVelCount100 +=1 for r in reds: if abs(r) >=200: redVelCount200 +=1 if abs(r) >=100: redVelCount100 +=1 assocFancyInc = blueFancyInc + redFancyInc AGNnameDict = {} for i in AGNnameList: if AGNnameDict.has_key(i): c = AGNnameDict[i] c +=1 AGNnameDict[i] = c else: AGNnameDict[i] = 1 AGN_list = AGNnameDict.keys() print print '------------------------ Pilot Data ------------------------------' print print ' FOR THE FOLLOWING INCLUDE SET:' print ' Virial radius include = ',virInclude print ' Custom include = ',cusInclude print ' Final include = ',finalInclude print ' Match = ',match print # print 'total number of lines: ', len(lyaWList) + len(lyaWAmbList) print 'total number of lines: ', len(lyaV_all) print 'total number of unique galaxies matched: ',len(galaxyNames) print 'total number of AGN: ',len(AGN_list) print 'total number of associated lines: ',len(difList) print 'total number of ambiguous lines: ',len(ambig) print 'total number of void lines: ',len(void) print '# of redshifted lines: ',len(reds) print '# of blueshifted lines: ',len(blues) print print print ' ASSOCIATED TARGETS ' print print 'final target number: ',len(finalTargets.keys()) for i in finalTargets.keys(): print i print print print ' AMBIGUOUS TARGETS ' print print 'final ambiguous number: ',len(ambTargets.keys()) for i in ambTargets.keys(): print i print print print '----------------------- Absorber info ----------------------------' print print 'avg blueshifted EW: ',mean(blueW) print 'median blueshifted EW: ',median(blueW) print 'avg blue err: ',mean(blueErr) print 'median blue err: ',median(blueErr) print print 'std(blue EW): ',std(blueW) print 'stats.sem(blue EW): ',stats.sem(blueW) print 'stats.describe(blue EW): ',stats.describe(blueW) print print 'avg blueshifted vel_diff: ',mean(blues) print 'median blueshifted vel_diff: ',median(blues) print 'std(blueshifted vel_diff): ',std(blues) print 'stats.sem(blue vel_dif): ',stats.sem(blues) print 'stats.describe(blue vel_dif): ',stats.describe(blues) print print '% blueshifted which have vel_diff >= 200 km/s: {0}'.format(float(blueVelCount200)/len(blues)) print 'total number with abs(vel_diff) >= 200 km/s: {0}'.format(blueVelCount200) print '% blueshifted which have vel_diff >= 100 km/s: {0}'.format(float(blueVelCount100)/len(blues)) print 'total number with abs(vel_diff) >= 100 km/s: {0}'.format(blueVelCount100) print print 'avg blue velocity: ',mean(blueV) print 'median blue velocity: ',median(blueV) print 'std(blue Velocity): ',std(blueV) print 'avg blue impact: ',mean(blueImpact) print 'median blue impact: ',median(blueImpact) print 'stats.sem(blue impact): ',stats.sem(blueImpact) print 'stats.describe(blue impact): ',stats.describe(blueImpact) print print 'avg redshifted EW: ',mean(redW) print 'median redshifted EW: ',median(redW) print 'avg red err: ',mean(redErr) print 'median red err: ',median(redErr) print print 'std(red EW): ',std(redW) print 'stats.sem(red EW): ',stats.sem(redW) print 'stats.describe(red EW): ',stats.describe(redW) print print 'avg redshifted vel_diff: ',mean(reds) print 'median redshifted vel_diff: ',median(reds) print 'std(redshifted vel_dif): ',std(reds) print 'stats.sem(red vel_dif): ',stats.sem(reds) print 'stats.describe(red vel_dif): ',stats.describe(reds) print print '% redshifted which have abs(vel_diff) >= 200 km/s: {0}'.format(float(redVelCount200)/len(reds)) print 'total number with abs(vel_diff)
>= 200 km/s: {0}'.format(redVelCount200) print '% redshifted which have abs(vel_diff) >= 100 km/s: {0}'.format(float(redVelCount100)/len(reds)) print 'total number with abs(vel_diff) >= 100 km/s: {0}'.format(redVelCount100) print print 'avg red velocity: ',mean(redV) print 'median red velocity: ',median(redV) print print 'avg red impact: ',mean(redImpact) print 'median red impact: ',median(redImpact) print 'stats.sem(red impact): ',stats.sem(redImpact) print 'stats.describe(red impact): ',stats.describe(redImpact) print 'std(red impact): ',std(redImpact) print print '----------------------- Galaxy info ----------------------------' print # regular inclinations incCut = 50 totalBlueInc = len(blueInc) totalRedInc = len(redInc) blueIncCount = 0 for i in blueInc: if i >= incCut: blueIncCount +=1 redIncCount = 0 for i in redInc: if i >= incCut: redIncCount +=1 totalInc = len(allInclinations) totalCount = 0 for i in allInclinations: if i >= incCut: totalCount +=1 # fancy inclinations totalBlueFancyInc = len(blueFancyInc) totalRedFancyInc = len(redFancyInc) blueFancyIncCount = 0 for i in blueFancyInc: if i >= incCut: blueFancyIncCount +=1 redFancyIncCount = 0 for i in redFancyInc: if i >= incCut: redFancyIncCount +=1 combinedCount = redFancyIncCount + blueFancyIncCount totalCombinedCount = totalRedFancyInc + totalBlueFancyInc totalFancyInc = len(allFancyInclinations) totalFancyCount = 0 for i in allFancyInclinations: if i >= incCut: totalFancyCount +=1 print print ' INCLINATIONS: ' print print 'Blue: {0} % of associated galaxies have >={1}% inclination'.format(float(blueIncCount)/float(totalBlueInc),incCut) print 'Red: {0} % of associated galaxies have >={1}% inclination'.format(float(redIncCount)/float(totalRedInc),incCut) print 'All: {0} % of ALL galaxies have >={1}% inclination'.format(float(totalCount)/float(totalInc),incCut) print print ' FANCY INCLINATIONS: ' print print 'Blue: {0} % of associated galaxies have >={1}% fancy inclination'.format(float(blueFancyIncCount)/float(totalBlueFancyInc),incCut) print 'Red: {0} % of associated galaxies have >={1}% fancy inclination'.format(float(redFancyIncCount)/float(totalRedFancyInc),incCut) print 'All: {0} % of ALL galaxies have >={1}% fancy inclination'.format(float(totalFancyCount)/float(totalFancyInc),incCut) print 'Combined: {0} % of associated galaxies have >= {1} fancy inclination'.format(float(combinedCount)/float(totalCombinedCount),incCut) print print 'Average all fancy inclination: ',mean(allFancyInclinations) print 'stats.sem(all): ',stats.sem(allFancyInclinations) print print 'avg blue inclination: ',mean(blueInc) print 'median blue inclination: ',median(blueInc) print 'avg blue fancy inclination: ',mean(blueFancyInc) print 'median blue fancy inclination: ',median(blueFancyInc) print print 'avg red inclination: ',mean(redInc) print 'median red inclination: ',median(redInc) print 'avg red fancy inclination: ',mean(redFancyInc) print 'median red fancy inclination: ',median(redFancyInc) print print 'mean associated: ',mean(assocFancyInc) print 'stats.sem(associated): ',stats.sem(assocFancyInc) print 'stats.describe(associated): ',stats.describe(assocFancyInc) print 'stats.sem(blue): ',stats.sem(blueFancyInc) print 'stats.describe(blue): ',stats.describe(blueFancyInc) print print 'stats.sem(red): ',stats.sem(redFancyInc) print 'stats.describe(red): ',stats.describe(redFancyInc) print print " AZIMUTHS and PA: " print print 'avg blue azimuth: ',mean(blueAz) print 'median blue azimuth: ',median(blueAz) print 'stats.sem(blue az): 
print
print ' INCLINATIONS: '
print
# quote the fractions as percentages to match the '%' in the labels
print 'Blue: {0}% of associated galaxies have inclination >= {1}'.format(100*float(blueIncCount)/float(totalBlueInc), incCut)
print 'Red: {0}% of associated galaxies have inclination >= {1}'.format(100*float(redIncCount)/float(totalRedInc), incCut)
print 'All: {0}% of ALL galaxies have inclination >= {1}'.format(100*float(totalCount)/float(totalInc), incCut)
print
print ' FANCY INCLINATIONS: '
print
print 'Blue: {0}% of associated galaxies have fancy inclination >= {1}'.format(100*float(blueFancyIncCount)/float(totalBlueFancyInc), incCut)
print 'Red: {0}% of associated galaxies have fancy inclination >= {1}'.format(100*float(redFancyIncCount)/float(totalRedFancyInc), incCut)
print 'All: {0}% of ALL galaxies have fancy inclination >= {1}'.format(100*float(totalFancyCount)/float(totalFancyInc), incCut)
print 'Combined: {0}% of associated galaxies have fancy inclination >= {1}'.format(100*float(combinedCount)/float(totalCombinedCount), incCut)
print
print 'Average all fancy inclination: ', mean(allFancyInclinations)
print 'stats.sem(all): ', stats.sem(allFancyInclinations)
print
print 'avg blue inclination: ', mean(blueInc)
print 'median blue inclination: ', median(blueInc)
print 'avg blue fancy inclination: ', mean(blueFancyInc)
print 'median blue fancy inclination: ', median(blueFancyInc)
print
print 'avg red inclination: ', mean(redInc)
print 'median red inclination: ', median(redInc)
print 'avg red fancy inclination: ', mean(redFancyInc)
print 'median red fancy inclination: ', median(redFancyInc)
print
print 'mean associated: ', mean(assocFancyInc)
print 'stats.sem(associated): ', stats.sem(assocFancyInc)
print 'stats.describe(associated): ', stats.describe(assocFancyInc)
print 'stats.sem(blue): ', stats.sem(blueFancyInc)
print 'stats.describe(blue): ', stats.describe(blueFancyInc)
print
print 'stats.sem(red): ', stats.sem(redFancyInc)
print 'stats.describe(red): ', stats.describe(redFancyInc)
print
print ' AZIMUTHS and PA: '
print
print 'avg blue azimuth: ', mean(blueAz)
print 'median blue azimuth: ', median(blueAz)
print 'stats.sem(blue az): ', stats.sem(blueAz)
print 'stats.describe(blue az): ', stats.describe(blueAz)
print
print 'avg red azimuth: ', mean(redAz)
print 'median red azimuth: ', median(redAz)
print 'stats.sem(red az): ', stats.sem(redAz)
print 'stats.describe(red az): ', stats.describe(redAz)
print
print 'avg blue PA: ', mean(bluePA)
print 'median blue PA: ', median(bluePA)
print
print 'avg red PA: ', mean(redPA)
print 'median red PA: ', median(redPA)
print
print ' VCORR: '
print
print 'avg blue vcorr: ', mean(blueVcorr)
print 'median blue vcorr: ', median(blueVcorr)
print
print 'avg red vcorr: ', mean(redVcorr)
print 'median red vcorr: ', median(redVcorr)
print
print ' ENVIRONMENT: '
print
print 'avg blue environment: ', mean(blueEnv)
print 'median blue environment: ', median(blueEnv)
print
print 'avg red environment: ', mean(redEnv)
print 'median red environment: ', median(redEnv)
print
print ' R_vir: '
print
print 'avg blue R_vir: ', mean(blueVir)
print 'median blue R_vir: ', median(blueVir)
print 'stats.sem(blue R_vir): ', stats.sem(blueVir)
print 'stats.describe(blue R_vir): ', stats.describe(blueVir)
print
print 'avg red R_vir: ', mean(redVir)
print 'median red R_vir: ', median(redVir)
print 'stats.sem(red R_vir): ', stats.sem(redVir)
print 'stats.describe(red R_vir): ', stats.describe(redVir)
print
print ' LIKELIHOOD: '
print
print 'avg blue likelihood: ', mean(blueLike)
print 'median blue likelihood: ', median(blueLike)
print
print 'avg red likelihood: ', mean(redLike)
print 'median red likelihood: ', median(redLike)
print
print
print '-------------------- Distribution analysis ----------------------'
print
print
print ' FANCY INCLINATIONS: '

# perform the K-S and AD tests for fancy inclination
ans1 = stats.ks_2samp(blueFancyInc, redFancyInc)
ans1a = stats.anderson_ksamp([blueFancyInc, redFancyInc])
print 'KS for blue vs red fancy inclinations: ', ans1
print 'AD for blue vs red fancy inclinations: ', ans1a

ans2 = stats.ks_2samp(blueFancyInc, allFancyInclinations)
print 'KS for blue vs all fancy inclinations: ', ans2

ans3 = stats.ks_2samp(redFancyInc, allFancyInclinations)
print 'KS for red vs all fancy inclinations: ', ans3
print

z_statrb, p_valrb = stats.ranksums(blueFancyInc, redFancyInc)
z_statall, p_valall = stats.ranksums(assocFancyInc, allFancyInclinations)
print 'ranksum red vs blue p-value: ', p_valrb
print 'ranksum associated vs all: ', p_valall

ans4 = stats.ks_2samp(assocFancyInc, allFancyInclinations)
ans4a = stats.anderson_ksamp([assocFancyInc, allFancyInclinations])
print 'KS for all associated vs all fancy inclinations: ', ans4
print 'AD for all associated vs all fancy inclinations: ', ans4a
print

# ans5 = stats.ks_2samp(spiralIncList, allSpiralIncList)
# ans5a = stats.anderson_ksamp([spiralIncList, allSpiralIncList])
#
# print 'KS for all spiral associated vs all spiral fancy inclinations: ', ans5
# print 'AD for all spiral associated vs all spiral fancy inclinations: ', ans5a

print
print ' INCLINATIONS: '
print

# perform the K-S and AD tests for inclination
ans1 = stats.ks_2samp(blueInc, redInc)
ans1a = stats.anderson_ksamp([blueInc, redInc])
print 'KS for blue vs red inclinations: ', ans1
print 'AD for blue vs red inclinations: ', ans1a

ans2 = stats.ks_2samp(blueInc, allInclinations)
print 'KS for blue vs all inclinations: ', ans2

ans3 = stats.ks_2samp(redInc, allInclinations)
print 'KS for red vs all inclinations: ', ans3

assocInc = blueInc + redInc
ans4 = stats.ks_2samp(assocInc, allInclinations)
print 'KS for associated vs all inclinations: ', ans4
print
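# Reading the AD results printed above: scipy's anderson_ksamp returns (statistic,
# critical_values, approximate significance level), with the critical values tabulated
# at the 25/10/5/2.5/1 % levels in older scipy releases (newer releases add more
# levels). A hedged helper (hypothetical, not in the original script) for a yes/no
# reading at roughly the 5% level:
def ad_cannot_reject(ad_result, level_index=2):
    # True when the statistic stays below the chosen critical value, i.e. the test
    # does not reject the null hypothesis that both samples share a parent distribution
    return ad_result[0] < ad_result[1][level_index]

# e.g. ad_cannot_reject(ans1a) for the blue vs red inclination comparison above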
print ' EW Distributions: '
print

# perform the K-S and AD tests for EW
ans1 = stats.ks_2samp(blueW, redW)
ans1a = stats.anderson_ksamp([blueW, redW])
print 'KS for blue vs red EW: ', ans1
print 'AD for blue vs red EW: ', ans1a
print
print ' Impact parameter Distributions: '
print

# perform the K-S and AD tests for impact parameter
ans1 = stats.ks_2samp(blueImpact, redImpact)
ans1a = stats.anderson_ksamp([blueImpact, redImpact])
print 'KS for blue vs red impact parameters: ', ans1
print 'AD for blue vs red impact parameters: ', ans1a
print
print ' \Delta v Distributions: '
print

# perform the K-S and AD tests for \Delta v
ans1 = stats.ks_2samp(blueAbs, redAbs)
ans1a = stats.anderson_ksamp([blueAbs, redAbs])
print 'KS for blue vs red \Delta v: ', ans1
print 'AD for blue vs red \Delta v: ', ans1a
print
print ' Azimuth Distributions: '
print

# perform the K-S and AD tests for azimuth
ans1 = stats.ks_2samp(blueAz, redAz)
ans1a = stats.anderson_ksamp([blueAz, redAz])
print 'KS for blue vs red azimuth: ', ans1
print 'AD for blue vs red azimuth: ', ans1a
print

# now against a flat distribution
flatRed = arange(0, 90, 1)
flatBlue = arange(0, 90, 1)

ans1 = stats.ks_2samp(blueAz, flatBlue)
ans1a = stats.anderson_ksamp([blueAz, flatBlue])
print 'KS for blue vs flat azimuth: ', ans1
print 'AD for blue vs flat azimuth: ', ans1a
print

ans1 = stats.ks_2samp(redAz, flatRed)
ans1a = stats.anderson_ksamp([redAz, flatRed])
print 'KS for red vs flat azimuth: ', ans1
print 'AD for red vs flat azimuth: ', ans1a
print
print
print ' Environment Distributions: '
print

# perform the K-S and AD tests for environment
ans1 = stats.ks_2samp(blueEnv, redEnv)
ans1a = stats.anderson_ksamp([blueEnv, redEnv])
print 'KS for blue vs red environment: ', ans1
print 'AD for blue vs red environment: ', ans1a
print
print ' R_vir Distributions: '
print

# perform the K-S and AD tests for R_vir
ans1 = stats.ks_2samp(blueVir, redVir)
ans1a = stats.anderson_ksamp([blueVir, redVir])
print 'KS for blue vs red R_vir: ', ans1
print 'AD for blue vs red R_vir: ', ans1a
print
print ' Doppler parameter Distributions: '
print

# perform the K-S and AD tests for Doppler parameter
ans1 = stats.ks_2samp(blueB, redB)
ans1a = stats.anderson_ksamp([blueB, redB])
print 'KS for blue vs red doppler parameter: ', ans1
print 'AD for blue vs red doppler parameter: ', ans1a
print
print ' Likelihood Distributions: '
print

# perform the K-S and AD tests for likelihood
ans1 = stats.ks_2samp(blueLike, redLike)
ans1a = stats.anderson_ksamp([blueLike, redLike])
print 'KS for blue vs red likelihood: ', ans1
print 'AD for blue vs red likelihood: ', ans1a
print
print ' COMPLETED. '
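# All of the blue-vs-red comparisons above run the same KS + AD pair, so the whole
# Distribution analysis section could be table-driven. A possible consolidation
# (sketch only; compare() and the label list are hypothetical, not from this script):
def compare(label, blue_sample, red_sample):
    print 'KS for blue vs red {0}: '.format(label), stats.ks_2samp(blue_sample, red_sample)
    print 'AD for blue vs red {0}: '.format(label), stats.anderson_ksamp([blue_sample, red_sample])
    print

# for label, b, r in [('EW', blueW, redW), ('impact parameters', blueImpact, redImpact),
#                     ('azimuth', blueAz, redAz), ('likelihood', blueLike, redLike)]:
#     compare(label, b, r)
#
# Note also that the "flat" azimuth baseline above is an evenly spaced grid,
# arange(0, 90, 1); a random uniform draw of matching size would be an alternative
# reference if sampling noise in the baseline matters.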