Example #1
def AD_test(groups, outfile):
    jdelim = args.delimiter if args.delimiter is not None else ' '
    for i, u in enumerate(groups):
        for j, v in enumerate(groups):
            if j > i or (j == i and len(args.columns) == 1):
                break
            for x, us in enumerate(u.samples):
                for y, vs in enumerate(v.samples):
                    if len(vs) < args.ignore or len(us) < args.ignore:
                        continue
                    if j == i and y >= x:
                        break
                    if args.random is not None:
                        verdict = False
                        for k in range(args.random):
                            res = anderson_ksamp([random.sample(us, args.subsample), random.sample(vs, args.subsample)])
                            # statistic below the loosest critical value
                            if res[0] < res[1][0]:
                                verdict = True
                            outfile.write(jdelim.join(list(u.tup) + list(v.tup) + list(map(str, res))) + '\n')
                        outfile.write('Verdict:' + str(verdict) + '\n')
                    else:
                        res = anderson_ksamp([us, vs])
                        verdict = False
                        if res[0] < res[1][0]:
                            verdict = True
                        outfile.write(jdelim.join(list(u.tup) + list(v.tup) + list(map(str, res))) + '\n')
                        outfile.write('Verdict:' + str(verdict) + '\n')
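A note on the indexing used above: anderson_ksamp returns a (statistic, critical_values, significance_level) tuple, so res[0] < res[1][0] asks whether the statistic falls below the loosest tabulated critical value. A minimal standalone sketch (the sample data here are invented):

import numpy as np
from scipy.stats import anderson_ksamp

rng = np.random.default_rng(0)
a = rng.normal(size=200)
b = rng.normal(size=200)

res = anderson_ksamp([a, b])
print(res[0])  # normalized A-D statistic
print(res[1])  # critical values, one per tabulated significance level
print(res[2])  # approximate significance level (scipy caps this p-value)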
Example #2
def result_stat_tests(inn_samps, mcmc_samps, cnt, parnames):
    """
    Record and print ks and AD test statistics
    """
    
    ks_mcmc_arr = []
    ks_inn_arr = []
    ad_mcmc_arr = []
    ad_inn_arr = []

    # iterate through each parameter
    for i in range(inn_samps.shape[1]):
        # baseline ("ideal") comparison: split the MCMC samples in half and compare the halves; we want the two-tailed p-value from the KS test
        ks_mcmc_result = ks_2samp(mcmc_samps[:int(mcmc_samps.shape[0]/2.0), i], mcmc_samps[int(mcmc_samps.shape[0]/2.0):, i])
        ad_mcmc_result = anderson_ksamp([mcmc_samps[:int(mcmc_samps.shape[0]/2.0), i], mcmc_samps[int(mcmc_samps.shape[0]/2.0):, i]])

        # get predicted vs. true number
        ks_inn_result = ks_2samp(inn_samps[:,i],mcmc_samps[:,i])
        ad_inn_result = anderson_ksamp([inn_samps[:,i],mcmc_samps[:,i]])
        #print('Test Case %d, Parameter(%s) k-s result: [Ideal(%.6f), Predicted(%.6f)]' % (int(cnt),parnames[i],np.array(ks_mcmc_result[1]),np.array(ks_inn_result[1])))
        #print('Test Case %d, Parameter(%s) A-D result: [Ideal(%.6f), Predicted(%.6f)]' % (int(cnt),parnames[i],np.array(ad_mcmc_result[0]),np.array(ad_inn_result[0])))

        # store result stats
        ks_mcmc_arr.append(ks_mcmc_result[1])
        ks_inn_arr.append(ks_inn_result[1])
        ad_mcmc_arr.append(ad_mcmc_result[0])
        ad_inn_arr.append(ad_inn_result[0])

    return ks_mcmc_arr, ks_inn_arr, ad_mcmc_arr, ad_inn_arr
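The half-vs-half idiom above gives a self-consistency baseline: if two halves of the same chain already differ, the test is too sensitive at that sample size. A small sketch on synthetic data (array shapes are illustrative):

import numpy as np
from scipy.stats import ks_2samp, anderson_ksamp

rng = np.random.default_rng(1)
mcmc = rng.normal(size=(1000, 3))  # stand-in for mcmc_samps

half = mcmc.shape[0] // 2
for i in range(mcmc.shape[1]):
    ks_p = ks_2samp(mcmc[:half, i], mcmc[half:, i])[1]  # two-tailed p-value
    ad_stat = anderson_ksamp([mcmc[:half, i], mcmc[half:, i]])[0]
    print(i, ks_p, ad_stat)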
Example #3
def overlap_tests(pred_samp,lalinf_samp,true_vals,kernel_cnn,kernel_lalinf):
    """ Perform Anderson-Darling, K-S, and overlap tests
    to get quantifiable values for accuracy of GAN
    PE method
    Parameters
    ----------
    pred_samp: numpy array
        predicted PE samples from CNN
    lalinf_samp: numpy array
        predicted PE samples from lalinference
    true_vals:
        true scalar point values for parameters to be estimated (taken from GW event paper)
    kernel_cnn: scipy kde instance
        gaussian kde of CNN results
    kernel_lalinf: scipy kde instance
        gaussian kde of lalinference results
    Returns
    -------
    ks_score:
        k-s test score
    ad_score:
        anderson-darling score
    beta_score:
        overlap score. used to determine goodness of CNN PE estimates
    """

    # do k-s test
    ks_mc_score = ks_2samp(pred_samp[:,0].reshape(pred_samp[:,0].shape[0],),lalinf_samp[0][:])
    ks_q_score = ks_2samp(pred_samp[:,1].reshape(pred_samp[:,1].shape[0],),lalinf_samp[1][:])
    ks_score = np.array([ks_mc_score,ks_q_score])

    # do anderson-darling test
    ad_mc_score = anderson_ksamp([pred_samp[:,0].reshape(pred_samp[:,0].shape[0],),lalinf_samp[0][:]])
    ad_q_score = anderson_ksamp([pred_samp[:,1].reshape(pred_samp[:,1].shape[0],),lalinf_samp[1][:]])
    ad_score = [ad_mc_score,ad_q_score]

    # compute overlap statistic
    comb_mc = np.concatenate((pred_samp[:,0].reshape(pred_samp[:,0].shape[0],1),lalinf_samp[0][:].reshape(lalinf_samp[0][:].shape[0],1)))
    comb_q = np.concatenate((pred_samp[:,1].reshape(pred_samp[:,1].shape[0],1),lalinf_samp[1][:].reshape(lalinf_samp[1][:].shape[0],1)))
    X, Y = np.mgrid[np.min(comb_mc):np.max(comb_mc):100j, np.min(comb_q):np.max(comb_q):100j]
    positions = np.vstack([X.ravel(), Y.ravel()])
    #cnn_pdf = np.reshape(kernel_cnn(positions).T, X.shape)
    #print(positions.shape,pred_samp.shape)
    cnn_pdf = kernel_cnn.pdf(positions)

    #X, Y = np.mgrid[np.min(lalinf_samp[0][:]):np.max(lalinf_samp[0][:]):100j, np.min(lalinf_samp[1][:]):np.max(lalinf_samp[1][:]):100j]
    positions = np.vstack([X.ravel(), Y.ravel()])
    #lalinf_pdf = np.reshape(kernel_lalinf(positions).T, X.shape)
    lalinf_pdf = kernel_lalinf.pdf(positions)

    beta_score = np.divide(np.sum( cnn_pdf*lalinf_pdf ),
                              np.sqrt(np.sum( cnn_pdf**2 ) * 
                              np.sum( lalinf_pdf**2 )))
    

    return ks_score, ad_score, beta_score
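The beta (overlap) statistic above is the cosine similarity between the two KDE surfaces evaluated on a shared grid, sum(p*q) / sqrt(sum(p**2) * sum(q**2)), which equals 1 when the PDFs coincide. A self-contained sketch, with invented 2-D samples standing in for the CNN and lalinference posteriors:

import numpy as np
from scipy.stats import gaussian_kde

rng = np.random.default_rng(2)
s1 = rng.normal(0.0, 1.0, size=(2, 500))  # gaussian_kde expects shape (dim, N)
s2 = rng.normal(0.2, 1.0, size=(2, 500))

k1, k2 = gaussian_kde(s1), gaussian_kde(s2)
X, Y = np.mgrid[-4:4:100j, -4:4:100j]
positions = np.vstack([X.ravel(), Y.ravel()])
p, q = k1.pdf(positions), k2.pdf(positions)

# cosine similarity of the two discretized PDFs
beta = np.sum(p * q) / np.sqrt(np.sum(p ** 2) * np.sum(q ** 2))
print(beta)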
Example #4
def ADtest_pm(msk_in, plx_data_full):
    pmRA_in, pmDEC_in = plx_data_full['pmRA'][msk_in],\
        plx_data_full['pmDE'][msk_in]
    pmRA_out, pmDEC_out = plx_data_full['pmRA'][~msk_in],\
        plx_data_full['pmDE'][~msk_in]

    return [
        list(anderson_ksamp([pmRA_in, pmRA_out])),
        list(anderson_ksamp([pmDEC_in, pmDEC_out]))
    ]
Example #5
def _prob_ad(a, b):
    _, _, prob = anderson_ksamp([a, b])
    with np.errstate(divide='ignore'):
        lnprob = np.log(prob)
    if prob > 1:
        print()
        print(anderson_ksamp([a, b]))
        print(ks_2samp(a, b))
        print()
        print(a)
        print(b)
        print(prob, lnprob)
    return prob, lnprob
Example #6
def main():
    if len(sys.argv) < 4:
        return 1
    _, list_a, list_b, significance = sys.argv[:4]
    list_a = json.loads(list_a)
    list_b = json.loads(list_b)
    significance = float(significance)

    shapiro_p_value = stats.shapiro(list_a)[1], stats.shapiro(list_b)[1]
    mann_whitney_p_value = stats.mannwhitneyu(list_a, list_b).pvalue
    anderson_p_value = stats.anderson_ksamp([list_a,
                                             list_b]).significance_level
    welch_p_value = stats.ttest_ind(list_a, list_b, equal_var=False)[1]

    results = {
        'first_sample': list_a,
        'second_sample': list_b,
        'shapiro_p_value': shapiro_p_value,
        'mann_p_value': mann_whitney_p_value,
        'anderson_p_value': anderson_p_value,
        'welch_p_value': welch_p_value,
    }

    if (results['shapiro_p_value'][0] < significance
            and results['shapiro_p_value'][1] < significance):
        results['normal-y'] = True
    else:
        results['normal-y'] = False
    results['significantly_different'] = bool(
        float(results['mann_p_value']) < float(significance))

    print(json.dumps(results))
    return 0
Example #7
def main():
  if len(sys.argv) < 4:
    return 1
  _, list_a, list_b, significance = sys.argv[:4]
  list_a = json.loads(list_a)
  list_b = json.loads(list_b)
  significance = float(significance)

  shapiro_p_value = stats.shapiro(list_a)[1], stats.shapiro(list_b)[1]
  mann_whitney_p_value = stats.mannwhitneyu(list_a, list_b).pvalue
  anderson_p_value = stats.anderson_ksamp([list_a, list_b]).significance_level
  welch_p_value = stats.ttest_ind(list_a, list_b, equal_var=False)[1]

  results = {
      'first_sample': list_a,
      'second_sample': list_b,
      'shapiro_p_value': shapiro_p_value,
      'mann_p_value': mann_whitney_p_value,
      'anderson_p_value': anderson_p_value,
      'welch_p_value': welch_p_value,
  }

  if (results['shapiro_p_value'][0] < significance and
      results['shapiro_p_value'][1] < significance):
    results['normal-y'] = True
  else:
    results['normal-y'] = False
  results['significantly_different'] = bool(
      float(results['mann_p_value']) < float(significance))

  print(json.dumps(results))
  return 0
Example #8
    def one_dimensional_test(self, X_tr, X_te):
        p_vals = []

        # For each dimension we conduct a separate KS test
        for i in range(X_tr.shape[1]):
            feature_tr = X_tr[:, i]
            feature_te = X_te[:, i]

            t_val, p_val = None, None

            if self.ot == OnedimensionalTest.KS:

                # Compute KS statistic and p-value
                t_val, p_val = ks_2samp(feature_tr, feature_te)
            elif self.ot == OnedimensionalTest.AD:
                t_val, _, p_val = anderson_ksamp(
                    [feature_tr.tolist(),
                     feature_te.tolist()])

            p_vals.append(p_val)

        # Bonferroni correction: the family-wise test rejects when the smallest
        # per-dimension p-value falls below alpha / K, so returning the minimum
        # p-value is sufficient (the caller divides alpha by the number of tests).
        p_vals = np.array(p_vals)
        p_val = min(np.min(p_vals), 1.0)

        return p_val, p_vals
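The caller is expected to finish the Bonferroni correction itself: reject only if the returned minimum p-value is below alpha divided by the number of dimensions. A hedged sketch of that decision rule (the surrounding class and its OnedimensionalTest enum are not reproduced here):

import numpy as np
from scipy.stats import ks_2samp

def bonferroni_shift_detect(X_tr, X_te, alpha=0.05):
    # per-dimension KS p-values, as in one_dimensional_test above
    p_vals = np.array([ks_2samp(X_tr[:, i], X_te[:, i])[1]
                       for i in range(X_tr.shape[1])])
    # Bonferroni: compare the smallest p-value against alpha / K
    return p_vals.min() < alpha / X_tr.shape[1], p_vals

rng = np.random.default_rng(3)
shifted, p = bonferroni_shift_detect(rng.normal(size=(500, 4)),
                                     rng.normal(0.5, 1.0, size=(500, 4)))
print(shifted, p)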
Example #9
    def test_example2b(self):
        # Example data taken from an earlier technical report of
        # Scholz and Stephens
        t1 = [194, 15, 41, 29, 33, 181]
        t2 = [413, 14, 58, 37, 100, 65, 9, 169, 447, 184, 36, 201, 118]
        t3 = [34, 31, 18, 18, 67, 57, 62, 7, 22, 34]
        t4 = [90, 10, 60, 186, 61, 49, 14, 24, 56, 20, 79, 84, 44, 59, 29,
              118, 25, 156, 310, 76, 26, 44, 23, 62]
        t5 = [130, 208, 70, 101, 208]
        t6 = [74, 57, 48, 29, 502, 12, 70, 21, 29, 386, 59, 27]
        t7 = [55, 320, 56, 104, 220, 239, 47, 246, 176, 182, 33]
        t8 = [23, 261, 87, 7, 120, 14, 62, 47, 225, 71, 246, 21, 42, 20, 5,
              12, 120, 11, 3, 14, 71, 11, 14, 11, 16, 90, 1, 16, 52, 95]
        t9 = [97, 51, 11, 4, 141, 18, 142, 68, 77, 80, 1, 16, 106, 206, 82,
              54, 31, 216, 46, 111, 39, 63, 18, 191, 18, 163, 24]
        t10 = [50, 44, 102, 72, 22, 39, 3, 15, 197, 188, 79, 88, 46, 5, 5, 36,
               22, 139, 210, 97, 30, 23, 13, 14]
        t11 = [359, 9, 12, 270, 603, 3, 104, 2, 438]
        t12 = [50, 254, 5, 283, 35, 12]
        t13 = [487, 18, 100, 7, 98, 5, 85, 91, 43, 230, 3, 130]
        t14 = [102, 209, 14, 57, 54, 32, 67, 59, 134, 152, 27, 14, 230, 66,
               61, 34]
        with warnings.catch_warnings():
            warnings.filterwarnings('ignore', message='approximate p-value')
            Tk, tm, p = stats.anderson_ksamp((t1, t2, t3, t4, t5, t6, t7, t8,
                                              t9, t10, t11, t12, t13, t14),
                                             midrank=True)

        assert_almost_equal(Tk, 3.294, 3)
        assert_array_almost_equal([0.5990, 1.3269, 1.8052, 2.2486, 2.8009],
                                  tm, 4)
        assert_almost_equal(p, 0.0041, 4)
Example #10
def test_dist_fit(data, orig_data, method):
    if method == 'ad':
        stat_results = st.anderson_ksamp([orig_data, data])
    elif method == 'ks':
        stat_results = st.ks_2samp(orig_data, data)
    else:
        raise ValueError('unknown method: %s' % method)

    return stat_results
Example #11
    def get_ks_by_user_vector(self, matrix_id):
        output = self.output_json
        sort_vector = output.get('sort_vector', None)

        if not sort_vector:
            return False

        # descending sort order
        sort_order = numpy.argsort(sort_vector)[::-1]

        flcm = AnalysisDatasets.objects\
            .filter(analysis_id=self.id, count_matrix=matrix_id)\
            .select_related('count_matrix')\
            .first()

        n = len(sort_order)
        values = list(flcm.count_matrix.df['All bins'])
        quartiles = [[], [], [], []]
        for i, index in enumerate(sort_order):
            quartiles[math.floor(4*i/n)].append(values[index])

        stat, cv, sig = stats.anderson_ksamp(quartiles)
        return {
            'statistic': stat,
            'critical_values': cv,
            'significance': sig,
        }
Example #12
def ad(d1, d2, verbose=False):
    """
    Calculates the Anderson-Darling TS on 2 distributions.

    Can be used on continuous or discrete distributions. Any binning/bucketing of the distributions/samples should be
    done before passing them to this function.

    Anderson & Darling 1954

    Advantages:
    - Unlike the KS, the AD (like the ES) can be used on both continuous & discrete distributions.
    - Works well even when dist has fewer than 25 observations.
    - More powerful than KS, especially for differences in the tails of distributions.

    Args:
        d1 (np.array or pandas.core.series.Series): first sample

        d2 (np.array or pandas.core.series.Series): second sample

        verbose (bool): helpful interpretation msgs printed to stdout (default False)

    Returns:
        (float, float): AD test statistic and the p-value under the null hypothesis that the two samples are drawn from the same distribution
    """

    d1 = assure_numpy_array(d1)
    d2 = assure_numpy_array(d2)

    ad, critical_values, pvalue = stats.anderson_ksamp([d1, d2])

    return ad, pvalue
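A usage sketch for ad(), assuming the module-level imports of its source (from scipy import stats) and the assure_numpy_array helper are in scope; the samples are invented:

import numpy as np

rng = np.random.default_rng(4)
d1 = rng.exponential(size=300)
d2 = rng.exponential(size=300)

ad_stat, pvalue = ad(d1, d2)
print(ad_stat, pvalue)  # a small p-value would reject "same distribution"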
Example #13
def getAD(pdQuery, pdRef):
    arQuery = pdQuery.values.flatten().tolist()
    arRef = pdRef.values.flatten().tolist()

    tupAD = stats.anderson_ksamp([arQuery, arRef])

    return tupAD
Example #14
def computeAD2Sample(data,mu,sd,seed):
    Nsample = len(data)
    np.random.seed(seed)
    otherdata = np.random.normal(mu, sd, Nsample)
    from scipy import stats
    res = stats.anderson_ksamp((data, otherdata))
    return [res.statistic, res.significance_level,res.critical_values.tolist()]
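A usage sketch: testing whether invented data are consistent with a normal distribution of known mean and spread (assumes numpy is imported as np, as the function requires):

import numpy as np

rng = np.random.default_rng(5)
data = rng.normal(10.0, 2.0, size=400)

stat, pval, crit = computeAD2Sample(data, mu=10.0, sd=2.0, seed=42)
print(stat, pval)  # a large p-value indicates consistency with N(10, 2)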
Example #15
def main():
    if len(sys.argv) < 4:
        return 1
    _, list_a, list_b, significance = sys.argv[:4]
    list_a = json.loads(list_a)
    list_b = json.loads(list_b)
    significance = float(significance)

    shapiro_p_value = stats.shapiro(list_a)[1], stats.shapiro(list_b)[1]
    mann_whitney_p_value = stats.mannwhitneyu(list_a, list_b).pvalue
    anderson_p_value = stats.anderson_ksamp([list_a,
                                             list_b]).significance_level
    welch_p_value = stats.ttest_ind(list_a, list_b, equal_var=False)[1]

    results = {
        'first_sample': list_a,
        'second_sample': list_b,
        'shapiro_p_value': shapiro_p_value,
        'mann_p_value': mann_whitney_p_value,
        'anderson_p_value': anderson_p_value,
        'welch_p_value': welch_p_value,
    }

    # TODO(robertocn): It seems we haven't used the results of shapiro test for
    # normality. We should remove this along with anderson darling and welch's.
    if (results['shapiro_p_value'][0] < significance
            and results['shapiro_p_value'][1] < significance):
        results['normal-y'] = True
    else:
        results['normal-y'] = False
    results['significantly_different'] = bool(
        float(results['mann_p_value']) < float(significance))

    print(json.dumps(results))
    return 0
Example #16
def p_value_scoring_object_AD(clf, X, y):
    """
    Scoring callable that returns the negative p-value from the Anderson-Darling
    test on the prediction probabilities for the particle and antiparticle samples.
    """

    #Finding out the prediction probabilities
    prob_pred = clf.predict_proba(X)[:, 1]
    #print(prob_pred)

    #This can be deleted if not using Keras
    #For Keras turn categorical y back to normal y
    if y.ndim == 2:
        if y.shape[0] != 1 and y.shape[1] != 1:
            #Then we have a categorical vector
            y = y[:, 1]

    #making sure the inputs are row vectors
    y         = np.reshape(y, (1, y.shape[0]))
    prob_pred = np.reshape(prob_pred, (1, prob_pred.shape[0]))

    #Separate prob into particle and antiparticle samples
    prob_0    = prob_pred[np.logical_or.reduce([y == 0])]
    prob_1    = prob_pred[np.logical_or.reduce([y == 1])]
    #if __debug__:
    #    print("Plot")
    p_AD_stat = stats.anderson_ksamp([prob_0, prob_1])
    print(p_AD_stat)
    p_AD = -p_AD_stat[2]
    return p_AD
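A usage sketch with an sklearn-style classifier (the estimator and data are invented; assumes numpy as np and scipy.stats as stats are imported, as the scorer requires):

import numpy as np
from scipy import stats
from sklearn.linear_model import LogisticRegression

rng = np.random.default_rng(6)
X = rng.normal(size=(400, 5))
y = (rng.random(400) > 0.5).astype(int)

clf = LogisticRegression().fit(X, y)
# returns -p; higher values mean stronger evidence the two score distributions differ
print(p_value_scoring_object_AD(clf, X, y))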
Example #17
def mcanderson(x,
               y,
               err=None,
               xerr=None,
               yerr=None,
               resample=True,
               replace=True,
               nsamples=1000,
               debug=False):

    if err is not None:
        xerr = err
        yerr = err
    statistic = np.zeros(nsamples)
    pvalue = np.zeros(nsamples)
    for i in np.arange(nsamples):

        if resample:
            x_re = select(x, xerr, replace=replace)
            y_re = select(y, yerr, replace=replace)
        else:
            x_re = select(x, xerr, replace=replace, indices=np.arange(len(x)))
            y_re = select(y, yerr, replace=replace, indices=np.arange(len(y)))

        stat, _, per = stats.anderson_ksamp([x_re, y_re])
        statistic[i] = stat
        pvalue[i] = per
        if debug and (np.mod(i, 100) == 0):
            a = plt.hist(y_re)
            b = plt.hist(x_re)
            print(stat, per)
            plt.show()
    out = confidence.interval(statistic, interval=0.68)
    out2 = confidence.interval(pvalue, interval=0.68)
    return out, out2
Example #18
def evaluate_fit(data1, data2):
    # slice the data then evaluate fits for each one.
    S1 = data1[0]
    S2 = np.log10(data1[1])
    S1_2 = data2[0]
    S2_2 = data2[1]
    a = np.arange(0.5, 70.5, 1)
    b = a + 2
    stats = []
    for i in range(len(b)):
        MIN = a[i]
        MAX = b[i]
        s1 = MIN + .5
        S2a = S2[(MIN < S1) & (S1 < MAX)]
        S2b = S2_2[(MIN < S1_2) & (S1_2 < MAX)]
        if len(S2a) != 0 and len(S2b) != 0:
            print("S1=" + str(s1))
            stat = anderson_ksamp([S2a, S2b])
            print(stat)
            stats.append(stat[0])
            if len(S2a) > len(S2b):
                S2a = S2a[:len(S2b)]
            else:
                S2b = S2b[:len(S2a)]
            # plt.hist(S2a, bins=100, alpha=0.5)
            # plt.hist(S2b, bins=100, alpha=0.5, color="r")
            # plt.show()
            # if stat[2] < 0.05:
            #     print(len(S2a), len(S2b))
            #     bins = np.linspace(3, 5, 100)
            #     plt.hist(S2a, bins, alpha=0.5, label='x')
            #     plt.hist(S2b, bins, alpha=0.5, label='y')
            #     plt.show()
    # note: stat[0] is the AD statistic, so this is an average of statistics
    average_stat = np.average(stats)
    return average_stat
Example #19
def py_adtest(mat, lv):
    ds = set(lv)
    pv = []
    for i in np.arange(mat.shape[0]):
        pv.append(
            anderson_ksamp([mat[i, np.array(lv) == l]
                            for l in ds]).significance_level)
    return pv
Example #20
def htests(data1, data2):
    d, pvalue = stats.ks_2samp(data1, data2)
    print(' KS test:        ', pvalue)
    statistic, criticalv, significance = stats.anderson_ksamp([data1, data2])
    print(' AD test:        ', significance)
    statistic, pvalue = stats.ranksums(data1, data2)
    print(' Wilcoxon test:  ', pvalue)
    return
Example #21
def compare_dists(a, b):
    try:
        stat = stats.anderson_ksamp([a, b])[0]
    except UserWarning:
        # anderson_ksamp warns when the p-value lies outside the tabulated
        # range; fall back to NaN if warnings are raised as errors
        stat = np.nan
    n = len(a) + len(b)
    stat = stat / ((n * n) / (n - 1))  # normalize for n
    stat = stat / 0.507 + 0.1  # normalize to ~(0,1)
    return stat
Example #22
def testCompletedInvertedCumulatives(data,
                                     method='AndersonDarling',
                                     offset=None,
                                     plot=False):
    """Test if data sets have the same number / intensity distribution by adding zero intensity counts to the smaller sized data sets and performing a distribution comparison test on the reversed cumulative distribution"""

    #idea: fill up data points to the same numbers at the high intensity values and use KS test
    #cf. work in progress on thoroughly testing the differences in histograms

    #fill up the low count data
    n = numpy.array([x.size for x in data])
    nm = n.max()
    m = numpy.array([x.max() for x in data])
    mm = m.max()
    k = n.size
    #print nm, mm, k

    if offset is None:
        #assume data starts at 0 !
        offset = mm / nm
        #ideally for all statistics this should be mm + eps to have as little influence as possible.

    datac = [x.copy() for x in data]
    for i in range(m.size):
        if n[i] < nm:
            datac[i] = numpy.concatenate(
                (-datac[i],
                 numpy.ones(nm - n[i], dtype=datac[i].dtype) * (offset)))
            # + 10E-5 * numpy.random.rand(nm-n[i])));
        else:
            datac[i] = -datac[i]

    #test by plotting
    if plot is True:
        import matplotlib.pyplot as plt
        for i in range(m.size):
            datac[i].sort()
            plt.step(datac[i], numpy.arange(datac[i].size))

    #perform the tests
    if method == 'KolmogorovSmirnov' or method == 'KS':
        if k == 2:
            (s, p) = stats.ks_2samp(datac[0], datac[1])
        else:
            raise RuntimeError('KolmogorovSmirnov only for 2 samples not %d' %
                               k)

    elif method == 'CramervonMises' or method == 'CM':
        if k == 2:
            (s, p) = stats2.testCramerVonMises2Sample(datac[0], datac[1])
        else:
            raise RuntimeError('CramervonMises only for 2 samples not %d' % k)

    elif method == 'AndersonDarling' or method == 'AD':
        (s, a, p) = stats.anderson_ksamp(datac)

    else:
        raise RuntimeError('unknown method %s' % method)

    return (p, s)
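A usage sketch for the function above, assuming the imports it relies on (import numpy; from scipy import stats) are in place; the intensity data are invented and deliberately of unequal sizes:

import numpy
from scipy import stats

rng = numpy.random.default_rng(7)
data = [rng.exponential(50.0, size=n) for n in (120, 200, 160)]

p, s = testCompletedInvertedCumulatives(data, method='AD')
print(p, s)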
Example #23
    def compute_ad_distance(self):
        """
        Compute the distance using the Anderson Darling Test.
        """

        D, _, p = anderson_ksamp([self.PDF1.data, self.PDF2.data])

        self.ad_distance = D
        self.ad_pval = p
Example #24
    def ad_test(self, data1, data2):

        # Get the test statistic, critical values, and p-value from the AD test
        ad = anderson_ksamp([data1, data2])
        statistic = ad.statistic
        cv = ad.critical_values
        pvalue = ad.significance_level

        #return statistic, cv, pvalue
        return pvalue
Example #25
    def calcAnderson_ksamp(self, twoSamples):
        self.logger.debug('IN calcAnderson_ksamp: Test Seq Len: %i' % len(twoSamples['testSeq']))
        self.logger.debug('IN calcAnderson_ksamp: Grnd Truth Seq Len: %i' % len(twoSamples['grndTruthSeq']))
        sampleArrayList = []
        #sampleArrayList.append(twoSamples['grndTruthSeq'])
        sampleArrayList.append(twoSamples['testSeq'])
        sampleArrayList.append(twoSamples['grndTruthSeq'])

        anderson_kstat, critical_val, significance = anderson_ksamp(sampleArrayList)
        return anderson_kstat
Example #26
    def work(self, input_items, output_items):
        in0 = input_items[0]
        out = output_items[0]
        #print in0.shape
        x = in0.reshape(self.N)
        #print x.shape[0]
        #print self.buf
        #print x.imag
        #print np.append(x, self.buf).shape
        # anderson_ksamp expects a single list of sample arrays
        [D1, z, p1] = stats.anderson_ksamp([x.imag, self.buf.imag])
        [D2, z, p2] = stats.anderson_ksamp([x.real, self.buf.real])
        if p1 < 0.05 and p2 < 0.05:
            print('Not similar, p is ', p1, " ", p2, 'at sample',
                  self.nitems_read(0))
            self.ctr = self.ctr + 1
            print(self.ctr)
        self.buf = np.copy(x)
        out[:] = in0
        return len(output_items[0])
Example #27
def testAD(pdUCETSSDistances, pdRandomTSSDistances):
    #Get two lists of values
    #Feed into AD test
    arUCETSSbpDistances = pdUCETSSDistances['Distance_bp'].values.tolist()
    arRandomTSSbpDistances = pdRandomTSSDistances['Distance_bp'].values.tolist()

    floatStat, critical, approxP = stats.anderson_ksamp(
        [arUCETSSbpDistances, arRandomTSSbpDistances])
    return floatStat, critical, approxP
Example #28
    def adaptive_avg(self, array1, array2, ml_x, ml_y):

        # Adaptively multi-looked amplitude
        new_array1 = []
        new_array2 = []

        # Subset with centre pixel and neighbouring pixels
        # subset1 = np.zeros((array1.shape[0], ml_x, ml_y))
        # subset2 = np.zeros((array1.shape[0], ml_x, ml_y))
        cx = 0
        cy = 0
        for i in range(0, array1.shape[1], ml_x):
            cx += 1
            for j in range(0, array1.shape[2], ml_y):
                cy += 1
                # Create a (no_of_images x ml_x x ml_y) subset
                subset1 = array1[:, i:i + ml_x, j:j + ml_y]
                subset2 = array2[:, i:i + ml_x, j:j + ml_y]
                ind_h0 = []
                ind_h1 = []
                for ii in range(ml_x):
                    for jj in range(ml_y):
                        # Don't check centre pixel with centre pixel
                        if not (ii == ml_x // 2 and jj == ml_y // 2):
                            # Statistical similarity test
                            resultAnderson = st.anderson_ksamp([
                                subset2[:-1, ii, jj], subset2[:-1, ml_x // 2,
                                                              ml_y // 2]
                            ])
                            # treat the pixel as similar to the centre pixel when
                            # the AD significance level falls below 1%
                            if resultAnderson.significance_level < 0.01:
                                # print("Pixel [%i, %i]  is similar to centre pixel" % (ii, jj))
                                ind_h0.append([ii, jj])
                            else:
                                # print("Pixel [%i, %i]  is not similar to centre pixel" % (ii, jj))
                                ind_h1.append([ii, jj])
                if len(ind_h0) > len(ind_h1):
                    new_array1.append(
                        np.mean(self.avg_withcentre(array1, ind_h0, subset1,
                                                    ml_x, ml_y),
                                axis=1))
                    new_array2.append(
                        np.mean(self.avg_withcentre(array2, ind_h0, subset2,
                                                    ml_x, ml_y),
                                axis=1))
                else:
                    new_array1.append(np.mean(subset1, axis=(1, 2)))
                    new_array2.append(np.mean(subset2, axis=(1, 2)))
            print('Line', i + 1)
        new_array1 = np.transpose(np.array(new_array1))
        new_array2 = np.transpose(np.array(new_array2))
        final_array1 = np.reshape(new_array1, (array1.shape[0], cx, cy // cx))
        final_array2 = np.reshape(new_array2, (array1.shape[0], cx, cy // cx))

        return final_array1, final_array2
Example #29
def binary_dist_test(a, b, test='auc'):
    """
    Wrapper for difference of distribution tests for univariate observations
    from two classes.

    Parameters
    ----------
    a, b: array-like
        The observation values in each class.

    test: str (['ad', 'auc', 'mw', 'ks', 't']) or callable
        Which test to use.
        'ad': Anderson-Darling (general test for differing distributions)
        'ks': Kolmogorov-Smirnov (general test for differing distributions)
        't': t-test for difference in locations.
        'mw': Mann Whitney U test for difference in locations (reports AUC statistic)

        if callable, should take two array-like arguments.

    Output
    ------
    stat, pval

    stat: float
        The test statistic such that larger values mean bigger differences.

    pval: float
        The p value.

    """

    if test == 't':
        stat, pval = ttest_ind(a, b, equal_var=False)
        stat = abs(stat)

    elif test in ['auc', 'mw', 'mannwhitneyu']:

        result = binary_mann_whitney_u(a, b)
        pval = result['pval']  # makes two-sided
        stat = result['auc']

    elif test == 'ks':
        stat, pval = ks_2samp(a, b)

    elif test == 'ad':
        stat, _, pval = anderson_ksamp([a, b])

    elif callable(test):
        stat, pval = test(a, b)

    else:
        raise ValueError('test = {} is not an acceptable value.'.format(test))

    return stat, pval
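A usage sketch for the 'ad' branch (assumes anderson_ksamp is imported at module level, as the function requires; the samples are invented):

import numpy as np

rng = np.random.default_rng(8)
a = rng.normal(0.0, 1.0, size=250)
b = rng.normal(0.3, 1.0, size=250)

stat, pval = binary_dist_test(a, b, test='ad')
print(stat, pval)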
Example #30
def _anderson_compare_pops(first_pop_matrix, second_pop_matrix, name=None):
    """Helper function used to execute Anderson-Darling test. See anderson_de.
    """
    AD_stats = dict()
    p_stats = dict()
    for gene_id in first_pop_matrix.columns:
        AD, _, p = anderson_ksamp(
            [first_pop_matrix[gene_id], second_pop_matrix[gene_id]])
        AD_stats[gene_id] = AD
        p_stats[gene_id] = p
    return pd.Series(AD_stats, name=name), pd.Series(p_stats, name=name)
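A usage sketch with two invented expression matrices (assumes pandas as pd and anderson_ksamp are imported, as the helper requires):

import numpy as np
import pandas as pd

rng = np.random.default_rng(9)
genes = ['geneA', 'geneB']  # hypothetical gene ids
pop1 = pd.DataFrame(rng.normal(size=(100, 2)), columns=genes)
pop2 = pd.DataFrame(rng.normal(0.5, 1.0, size=(100, 2)), columns=genes)

ad_series, p_series = _anderson_compare_pops(pop1, pop2, name='pop1_vs_pop2')
print(p_series)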
Example #31
    def calcAnderson_ksamp(self, twoSamples):
        self.logger.debug('IN calcAnderson_ksamp: Test Seq Len: %i' %
                          len(twoSamples['testSeq']))
        self.logger.debug('IN calcAnderson_ksamp: Grnd Truth Seq Len: %i' %
                          len(twoSamples['grndTruthSeq']))
        sampleArrayList = []
        #sampleArrayList.append(twoSamples['grndTruthSeq'])
        sampleArrayList.append(twoSamples['testSeq'])
        sampleArrayList.append(twoSamples['grndTruthSeq'])

        anderson_kstat, critical_val, significance = anderson_ksamp(
            sampleArrayList)
        return anderson_kstat
Example #32
def get_pvalue(full_sample, subsample, N_loops, use_ks):
    """Computes the statistical probability value of
    a selected sample of halos being drawn from the
    full distribution of halos in the simulation using
    either the Kolmogorov Smirnov or Anderson Darling test.
    """
    if use_ks:
        stat, _ = stats.kstest(subsample, full_sample)
    else:
        stat, _, _ = stats.anderson_ksamp([subsample, full_sample])
    count = 0
    for i in range(N_loops):
        num_points = len(subsample)
        ran_sample = np.random.choice(full_sample, num_points, replace=True)
        if use_ks:
            stat_emp, _ = stats.kstest(ran_sample, full_sample)
        else:
            stat_emp, _, _ = stats.anderson_ksamp([ran_sample, full_sample])
        if stat_emp > stat:
            count += 1
    updated_pval = count / N_loops
    return updated_pval
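A usage sketch for the bootstrap p-value above (assumes numpy as np and scipy.stats as stats, as the function requires; the halo samples are invented):

import numpy as np
from scipy import stats

rng = np.random.default_rng(10)
full_sample = rng.normal(size=5000)
subsample = rng.choice(full_sample, size=200, replace=False)

# AD variant: the p-value is the fraction of random subsamples that look
# more discrepant from the full sample than the selected one does
print(get_pvalue(full_sample, subsample, N_loops=500, use_ks=False))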
Example #33
    def anderson(self, attrs1, attrs2):
        """
        k-sample Anderson test from `~scipy.stats.anderson_ksamp`.

        Parameters
        ----------
        attrs1 : list of attributes
            List of conditions in first sample
        attrs2 : list of attributes
            List of conditions in second sample

        Returns
        -------
        sig : float
            significance level (see `~scipy.stats.anderson_ksamp`)

        Examples
        --------
        >>> import numpy as np
        >>> import batman
        >>> from salter import LightCurve
        >>> # Create example transiting planet properties
        >>> params = batman.TransitParams()
        >>> params.t0 = 0.5
        >>> params.rp = 0.1
        >>> params.per = 1
        >>> params.duration = 0.3
        >>> params.inc = 90
        >>> params.w = 90
        >>> params.ecc = 0
        >>> params.a = 10
        >>> params.limb_dark = 'quadratic'
        >>> params.u = [0.2, 0.1]
        >>> # Create example transit light curves:
        >>> transits = [LightCurve(times=i + np.linspace(0, 1, 500),
        ...                        fluxes=np.random.randn(500),
        ...                        params=params) for i in range(10)]
        >>> r = Residuals(transits, params)
        >>> # How significant is the difference between the distributions of the fluxes in and out-of-transit?
        >>> r.anderson('out_of_transit', 'in_transit')
        1.1428634099527666
        >>> # How significant is the difference between the distributions of the in-transit fluxes before and after midtransit?
        >>> r.anderson(['in_transit', 'before_midtransit'], ['in_transit', 'after_midtransit'])
        0.2792395871784852
        """
        sample1, sample2 = self._and_reduce(attrs1, attrs2)

        try:
            return anderson_ksamp([sample1, sample2]).significance_level
        except OverflowError:
            return 0
Example #34
def testCompletedInvertedCumulatives(data, method = 'AndersonDarling', offset = None, plot = False):
    """Test if data sets have the same number / intensity distribution by adding zero intensity counts to the smaller sized data sets and performing a distribution comparison test on the reversed cumulative distribution"""
    
    #idea: fill up data points to the same numbers at the high intensity values and use KS test
    #cf. work in progress on thoroughly testing the differences in histograms
    
    #fill up the low count data
    n = numpy.array([x.size for x in data]);
    nm = n.max();
    m = numpy.array([x.max() for x in data]);
    mm = m.max();
    k = n.size;
    #print nm, mm, k
    
    if offset is None:
        #assume data starts at 0 !
        offset = mm / nm; #ideally for all statistics this should be mm + eps to have as little influence as possible.
    

    datac = [x.copy() for x in data];
    for i in range(m.size):
        if n[i] < nm:
            datac[i] = numpy.concatenate((-datac[i], numpy.ones(nm-n[i], dtype = datac[i].dtype) * (offset))); # + 10E-5 * numpy.random.rand(nm-n[i])));
        else:
            datac[i] = -datac[i];
         
    #test by plotting
    if plot is True:
        import matplotlib.pyplot as plt;
        for i in range(m.size):
            datac[i].sort();
            plt.step(datac[i], numpy.arange(datac[i].size));
    
    #perform the tests
    if method == 'KolmogorovSmirnov' or method == 'KS':
        if k == 2:
            (s, p) = stats.ks_2samp(datac[0], datac[1]);
        else:
            raise RuntimeError('KolmogorovSmirnov only for 2 samples not %d' % k);
        
    elif method == 'CramervonMises' or method == 'CM':
        if k == 2:
            (s,p) = stats2.testCramerVonMises2Sample(datac[0], datac[1]);
        else:
            raise RuntimeError('CramervonMises only for 2 samples not %d' % k);
      
    elif method == 'AndersonDarling' or method == 'AD':
        (s,a,p) = stats.anderson_ksamp(datac);

    return (p,s);
Example #35
    def __init__(self, data):
        self.dist = stats.genextreme
        self.distname = "genextreme"
        self.data = data
        self.p = self.dist.fit(self.data)
        self.frozen = self.dist(self.p[0], loc=self.p[1], scale=self.p[2])
        self.pdf = lambda x: self.frozen.pdf(x)
        self.sample = self.frozen.rvs(len(self.data))
        self.sample2 = self.frozen.rvs(100000)
        self.moments = self.frozen.stats(moments="mvsk")
        self.MAPP = fmin(lambda x: -self.pdf(x), self.moments[0], disp=0)[0]
        try:
            self.ad = stats.anderson_ksamp([self.sample, self.data])[0]
        except Exception:
            self.ad = np.inf
Example #36
    def test_result_attributes(self):
        # Example data from Scholz & Stephens (1987), originally
        # published in Lehmann (1995, Nonparametrics, Statistical
        # Methods Based on Ranks, p. 309)
        # Pass a mixture of lists and arrays
        t1 = [38.7, 41.5, 43.8, 44.5, 45.5, 46.0, 47.7, 58.0]
        t2 = np.array([39.2, 39.3, 39.7, 41.4, 41.8, 42.9, 43.3, 45.8])
        t3 = np.array([34.0, 35.0, 39.0, 40.0, 43.0, 43.0, 44.0, 45.0])
        t4 = np.array([34.0, 34.8, 34.8, 35.4, 37.2, 37.8, 41.2, 42.8])

        with warnings.catch_warnings():
            warnings.filterwarnings('ignore', message='approximate p-value')
            res = stats.anderson_ksamp((t1, t2, t3, t4), midrank=False)

        attributes = ('statistic', 'critical_values', 'significance_level')
        check_named_results(res, attributes)
Example #37
    def __init__(self, data):
        self.dist = stats.genextreme
        self.distname = "genextreme"
        self.data = data
        self.p = self.dist.fit(self.data)
        self.frozen = self.dist(self.p[0], loc=self.p[1], scale=self.p[2])
        self.pdf = lambda x: self.frozen.pdf(x)
        self.sample = self.frozen.rvs(len(self.data))
        self.sample2 = self.frozen.rvs(100000)
        self.moments = self.frozen.stats(moments="mvsk")
        self.MAPP = fmin(lambda x: -self.pdf(x),
                         self.moments[0], disp=0)[0]
        try:
            self.ad = stats.anderson_ksamp([self.sample, self.data])[0]
        except Exception:
            self.ad = np.inf
Example #38
    def __init__(self, data, dist, distname):
        self.dist = dist
        self.distname = distname
        self.data = data
        self.p = self.dist.fit(self.data)
        self.pdf = lambda x: self.dist.pdf(x, *self.p[:-2], loc=self.p[-2],
                                           scale=self.p[-1])
        self.sample = stats.norm.rvs(self.p[0], size=len(self.data),
                                     scale=self.p[-1])
        self.moments = self.dist.stats(*self.p, moments="mvsk")
        self.MAPP = fmin(lambda x: -self.pdf(x),
                         self.moments[0], disp=0)[0]
        try:
            self.ad = stats.anderson_ksamp([self.sample, self.data])[0]
        except Exception:
            self.ad = np.inf
Example #39
    def test_example1b(self):
        # Example data from Scholz & Stephens (1987), originally
        # published in Lehmann (1995, Nonparametrics, Statistical
        # Methods Based on Ranks, p. 309)
        # Pass arrays
        t1 = np.array([38.7, 41.5, 43.8, 44.5, 45.5, 46.0, 47.7, 58.0])
        t2 = np.array([39.2, 39.3, 39.7, 41.4, 41.8, 42.9, 43.3, 45.8])
        t3 = np.array([34.0, 35.0, 39.0, 40.0, 43.0, 43.0, 44.0, 45.0])
        t4 = np.array([34.0, 34.8, 34.8, 35.4, 37.2, 37.8, 41.2, 42.8])
        with warnings.catch_warnings():
            warnings.filterwarnings('ignore', message='approximate p-value')
            Tk, tm, p = stats.anderson_ksamp((t1, t2, t3, t4), midrank=True)

        assert_almost_equal(Tk, 4.480, 3)
        assert_array_almost_equal([0.4985, 1.3237, 1.9158, 2.4930, 3.2459],
                                  tm, 4)
        assert_almost_equal(p, 0.0020, 4)
Example #40
    def get_unsorted_ks(self, matrix_id):
        flcm = AnalysisDatasets.objects\
            .filter(analysis_id=self.id, count_matrix=matrix_id)\
            .select_related('count_matrix')\
            .first()

        n = len(flcm.count_matrix.df['All bins'])
        quartiles = [[], [], [], []]
        for i, value in enumerate(flcm.count_matrix.df['All bins']):
            quartiles[math.floor(4*i/n)].append(value)

        stat, cv, sig = stats.anderson_ksamp(quartiles)
        return {
            'statistic': stat,
            'critical_values': cv,
            'significance': sig,
        }
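The quartile-splitting idiom above is a general homogeneity check: cut the (ordered) values into four groups and let the k-sample AD test ask whether the ordering carries signal. A standalone sketch with invented data:

import math
import numpy as np
from scipy import stats

rng = np.random.default_rng(11)
values = np.sort(rng.exponential(size=400))  # sorted, so quartiles must differ

n = len(values)
quartiles = [[], [], [], []]
for i, value in enumerate(values):
    quartiles[math.floor(4 * i / n)].append(value)

stat, cv, sig = stats.anderson_ksamp(quartiles)
print(stat, sig)  # tiny significance level: the four quartiles clearly differ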
Example #41
    def get_ks(self, vector_id, matrix_id):
        if not self.output:
            return False

        output = self.output_json
        sort_order = output['sort_orders'].get(vector_id)
        flcm = AnalysisDatasets.objects\
            .filter(analysis_id=self.id, count_matrix=matrix_id)\
            .select_related('count_matrix')\
            .first()

        n = len(sort_order)
        values = list(flcm.count_matrix.df['All bins'])
        quartiles = [[], [], [], []]
        for i, index in enumerate(sort_order):
            quartiles[math.floor(4*i/n)].append(values[index])

        stat, cv, sig = stats.anderson_ksamp(quartiles)
        return {
            'statistic': stat,
            'critical_values': cv,
            'significance': sig,
        }
Example #42
def main():
    # assuming 'theFile' contains one name per line, read the file
    
    
    if getpass.getuser() == 'David':
        pickleFilename = '/Users/David/Research_Documents/inclination/git_inclination/pilot_paper_code/pilotData2.p'
#         resultsFilename = '/Users/David/Research_Documents/inclination/git_inclination/LG_correlation_combined5_3.csv'
#         resultsFilename = '/Users/David/Research_Documents/inclination/git_inclination/LG_correlation_combined5_8_edit2.csv'
        resultsFilename = '/Users/David/Research_Documents/inclination/git_inclination/LG_correlation_combined5_11_25cut_edit4.csv'
        saveDirectory = '/Users/David/Research_Documents/inclination/git_inclination/pilot_paper_code/plots5/'

    elif getpass.getuser() == 'frenchd':
        pickleFilename = '/usr/users/frenchd/inclination/git_inclination/pilot_paper_code/pilotData2.p'
#         resultsFilename = '/usr/users/frenchd/inclination/git_inclination/LG_correlation_combined5_3.csv'
#         resultsFilename = '/usr/users/frenchd/inclination/git_inclination/LG_correlation_combined5_8_edit2.csv'
        resultsFilename = '/usr/users/frenchd/inclination/git_inclination/LG_correlation_combined5_11_25cut_edit4.csv'
        saveDirectory = '/usr/users/frenchd/inclination/git_inclination/pilot_paper_code/plots5/'

    else:
        print('Could not determine username. Exiting.')
        sys.exit()
    
    # use the old pickle file to get the full galaxy dataset info
    pickleFile = open(pickleFilename, 'rb')  # pickle files must be opened in binary mode
    fullDict = pickle.load(pickleFile)
    
    pickleFile.close()
    
    
    # save each plot?
    save = False
    
    results = open(resultsFilename, 'r')
    reader = csv.DictReader(results)
    
    virInclude = False
    cusInclude = False
    finalInclude = True
    
    maxEnv = 3000
    minL = 0.001
    
    # if match, then the includes in the file have to MATCH the includes above. e.g., if 
    # virInclude = False, cusInclude = True, finalInclude = False, then only systems
    # matching those three would be included. Otherwise, all cusInclude = True would be included
    # regardless of the others
    match = False
    
    # all the lists to be used for associated lines
    nameList = []
    lyaVList = []
    lyaWList = []
    lyaErrList = []
    naList = []
    bList = []
    impactList = []
    azList = []
    incList = []
    fancyIncList = []
    cosIncList = []
    cosFancyIncList = []
    paList = []
    vcorrList = []
    majList = []
    difList = []
    envList = []
    morphList = []
    m15List = []
    virList = []
    likeList = []
    likem15List = []
    AGNnameList = []
    
    # for ambiguous lines
    lyaVAmbList = []
    lyaWAmbList = []
    envAmbList = []
    ambAGNnameList = []
    
    for l in reader:
        include_vir = eval(l['include_vir'])
        include_cus = eval(l['include_custom'])
        include = eval(l['include'])
        
        go = False
        if match:
            if virInclude == include_vir and cusInclude == include_cus:
                go = True
            else:
                go = False
                
        else:
            if virInclude and include_vir:
                go = True
                
            elif cusInclude and include_cus:
                go = True
                
            elif finalInclude and include:
                go = True
            
            else:
                go = False
        
        if go:
            AGNname = l['AGNname']
            AGNra_dec = eval(l['degreesJ2000RA_DecAGN'])
            galaxyRA_Dec = eval(l['degreesJ2000RA_DecGalaxy'])
            lyaV = l['Lya_v']
            lyaW = l['Lya_W'].partition('pm')[0]
            lyaW_err = l['Lya_W'].partition('pm')[2]
            env = l['environment']
            galaxyName = l['galaxyName']
            impact = l['impactParameter (kpc)']
            galaxyDist = l['distGalaxy (Mpc)']
            pa = l['positionAngle (deg)']
            RC3pa = l['RC3pa (deg)']
            morph = l['final_morphology']
            vcorr = l['vcorrGalaxy (km/s)']
            maj = l['majorAxis (kpc)']
            minor = l['minorAxis (kpc)']
            inc = l['inclination (deg)']
            az = l['azimuth (deg)']
            b = l['b'].partition('pm')[0]
            b_err = l['b'].partition('pm')[2]
            na = eval(l['Na'].partition(' pm ')[0])
#             print "l['Na'].partition(' pm ')[2] : ",l['Na'].partition(' pm ')
            na_err = eval(l['Na'].partition(' pm ')[2])
            likelihood = l['likelihood']
            likelihoodm15 = l['likelihood_1.5']
            virialRadius = l['virialRadius']
            m15 = l['d^1.5']
            vel_diff = l['vel_diff']
            
            if isNumber(inc):
                cosInc = cos(float(inc) * pi/180.)
                
                if isNumber(maj) and isNumber(minor):
                    q0 = 0.2
                    fancyInc = calculateFancyInclination(maj,minor,q0)
                    cosFancyInc = cos(fancyInc * pi/180)
                else:
                    fancyInc = -99
                    cosFancyInc = -99
            else:
                cosInc = -99
                inc = -99
                fancyInc = -99
                cosFancyInc = -99
        
            if isNumber(pa):
                pa = float(pa)
            elif isNumber(RC3pa):
                pa = float(RC3pa)
            else:
                pa = -99
                
            if isNumber(az):
                az = float(az)
            else:
                az = -99
                
            if isNumber(maj):
                maj = float(maj)
                virialRadius = float(virialRadius)
            else:
                maj = -99
                virialRadius = -99
            
            # all the lists to be used for associated lines
            if float(env) <= maxEnv and float(likelihood) >=minL:
                nameList.append(galaxyName)
                AGNnameList.append(AGNname)
                lyaVList.append(float(lyaV))
                lyaWList.append(float(lyaW))
                lyaErrList.append(float(lyaW_err))
                naList.append(na)
                bList.append(float(b))
                impactList.append(float(impact))
                azList.append(az)
                incList.append(float(inc))
                fancyIncList.append(fancyInc)
                cosIncList.append(cosInc)
                cosFancyIncList.append(cosFancyInc)
                paList.append(pa)
                vcorrList.append(vcorr)
                majList.append(maj)
                difList.append(float(vel_diff))
                envList.append(float(env))
                morphList.append(morph)
                m15List.append(m15)
                virList.append(virialRadius)
                likeList.append(likelihood)
                likem15List.append(likelihoodm15)
            
        else:
            lyaV = l['Lya_v']
            lyaW = l['Lya_W'].partition('pm')[0]
            lyaW_err = l['Lya_W'].partition('pm')[2]
            env = l['environment']
            AGNname = l['AGNname']
             
        
            lyaVAmbList.append(float(lyaV))
            lyaWAmbList.append(float(lyaW))
            envAmbList.append(float(env))
            ambAGNnameList.append(AGNname)

    results.close()
    
        
    # lists for the full galaxy dataset
    allPA = fullDict['allPA']
    allInclinations = fullDict['allInclinations']
    allCosInclinations = fullDict['allCosInclinations']
    allFancyInclinations = fullDict['allFancyInclinations']
    allCosFancyInclinations = fullDict['allCosFancyInclinations']
    
    total = 0
    totalNo = 0
    totalYes = 0
    totalIsolated = 0
    totalGroup = 0
    

########################################################################################
#########################################################################################
    
    # print all the things
    #
    
    # absorber info lists
    blues = []
    reds = []
    blueAbs = []
    redAbs = []
    blueW = []
    redW = []
    blueB = []
    redB = []
    blueErr = []
    redErr = []
    blueV = []
    redV = []
    blueImpact = []
    redImpact = []
    
    # galaxy info lists
    blueInc = []
    redInc = []
    blueFancyInc = []
    redFancyInc = []
    blueAz = []
    redAz = []
    bluePA = []
    redPA = []
    blueVcorr = []
    redVcorr = []
    blueEnv = []
    redEnv = []
    blueVir = []
    redVir = []
    blueLike = []
    redLike = []
    

    # ambiguous stuff
    void = []
    ambig = []
    for v,w,e in zip(lyaVAmbList,lyaWAmbList,envAmbList):
        if e == 0:
            void.append(w)
        else:
            ambig.append(w)
    
    
    # for targets
    finalTargets = {}
    for a in AGNnameList:
        if a in finalTargets:
            i = finalTargets[a]
            i+=1
            finalTargets[a] = i
            
        else:
            finalTargets[a] = 1
            
    # for ambiguous targets
    ambTargets = {}
    for a in ambAGNnameList:
        if a in ambTargets:
            i = ambTargets[a]
            i+=1
            ambTargets[a] = i
            
        else:
            ambTargets[a] = 1
        
    
    # for absorbers
    for d,w,e,v,i,b in zip(difList,lyaWList,lyaErrList,lyaVList,impactList,bList):
        if d>=0:
            blues.append(float(d))
            blueW.append(float(w))
            blueErr.append(float(e))
            blueV.append(float(v))
            blueImpact.append(float(i))
            blueAbs.append(abs(d))
            blueB.append(float(b))
        else:
            reds.append(float(d))
            redW.append(float(w))
            redErr.append(float(e))
            redV.append(float(v))
            redImpact.append(float(i))
            redAbs.append(abs(d))
            redB.append(float(b))
            
            
##########################################################################################
    blueSpiralInc = []
    redSpiralInc = []
    spiralIncList = []
    # for spirals only
    for d,inc in zip(difList,fancyIncList):
        spiralIncList.append(float(inc))
        if d>=0:
            blueSpiralInc.append(float(inc))
        else:
            redSpiralInc.append(float(inc))
                
                
    # compile a list of only spiral galaxy inclinations from the full galaxy table
    if getpass.getuser() == 'David':
        galaxyFile = open('/Users/David/Research_Documents/gt/NewGalaxyTable5.csv', 'r')
    else:
        print('Not on laptop, exiting')
        sys.exit()
        
    reader = csv.DictReader(galaxyFile)
    
    allDiameters = []
    incGT25diam = []
    allSpiralIncList = []
    
    q0 = 0.2
    for i in reader:
        major,minor = eval(i['linDiameters (kpc)'])
        morph = i['morphology'].lower()
        if bfind(morph,'s'):
            if not bfind(morph,'sph') and not bfind(morph,'s0'):
            
                if isNumber(major):
                    if isNumber(minor):
                        if float(major) > float(minor):
                            fInc = calculateFancyInclination(major,minor,q0)
                            allSpiralIncList.append(fInc)
                            
                            if float(major) >=25.0:
                                incGT25diam.append(fInc)
    
    galaxyFile.close()
    
##########################################################################################
                   
            
    nameDict = {}
    # for galaxies
    for d,inc,finc,az,pa,vcorr,e,vir,l,name in zip(difList,incList,fancyIncList,azList,paList,vcorrList,envList,virList, likeList,nameList):
        if name in nameDict:
            i = nameDict[name]
            i+=1
            nameDict[name] = i
        else:
            nameDict[name] = 1
        
        if d>=0:
            if inc !=-99:
                blueInc.append(float(inc))
            if finc !=-99:
                blueFancyInc.append(float(finc))
            if az !=-99:
                blueAz.append(float(az))
            if pa !=-99:
                bluePA.append(float(pa))
            if vcorr !=-99:
                blueVcorr.append(float(vcorr))
            blueEnv.append(float(e))
            if vir !=-99:
                blueVir.append(float(vir))
            if l !=-99:
                blueLike.append(float(l))
        else:
            if inc !=-99:
                redInc.append(float(inc))
            if finc !=-99:
                redFancyInc.append(float(finc))
            if az !=-99:
                redAz.append(float(az))
            if pa !=-99:
                redPA.append(float(pa))
            if vcorr !=-99:
                redVcorr.append(float(vcorr))
            redEnv.append(float(e))
            if vir !=-99:
                redVir.append(float(vir))
            if l !=-99:
                redLike.append(float(l))
                
    galaxyNames = nameDict.keys()
                
    # how many absorbers above vs below vel_cut?
    redVelCount200 = 0
    redVelCount100 = 0
    blueVelCount200 = 0
    blueVelCount100 = 0
    
    for b in blues:
        if b >=200:
            blueVelCount200 +=1
        if b >= 100:
            blueVelCount100 +=1
        
    for r in reds:
        if abs(r) >=200:
            redVelCount200 +=1
        if abs(r) >=100:
            redVelCount100 +=1
    

    assocFancyInc = blueFancyInc + redFancyInc

    
    print()
    print('------------------------ Pilot Data ------------------------------')
    print()
    print(' FOR THE FOLLOWING INCLUDE SET:')
    print(' Virial radius include = ', virInclude)
    print(' Custom include =        ', cusInclude)
    print(' Final include =         ', finalInclude)
    print(' Match =                 ', match)
    print()
    print('total number of lines: ', len(lyaWList) + len(lyaWAmbList))
    print('total number of unique galaxies matched: ', len(galaxyNames))
    print('total number of associated lines: ', len(difList))
    print('total number of ambiguous lines: ', len(ambig))
    print('total number of void lines: ', len(void))
    print('# of redshifted lines: ', len(reds))
    print('# of blueshifted lines: ', len(blues))
    print()
    print()
    print(' ASSOCIATED TARGETS ')
    print()
    print('final target number: ', len(finalTargets.keys()))
    for i in finalTargets.keys():
        print(i)
    print()
    print()
    print(' AMBIGUOUS TARGETS ')
    print()
    print('final ambiguous number: ', len(ambTargets.keys()))
    for i in ambTargets.keys():
        print(i)
    print()
    print()
    print('----------------------- Absorber info ----------------------------')
    print()
    print('avg blueshifted EW: ', mean(blueW))
    print('median blueshifted EW: ', median(blueW))
    print('avg blue err: ', mean(blueErr))
    print('median blue err: ', median(blueErr))
    print()
    print('std(blue EW): ', std(blueW))
    print('stats.sem(blue EW): ', stats.sem(blueW))
    print('stats.describe(blue EW): ', stats.describe(blueW))
    print()
    print('avg blueshifted vel_diff: ', mean(blues))
    print('median blueshifted vel_diff: ', median(blues))
    print('std(blueshifted vel_diff): ', std(blues))
    print('stats.sem(blue vel_dif): ', stats.sem(blues))
    print('stats.describe(blue vel_dif): ', stats.describe(blues))
    print()
    print('% blueshifted which have vel_diff >= 200 km/s: {0}'.format(float(blueVelCount200)/len(blues)))
    print('total number with abs(vel_diff) >= 200 km/s: {0}'.format(blueVelCount200))
    print('% blueshifted which have vel_diff >= 100 km/s: {0}'.format(float(blueVelCount100)/len(blues)))
    print('total number with abs(vel_diff) >= 100 km/s: {0}'.format(blueVelCount100))
    print()
    
    print('avg blue velocity: ', mean(blueV))
    print('median blue velocity: ', median(blueV))
    print('std(blue Velocity): ', std(blueV))
    print('avg blue impact: ', mean(blueImpact))
    print('median blue impact: ', median(blueImpact))
    print('stats.sem(blue impact): ', stats.sem(blueImpact))
    print('stats.describe(blue impact): ', stats.describe(blueImpact))

    print()
    
    print('avg redshifted EW: ', mean(redW))
    print('median redshifted EW: ', median(redW))
    print('avg red err: ', mean(redErr))
    print('median red err: ', median(redErr))
    print()
    print('std(red EW): ', std(redW))
    print('stats.sem(red EW): ', stats.sem(redW))
    print('stats.describe(red EW): ', stats.describe(redW))

    print()
    print('avg redshifted vel_diff: ', mean(reds))
    print('median redshifted vel_diff: ', median(reds))
    print('std(redshifted vel_dif): ', std(reds))
    print('stats.sem(red vel_dif): ', stats.sem(reds))
    print('stats.describe(red vel_dif): ', stats.describe(reds))
    print()
    print('% redshifted which have abs(vel_diff) >= 200 km/s: {0}'.format(float(redVelCount200)/len(reds)))
    print('total number with abs(vel_diff) >= 200 km/s: {0}'.format(redVelCount200))
    print('% redshifted which have abs(vel_diff) >= 100 km/s: {0}'.format(float(redVelCount100)/len(reds)))
    print('total number with abs(vel_diff) >= 100 km/s: {0}'.format(redVelCount100))
    print()

    print('avg red velocity: ', mean(redV))
    print('median red velocity: ', median(redV))
    print()
    print('avg red impact: ', mean(redImpact))
    print('median red impact: ', median(redImpact))
    print('stats.sem(red impact): ', stats.sem(redImpact))
    print('stats.describe(red impact): ', stats.describe(redImpact))
    print('std(red impact): ', std(redImpact))



    print
    print '----------------------- Galaxy info ----------------------------'
    print
    
    # regular inclinations
    incCut = 50
    totalBlueInc = len(blueInc)
    totalRedInc = len(redInc)
    
    blueIncCount = 0
    for i in blueInc:
        if i >= incCut:
            blueIncCount +=1
            
    redIncCount = 0
    for i in redInc:
        if i >= incCut:
            redIncCount +=1
            
    totalInc = len(allInclinations)
    totalCount = 0
    for i in allInclinations:
        if i >= incCut:
            totalCount +=1
            
            
    # fancy inclinations
    totalBlueFancyInc = len(blueFancyInc)
    totalRedFancyInc = len(redFancyInc)
    
    blueFancyIncCount = 0
    for i in blueFancyInc:
        if i >= incCut:
            blueFancyIncCount +=1
            
    redFancyIncCount = 0
    for i in redFancyInc:
        if i >= incCut:
            redFancyIncCount +=1
            
    combinedCount = redFancyIncCount + blueFancyIncCount
    totalCombinedCount = totalRedFancyInc + totalBlueFancyInc
            
    totalFancyInc = len(allFancyInclinations)
    totalFancyCount = 0
    for i in allFancyInclinations:
        if i >= incCut:
            totalFancyCount +=1
    
    print
    print ' INCLINATIONS: '
    print 
    print 'Blue: {0} of associated galaxies have inclination >= {1} deg'.format(float(blueIncCount)/float(totalBlueInc),incCut)
    print 'Red: {0} of associated galaxies have inclination >= {1} deg'.format(float(redIncCount)/float(totalRedInc),incCut)
    print 'All: {0} of ALL galaxies have inclination >= {1} deg'.format(float(totalCount)/float(totalInc),incCut)
    print
    print ' FANCY INCLINATIONS: '
    print
    print 'Blue: {0} of associated galaxies have fancy inclination >= {1} deg'.format(float(blueFancyIncCount)/float(totalBlueFancyInc),incCut)
    print 'Red: {0} of associated galaxies have fancy inclination >= {1} deg'.format(float(redFancyIncCount)/float(totalRedFancyInc),incCut)
    print 'All: {0} of ALL galaxies have fancy inclination >= {1} deg'.format(float(totalFancyCount)/float(totalFancyInc),incCut)
    print 'Combined: {0} of associated galaxies have fancy inclination >= {1} deg'.format(float(combinedCount)/float(totalCombinedCount),incCut)
    print
    print 'Average all fancy inclination: ',mean(allFancyInclinations)
    print 'stats.sem(all): ',stats.sem(allFancyInclinations)
    print    
    print 'avg blue inclination: ',mean(blueInc)
    print 'median blue inclination: ',median(blueInc)
    print 'avg blue fancy inclination: ',mean(blueFancyInc)
    print 'median blue fancy inclination: ',median(blueFancyInc)
    print
    print 'avg red inclination: ',mean(redInc)
    print 'median red inclination: ',median(redInc)
    print 'avg red fancy inclination: ',mean(redFancyInc)
    print 'median red fancy inclination: ',median(redFancyInc)
    
    print
    print 'mean associated: ',mean(assocFancyInc)
    print 'stats.sem(associated): ',stats.sem(assocFancyInc)
    print 'stats.describe(associated): ',stats.describe(assocFancyInc)
    print 'stats.sem(blue): ',stats.sem(blueFancyInc)
    print 'stats.describe(blue): ',stats.describe(blueFancyInc)
    print
    print 'stats.sem(red): ',stats.sem(redFancyInc)
    print 'stats.describe(red): ',stats.describe(redFancyInc)
    
    print
    print "  AZIMUTHS and PA:  "
    print
    print 'avg blue azimuth: ',mean(blueAz)
    print 'median blue azimuth: ',median(blueAz)
    print 'stats.sem(blue az): ',stats.sem(blueAz)
    print 'stats.describe(blue az): ',stats.describe(blueAz)
    print
    print 'avg red azimuth: ',mean(redAz)
    print 'median red azimuth: ',median(redAz)
    print 'stats.sem(red az): ',stats.sem(redAz)
    print 'stats.describe(red az): ',stats.describe(redAz)
    print
    print 'avg blue PA: ',mean(bluePA)
    print 'median blue PA: ',median(bluePA)
    print
    print 'avg red PA: ',mean(redPA)
    print 'median red PA: ',median(redPA)
    
    print
    print ' VCORR : '
    print
    print 'avg blue vcorr: ',mean(blueVcorr)
    print 'median blue vcorr: ',median(blueVcorr)
    print
    print 'avg red vcorr: ',mean(redVcorr)
    print 'median red vcorr: ',median(redVcorr)
    
    print
    print ' ENVIRONMENT: '
    print
    print 'avg blue environment: ',mean(blueEnv)
    print 'median blue environment: ',median(blueEnv)
    print
    print 'avg red environment: ',mean(redEnv)
    print 'median red environment: ',median(redEnv)
    
    print
    print ' R_vir: '
    print
    print 'avg blue R_vir: ',mean(blueVir)
    print 'median blue R_vir: ',median(blueVir)
    print 'stats.sem(blue R_vir): ',stats.sem(blueVir)
    print 'stats.describe(blue R_vir): ',stats.describe(blueVir)
    print
    print 'avg red R_vir: ',mean(redVir)
    print 'median red R_vir: ',median(redVir)
    print 'stats.sem(red R_vir): ',stats.sem(redVir)
    print 'stats.describe(red R_vir): ',stats.describe(redVir)

    print
    print ' LIKELIHOOD: '
    print
    print 'avg blue likelihood: ',mean(blueLike)
    print 'median blue likelihood: ',median(blueLike)
    print
    print 'avg red likelihood: ',mean(redLike)
    print 'median red likelihood: ',median(redLike)
    
    print
    print
    print '-------------------- Distribution analysis ----------------------'
    print
    print
    
    print ' FANCY INCLINATIONS: '
    
    # perform the K-S and AD tests for inclination
    ans1 = stats.ks_2samp(blueFancyInc, redFancyInc)
    ans1a = stats.anderson_ksamp([blueFancyInc,redFancyInc])

    print 'KS for blue vs red fancy inclinations: ',ans1
    print 'AD for blue vs red fancy inclinations: ',ans1a
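    # anderson_ksamp returns (statistic, critical values, significance level);
    # scipy clips the reported significance level to [0.001, 0.25], so extreme
    # p-values saturate at those bounds.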
    
    ans2 = stats.ks_2samp(blueFancyInc, allFancyInclinations)
    print 'KS for blue vs all fancy inclinations: ',ans2
    
    ans3 = stats.ks_2samp(redFancyInc, allFancyInclinations)
    print 'KS for red vs all fancy inclinations: ',ans3
    
    print
    z_statrb, p_valrb = stats.ranksums(blueFancyInc, redFancyInc)
    z_statall, p_valall = stats.ranksums(assocFancyInc, allFancyInclinations)
    print 'ranksum red vs blue p-value: ',p_valrb
    print 'ranksum associated vs all: ',p_valall


#     ans4 = stats.ks_2samp(assocFancyInc, allFancyInclinations)
#     ans4a = stats.anderson_ksamp([assocFancyInc,allFancyInclinations])
# 
#     print 'KS for all associated vs all fancy inclinations: ',ans4
#     print 'AD for all associated vs all fancy inclinations: ',ans4a
#     
    print

#     ans5 = stats.ks_2samp(spiralIncList, allSpiralIncList)
#     ans5a = stats.anderson_ksamp([spiralIncList,allSpiralIncList])
# 
#     print 'KS for all spiral associated vs all spiral fancy inclinations: ',ans5
#     print 'AD for all spiral associated vs all spiral fancy inclinations: ',ans5a
    
    print
    print ' INCLINATIONS: '
    print
    
    # perform the K-S and AD tests for inclination
    ans1 = stats.ks_2samp(blueInc, redInc)
    ans1a = stats.anderson_ksamp([blueInc,redInc])

    print 'KS for blue vs red inclinations: ',ans1
    print 'AD for blue vs red inclinations: ',ans1a
    
    ans2 = stats.ks_2samp(blueInc, allInclinations)
    print 'KS for blue vs all inclinations: ',ans2
    
    ans3 = stats.ks_2samp(redInc, allInclinations)
    print 'KS for red vs all inclinations: ',ans3
    
    assocInc = blueInc + redInc
    ans4 = stats.ks_2samp(assocInc, allInclinations)
    print 'KS for associated vs all inclinations: ',ans4
    
    print
    print ' EW Distributions: '
    print
    
    # perform the K-S and AD tests for EW
    ans1 = stats.ks_2samp(blueW, redW)
    ans1a = stats.anderson_ksamp([blueW,redW])
    print 'KS for blue vs red EW: ',ans1
    print 'AD for blue vs red EW: ',ans1a
    

    print
    print ' Impact parameter Distributions: '
    print
    
    # perform the K-S and AD tests for impact parameter
    ans1 = stats.ks_2samp(blueImpact, redImpact)
    ans1a = stats.anderson_ksamp([blueImpact,redImpact])
    print 'KS for blue vs red impact parameters: ',ans1
    print 'AD for blue vs red impact parameters: ',ans1a
    
    print
    print ' \Delta v Distributions: '
    print
    
    # perform the K-S and AD tests for \delta v
    ans1 = stats.ks_2samp(blueAbs, redAbs)
    ans1a = stats.anderson_ksamp([blueAbs,redAbs])
    print 'KS for blue vs red \Delta v: ',ans1
    print 'AD for blue vs red \Delta v: ',ans1a
    
    print
    print ' Azimuth Distributions: '
    print
    
    # perform the K-S and AD tests for azimuth
    ans1 = stats.ks_2samp(blueAz, redAz)
    ans1a = stats.anderson_ksamp([blueAz,redAz])
    print 'KS for blue vs red azimuth: ',ans1
    print 'AD for blue vs red azimuth: ',ans1a
    print
    
    # now against a flat distribution
    flatRed = arange(0,90,1)
    flatBlue = arange(0,90,1)
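    # NB: arange(0,90,1) is a fixed regular grid rather than random draws from
    # a flat distribution; a one-sample test against the uniform CDF, e.g.
    #     stats.kstest(blueAz, 'uniform', args=(0, 90)),
    # would test flatness more directly (sketch, assuming azimuths in [0,90]).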

    ans1 = stats.ks_2samp(blueAz, flatBlue)
    ans1a = stats.anderson_ksamp([blueAz,flatBlue])
    print 'KS for blue vs flat azimuth: ',ans1
    print 'AD for blue vs flat azimuth: ',ans1a
    print
    ans1 = stats.ks_2samp(redAz, flatRed)
    ans1a = stats.anderson_ksamp([redAz,flatRed])
    print 'KS for red vs flat azimuth: ',ans1
    print 'AD for red vs flat azimuth: ',ans1a
    print
    
            
    print
    print ' Environment Distributions: '
    print
    
    # perform the K-S and AD tests for environment
    ans1 = stats.ks_2samp(blueEnv, redEnv)
    ans1a = stats.anderson_ksamp([blueEnv,redEnv])
    print 'KS for blue vs red environment: ',ans1
    print 'AD for blue vs red environment: ',ans1a
    
    print
    print ' R_vir Distributions: '
    print
    
    # perform the K-S and AD tests for r_vir
    ans1 = stats.ks_2samp(blueVir, redVir)
    ans1a = stats.anderson_ksamp([blueVir,redVir])
    print 'KS for blue vs red R_vir: ',ans1
    print 'AD for blue vs red R_vir: ',ans1a
    
    print
    print ' Doppler parameter Distributions: '
    print
    
    # perform the K-S and AD tests for doppler parameter
    ans1 = stats.ks_2samp(blueB, redB)
    ans1a = stats.anderson_ksamp([blueB,redB])
    print 'KS for blue vs red doppler parameter: ',ans1
    print 'AD for blue vs red doppler parameter: ',ans1a
    
    print
    print ' Likelihood Distributions: '
    print
    
    # perform the K-S and AD tests for likelihood
    ans1 = stats.ks_2samp(blueLike, redLike)
    ans1a = stats.anderson_ksamp([blueLike,redLike])
    print 'KS for blue vs red likelihood: ',ans1
    print 'AD for blue vs red likelihood: ',ans1a
    
    print
    print ' COMPLETED. '
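
The function above repeats the same KS/AD comparison for every quantity; a small hypothetical helper (the name and signature are illustrative, not part of the source) could keep those calls uniform:

from scipy import stats

def compare_red_blue(label, blue, red):
    # run both two-sample tests and print them with a common label (sketch)
    ks = stats.ks_2samp(blue, red)
    ad = stats.anderson_ksamp([blue, red])
    print 'KS for blue vs red {0}: '.format(label), ks
    print 'AD for blue vs red {0}: '.format(label), ad

# e.g. compare_red_blue('EW', blueW, redW)
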
Exemple #43
0
def best_fit(opts, R_proj, r_scale_ini, grid, bg_density_ini = 0.0, fit_bg = True,
                   fpb = 1, weights = None):

    if fit_bg:
        print ' Fitting the scale radius and the background for', len(R_proj), 'members.'
    else:
        print ' Fitting the scale radius only for', len(R_proj), 'members.'
    print ' Initial estimates of parameters: ', r_scale_ini, bg_density_ini

    if weights is None:
        weights = np.ones(len(R_proj))

    # Sort data by projected radius (compute the permutation once, before
    # R_proj is reordered, so the weights stay matched to their radii)
    order = np.argsort(R_proj)
    R_proj = R_proj[order]
    weights = weights[order]

    # Get number density for various points along R_proj 
    num_points = fpb * int(np.sqrt(len(R_proj)))
    if num_points < 5:
        print ' Only ', len(R_proj), 'data - not enough for the profiles'
    rd, dp, edp = get_points(R_proj, num_points, weights)

    # Find best values for scale radius and background density
    if not fit_bg:
        bg_bounds = (bg_density_ini, bg_density_ini)
    else:
        bg_bounds = (0.001, None)
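    # NB: when fit_bg is False, pinning both bounds to bg_density_ini makes the
    # optimizer hold the background fixed at its initial value.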

    # Get fit to profile      
    x_fit = np.arange(0.001, 5.0, 0.005)

    if opts.model == 'beta':

        print ' Using beta model. [Beta = ' + str(opts.beta) + ']'
        
        r_scale_best, bg_density_best = minimize(bm_proj_maxlik_bg, [r_scale_ini, bg_density_ini],
                                                 args = (R_proj, opts.beta, ), method = 'SLSQP',
                                                 options={'disp': False},
                                                 bounds = ((0.001, None), bg_bounds)).x
    
        y_fit = bm_num_density(R_proj, r_scale_best, bg_density_best, opts.beta) * \
          np.array(map(partial(bm_proj_sd, alpha = opts.beta), (x_fit / r_scale_best))) + \
          bg_density_best * np.sum(weights) / len(R_proj)

    else:

        print ' Using NFW profile.'
          
        r_scale_best, bg_density_best = minimize(nfw_proj_maxlik_bg, [r_scale_ini, bg_density_ini],
                                                 args = (R_proj, ), method = 'SLSQP',
                                                 options={'disp': False},
                                                 bounds = ((0.001, None), bg_bounds)).x

        y_fit = nfw_num_density(R_proj, r_scale_best, bg_density_best) * \
          np.array(map(nfw_proj_sd, (x_fit / r_scale_best))) + \
          bg_density_best * np.sum(weights) / len(R_proj)

    # Evaluate chi^2
    if fit_bg:
        npfree = 2
    else:
        npfree = 1
        
    chi2_param = chi2_gof(np.interp(rd, x_fit, y_fit), dp, edp, npfree)

    # Evaluate K-S test
    ks_param = ks_2samp(np.interp(rd, x_fit, y_fit), dp)

    # Evaluate A-D test
    ad_param = anderson_ksamp([np.interp(rd, x_fit, y_fit), dp])
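    # NB: ks_2samp and anderson_ksamp are two-sample tests for draws from
    # distributions; applying them to a fitted curve vs. binned densities, as
    # here, is best read as a rough goodness-of-fit heuristic.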

    if opts.confidence:
        # Evaluate confidence intervals for r_s
        cf_limits = confidence(opts, r_scale_best, bg_density_best, R_proj, grid, npfree)

    # Print results 
    print ''
    print ' Best-fit r_s:', r_scale_best
    if opts.confidence:
        print ' 1-sigma interval: ', cf_limits[0], cf_limits[1]
    print ' Best-fit background density:', bg_density_best, 'gals/Mpc^2'
    print ''
    print ' Chi^2 of the fit is', chi2_param[0], 'for', len(edp) - npfree, 'd.o.f.'
    print ' Probability of the fit is', chi2_param[1], '[rejected if > 0.99]'
    print ''
    print ' KS test results:', ks_param[0], ks_param[1]
    print ' AD test results:', ad_param[0], ad_param[2]

    return [r_scale_best, bg_density_best], [rd, dp, edp], [x_fit, y_fit], chi2_param
Exemple #44
0
 def _test_impl(self, data1: t.List[Number], data2: t.List[Number]) -> float:
     # cap the approximate significance level at 1 (min, not max, implements a cap)
     return min(st.anderson_ksamp([data1, data2])[-1], 1)
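
The one-liner above reads the approximate significance level as the last element of anderson_ksamp's return value. A minimal, self-contained sketch of unpacking that result (the sample data here is illustrative):

import numpy as np
from scipy import stats as st

rng = np.random.default_rng(0)
a = rng.normal(size=200)
b = rng.normal(loc=0.3, size=250)

res = st.anderson_ksamp([a, b])
# res[0]  : the normalized k-sample Anderson-Darling statistic
# res[1]  : critical values at fixed significance levels (25% down to 1% or lower)
# res[-1] : the approximate significance level, clipped by scipy to [0.001, 0.25]
print(res[0], res[-1])
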
def classifier_eval(mode,keras_mode,args):
	##############################################################################
	# Setting parameters
	#


	name=args[0]
	sample1_name= args[1]
	sample2_name= args[2]

	shuffling_seed = args[3]

	# mode = 0: standard evaluation of a model; 1: grid hyperparameter search;
	# 2: spearmint hyperparameter search
	comp_file_list=args[4]
	print(comp_file_list)
	cv_n_iter = args[5]
	clf = args[6]
	C_range = args[7]
	gamma_range = args[8]

	if len(args)>9:
		#AD mode =1 : Anderson Darling test used instead of Kolmogorov Smirnov
		#AD mode =2 : Visualisation of the decision boundary
		#AD mode anything else: use KS and no visualisation
		AD_mode = args[9]
	else:
		AD_mode = 0

	if mode==0:
		#For standard evaluation
		score_list=[]
		print("standard evaluation mode")
	elif mode==1:
		#For grid search
		print("grid hyperparameter search mode")
		param_grid = dict(gamma=gamma_range, C=C_range)

	elif mode==2:
		#For spearmint hyperparameter search
		score_list=[]
		print("spearmint hyperparameter search mode")
	else:
		print("No valid mode chosen")
		return 1
	

	##############################################################################
	# Load and prepare data set
	#
	# dataset for grid search

	for comp_file_0,comp_file_1 in comp_file_list:

		print("Operating of files :"+comp_file_0+"   "+comp_file_1)

		#extracts data from the files
		features_0=np.loadtxt(comp_file_0,dtype='d')
		features_1=np.loadtxt(comp_file_1,dtype='d')

		#determine how many data points are in each sample
		no_0=features_0.shape[0]
		no_1=features_1.shape[0]
		no_tot=no_0+no_1
		#Give all samples in file 0 the label 0 and in file 1 the label 1
		label_0=np.zeros((no_0,1))
		label_1=np.ones((no_1,1))

		#Create an array containing samples and features.
		data_0=np.c_[features_0,label_0]
		data_1=np.c_[features_1,label_1]

		data=np.r_[data_0,data_1]

		np.random.shuffle(data)

		X=data[:,:-1]
		y=data[:,-1]
		print("X : ",X)
		print("y : ",y)
		atest_size=0.2
		if cv_n_iter==1:
			train_range = range(int(math.floor(no_tot*(1-atest_size))))
			test_range  = range(int(math.ceil(no_tot*(1-atest_size))),no_tot)
			#print("train_range : ", train_range)
			#print("test_range : ", test_range)
			acv = Counter(train_range,test_range)
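			# NB: "Counter" here is presumably a project-local fixed train/test
			# split helper (it takes two index ranges), not collections.Counter.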
			#print(acv)
		else:
			acv = StratifiedShuffleSplit(y, n_iter=cv_n_iter, test_size=atest_size, random_state=42)

		print("Finished with setting up samples")

		# It is usually a good idea to scale the data for SVM training.
		# We are cheating a bit in this example in scaling all of the data,
		# instead of fitting the transformation on the training set and
		# just applying it on the test set.

		if AD_mode != 2:
			scaler = StandardScaler()
			X = scaler.fit_transform(X)
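			# A leakage-free alternative (sketch) would fit the scaler on each
			# training fold only, e.g.
			#   scaler = StandardScaler().fit(X[train_index])
			#   X_train, X_test = scaler.transform(X[train_index]), scaler.transform(X[test_index])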

		if mode==1:
			##############################################################################
			# Grid Search
			#
			# Train classifiers
			#
			# For an initial search, a logarithmic grid with basis
			# 10 is often helpful. Using a basis of 2, a finer
			# tuning can be achieved but at a much higher cost.
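			# (sketch) e.g. C_range = np.logspace(-2, 10, 13) and
			# gamma_range = np.logspace(-9, 3, 13) form such a base-10 grid;
			# here both ranges are passed in as arguments.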

			if AD_mode==1:
				grid = GridSearchCV(clf, scoring=p_value_scoring_object.p_value_scoring_object_AD ,param_grid=param_grid, cv=acv)
			else:
				grid = GridSearchCV(clf, scoring=p_value_scoring_object.p_value_scoring_object ,param_grid=param_grid, cv=acv)
			

			grid.fit(X, y)

			print("The best parameters are %s with a score of %0.2f"
					% (grid.best_params_, grid.best_score_))

			# Now we need to fit a classifier for all parameters in the 2d version
			# (we use a smaller set of parameters here because it takes a while to train)

			C_2d_range = [1e-2, 1, 1e2]
			gamma_2d_range = [1e-1, 1, 1e1]
			# X_2d / y_2d were not defined in this function; as a sketch, use the
			# first two feature columns of X for the 2-D visualisation.
			X_2d, y_2d = X[:, :2], y
			classifiers = []
			for C in C_2d_range:
				for gamma in gamma_2d_range:
					clf = SVC(C=C, gamma=gamma)
					clf.fit(X_2d, y_2d)
					classifiers.append((C, gamma, clf))

			##############################################################################
			# visualization
			#
			# draw visualization of parameter effects

			plt.figure(figsize=(8, 6))
			xx, yy = np.meshgrid(np.linspace(-3, 3, 200), np.linspace(-3, 3, 200))
			for (k, (C, gamma, clf)) in enumerate(classifiers):
				# evaluate decision function in a grid
				Z = clf.decision_function(np.c_[xx.ravel(), yy.ravel()])
				Z = Z.reshape(xx.shape)

				# visualize decision function for these parameters
				plt.subplot(len(C_2d_range), len(gamma_2d_range), k + 1)
				plt.title("gamma=10^%d, C=10^%d" % (np.log10(gamma), np.log10(C)),size='medium')

				# visualize parameter's effect on decision function
				plt.pcolormesh(xx, yy, -Z, cmap=plt.cm.RdBu)
				plt.scatter(X_2d[:, 0], X_2d[:, 1], c=y_2d, cmap=plt.cm.RdBu_r)
				plt.xticks(())
				plt.yticks(())
				plt.axis('tight')

				plt.savefig('prediction_comparison.png')
				# plot the scores of the grid
				# grid_scores_ contains parameter settings and scores
				# We extract just the scores
				scores = [x[1] for x in grid.grid_scores_]
				scores = np.array(scores).reshape(len(C_range), len(gamma_range))

			# Draw heatmap of the validation accuracy as a function of gamma and C
			#
			# The score are encoded as colors with the hot colormap which varies from dark
			# red to bright yellow. As the most interesting scores are all located in the
			# 0.92 to 0.97 range we use a custom normalizer to set the mid-point to 0.92 so
			# as to make it easier to visualize the small variations of score values in the
			# interesting range while not brutally collapsing all the low score values to
			# the same color.

			plt.figure(figsize=(8, 6))
			plt.subplots_adjust(left=.2, right=0.95, bottom=0.15, top=0.95)
			plt.imshow(scores, interpolation='nearest', cmap=plt.cm.hot,
					norm=MidpointNormalize(vmin=-1.0, midpoint=-0.0001))
			plt.xlabel('gamma')
			plt.ylabel('C')
			plt.colorbar()
			plt.xticks(np.arange(len(gamma_range)), gamma_range, rotation=45)
			plt.yticks(np.arange(len(C_range)), C_range)
			plt.title('Validation accuracy')
			plt.savefig('Heat_map.png')
		else:
			if keras_mode==1:
				from keras.models import Sequential
				from keras.layers.core import Dense, Activation
				from keras.layers import Dropout
				from keras.utils import np_utils, generic_utils

				dimof_input = X.shape[1]
				dimof_output =2
				y = np_utils.to_categorical(y, dimof_output)
				
				print("dimof_input : ",dimof_input, "dimof_output : ", dimof_output)				
				#y = np_utils.to_categorical(y, dimof_output)
				scores = []
				counter = 1
				for train_index, test_index in acv:
					print("Cross validation run ", counter)
					X_train, X_test = X[train_index], X[test_index]
					y_train, y_test = y[train_index], y[test_index]
					
					print("X_train : ",X_train)
					print("y_train : ",y_train)

					batch_size = 1 
					dimof_middle = args[10] 
					dropout = 0.5 
					countof_epoch = 5 
					n_hidden_layers = args[11]

					model = Sequential() 
					model.add(Dense(input_dim=dimof_input, output_dim=dimof_middle, init="glorot_uniform",activation='tanh'))
					model.add(Dropout(dropout))

					for n in range(n_hidden_layers):
						model.add(Dense(input_dim=dimof_middle, output_dim=dimof_middle, init="glorot_uniform",activation='tanh'))
						model.add(Dropout(dropout))
							
					model.add(Dense(input_dim=dimof_middle, output_dim=dimof_output, init="glorot_uniform",activation='sigmoid'))

					#Compiling (might take longer)
					model.compile(loss='categorical_crossentropy', optimizer='sgd')
					model.fit(X_train, y_train,show_accuracy=True,batch_size=batch_size, nb_epoch=countof_epoch, verbose=0)
					prob_pred = model.predict_proba(X_test)
					print("prob_pred : ", prob_pred)
					assert (not (np.isnan(np.sum(prob_pred))))
		
					# for y is 2D change dimof_output =2, add y = np_utils.to_categorical(y, dimof_output) and change the following line
					prob_pred = np.array([sublist[0] for sublist in prob_pred])		
					y_test = np.array([sublist[0] for sublist in y_test]) 
					print("y_test : ", y_test)
					print("prob_pred : ", prob_pred)
					#Just like in p_value_scoring_strategy.py
					y_test    = np.reshape(y_test,(1,y_test.shape[0]))
					prob_pred = np.reshape(prob_pred,(1,prob_pred.shape[0]))
					prob_0    = prob_pred[np.logical_or.reduce([y_test==0])]
					prob_1    = prob_pred[np.logical_or.reduce([y_test==1])]
					if __debug__:
						print("Plot")
					
					if AD_mode==1:
						p_AD_stat=stats.anderson_ksamp([prob_0,prob_1])
						print(p_AD_stat)
						scores.append(p_AD_stat[2])
					else:
						p_KS=stats.ks_2samp(prob_0,prob_1)
						print(p_KS)
						scores.append(p_KS[1])
					counter +=1
	
					
			else:
				if keras_mode==2:
					X, y = Xy_to_keras_Xy(X,y)				
				if AD_mode==1:
					scores = (-1)*cross_validation.cross_val_score(clf,X,y,cv=acv,scoring=p_value_scoring_object.p_value_scoring_object_AD)
				elif AD_mode==2:
					print("X[:,0].min() , ", X[:,0].min(), "X[:,0].max() : ", X[:,0].max())
					scores = (-1)*cross_validation.cross_val_score(clf,X,y,cv=acv,scoring=p_value_scoring_object.p_value_scoring_object_visualisation)
					import os
					os.rename("visualisation.png",name+"_visualisation.png")
				else:
					scores = (-1)*cross_validation.cross_val_score(clf,X,y,cv=acv,scoring=p_value_scoring_object.p_value_scoring_object)	
			print("scores : ",scores)
			score_list.append(np.mean(scores))
			if mode==2:
				return np.mean(scores)

	############################################################################################################################################################
	###############################################################  Evaluation of results  ####################################################################
	############################################################################################################################################################


	if mode==0:
		# The score list has been computed. Let's plot the distribution
		print(score_list)
		with open(name+"_p_values",'w') as p_value_file:
			for item in score_list:
				p_value_file.write(str(item)+'\n')
		histo_plot_pvalue(score_list,50,"p value","Frequency","p value distribution",name)
def main():

    
    if getpass.getuser() == 'David':
        pickleFilename = '/Users/David/Research_Documents/inclination/git_inclination/pilot_paper_code/pilotData2.p'
        resultsFilename = '/Users/David/Research_Documents/inclination/git_inclination/LG_correlation_combined5_3.csv'
        saveDirectory = '/Users/David/Research_Documents/inclination/git_inclination/pilot_paper_code/plots/'

    elif getpass.getuser() == 'frenchd':
        pickleFilename = '/usr/users/frenchd/inclination/git_inclination/pilot_paper_code/pilotData2.p'
        resultsFilename = '/usr/users/frenchd/inclination/git_inclination/LG_correlation_combined5_3.csv'
        saveDirectory = '/usr/users/frenchd/inclination/git_inclination/pilot_paper_code/plots/'

    else:
        print 'Could not determine username. Exiting.'
        sys.exit()
    

    pickleFile = open(pickleFilename,'rb')  # pickle files should be opened in binary mode
    fullDict = pickle.load(pickleFile)
    
    pickleFile.close()
    
    # save each plot?
    save = False
    
    results = open(resultsFilename,'rU')
    reader = csv.DictReader(results)
    
    virInclude = False
    cusInclude = False
    finalInclude = True
    
    # if match, then the includes in the file have to MATCH the includes above. e.g., if 
    # virInclude = False, cusInclude = True, finalInclude = False, then only systems
    # matching those three would be included. Otherwise, all cusInclude = True would be included
    # regardless of the others
    match = False
    
    # all the lists to be used for associated lines
    lyaVList = []
    lyaWList = []
    naList = []
    bList = []
    impactList = []
    azList = []
    incList = []
    fancyIncList = []
    cosIncList = []
    cosFancyIncList = []
    paList = []
    vcorrList = []
    majList = []
    difList = []
    envList = []
    morphList = []
    m15List = []
    virList = []
    likeList = []
    likem15List = []
    
    
    for l in reader:
        include_vir = eval(l['include_vir'])
        include_cus = eval(l['include_custom'])
        include = eval(l['include'])
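        # NB: eval() on CSV fields executes arbitrary text; for untrusted input,
        # ast.literal_eval would be the safer parser.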
        
        go = False
        if match:
            if virInclude == include_vir and cusInclude == include_cus:
                go = True
            else:
                go = False
                
        else:
            if virInclude and include_vir:
                go = True
                
            elif cusInclude and include_cus:
                go = True
                
            elif finalInclude and include:
                go = True
            
            else:
                go = False
        
        if go:
            AGNra_dec = eval(l['degreesJ2000RA_DecAGN'])
            galaxyRA_Dec = eval(l['degreesJ2000RA_DecGalaxy'])
            lyaV = l['Lya_v']
            lyaW = l['Lya_W'].partition('pm')[0]
            lyaW_err = l['Lya_W'].partition('pm')[2]
            env = l['environment']
            galaxyName = l['galaxyName']
            impact = l['impactParameter (kpc)']
            galaxyDist = l['distGalaxy (Mpc)']
            pa = l['positionAngle (deg)']
            RC3pa = l['RC3pa (deg)']
            morph = l['morphology']
            vcorr = l['vcorrGalaxy (km/s)']
            maj = l['majorAxis (kpc)']
            min = l['minorAxis (kpc)']
            inc = l['inclination (deg)']
            az = l['azimuth (deg)']
            b = l['b'].partition('pm')[0]
            b_err = l['b'].partition('pm')[2]
            na = eval(l['Na'].partition(' pm ')[0])
            print "l['Na'].partition(' pm ')[2] : ",l['Na'].partition(' pm ')
            na_err = eval(l['Na'].partition(' pm ')[2])
            likelihood = l['likelihood']
            likelihoodm15 = l['likelihood_1.5']
            virialRadius = l['virialRadius']
            m15 = l['d^1.5']
            vel_diff = l['vel_diff']
            
            if isNumber(RC3pa) and not isNumber(pa):
                pa = RC3pa
            
            if isNumber(inc):
                cosInc = cos(float(inc) * pi/180.)
                
                if isNumber(maj) and isNumber(min):
                    q0 = 0.2
                    fancyInc = calculateFancyInclination(maj,min,q0)
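                    # presumably the standard disk relation
                    # cos^2(i) = ((min/maj)^2 - q0^2) / (1 - q0^2),
                    # with q0 the intrinsic axial ratio of an edge-on disk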
                    cosFancyInc = cos(fancyInc * pi/180)
                else:
                    fancyInc = -99
                    cosFancyInc = -99
            else:
                cosInc = -99
                inc = -99
                fancyInc = -99
                cosFancyInc = -99
            
            # all the lists to be used for associated lines
            lyaVList.append(float(lyaV))
            lyaWList.append(float(lyaW))
            naList.append(na)
            bList.append(float(b))
            impactList.append(float(impact))
            azList.append(az)
            incList.append(float(inc))
            fancyIncList.append(fancyInc)
            cosIncList.append(cosInc)
            cosFancyIncList.append(cosFancyInc)
            paList.append(pa)
            vcorrList.append(vcorr)
            majList.append(maj)
            difList.append(float(vel_diff))
            envList.append(float(env))
            morphList.append(morph)
            m15List.append(m15)
            virList.append(virialRadius)
            likeList.append(likelihood)
            likem15List.append(likelihoodm15)

    results.close()
        
    
##########################################################################################
##########################################################################################
    
    # lists for the full galaxy dataset
    
    allPA = fullDict['allPA']
    allInclinations = fullDict['allInclinations']
    allCosInclinations = fullDict['allCosInclinations']
    allFancyInclinations = fullDict['allFancyInclinations']
    allCosFancyInclinations = fullDict['allCosFancyInclinations']
    
    total = 0
    totalNo = 0
    totalYes = 0
    totalIsolated = 0
    totalGroup = 0
    

########################################################################################
########################################################################################

    # plot histograms of the cos(inclinations) for both associated galaxies and the 
    # full galaxy data set, combining both redshifted and blueshifted
    plot_dist_cosInc = False
    
    if plot_dist_cosInc:
    
        '''
        Here's an example:
        
        n1 = 200
        n2 = 300
        
        rvs1 = stats.norm.rvs(size=n1, loc=0., scale=1)
        rvs2 = stats.norm.rvs(size=n2, loc=0.5, scale=1.5)
        
        
        ans = stats.ks_2samp(rvs1, rvs2)
        print 'ans: ',ans
        print
        
        '''
    
        # define the datasets
        rvs1all = cosIncList
        rvs1 = []
        rvs2 = allCosInclinations

        # remove -99 'no-data' values
        for i in rvs1all:
            if float(i) >=0:
                rvs1.append(i)
                
        # do the K-S test and print the results
        ans = stats.ks_2samp(rvs1, rvs2)
        print 'KS for cosIncList vs all: ',ans 
        
        
        # plot the distributions
        fig = figure()
        
        bins = 15
        
        ax1 = fig.add_subplot(211)
        plot1 = hist(rvs1,bins=bins)
        xlim(0,1)
        
        ax2 = fig.add_subplot(212)
        plot1 = hist(rvs2,bins=bins)
        xlim(0,1)
        
        show()

        

########################################################################################
########################################################################################

    # plot histograms of the inclinations for both associated galaxies and the 
    # full galaxy data set, combining both redshifted and blueshifted
    plot_dist_inc = False
    
    if plot_dist_inc:
    
        # define the datasets
        rvs1all = incList
        rvs1 = []
        rvs2 = allInclinations
        
        # remove -99 'no-data' values
        for i in rvs1all:
            if float(i) >=0:
                rvs1.append(i)
                
                
        # perform the K-S test
        ans = stats.ks_2samp(rvs1, rvs2)
        print 'KS for incList vs all: ',ans 
        
        
        # plot the distributions 
        fig = figure()
        
        bins = 15
        
        ax1 = fig.add_subplot(211)
        plot1 = hist(rvs1,bins=bins)
        
        ax2 = fig.add_subplot(212)
        plot1 = hist(rvs2,bins=bins)
        
        show()
        


########################################################################################
########################################################################################

    # plot histograms of the inclinations for both associated galaxies and the 
    # full galaxy data set, combining both redshifted and blueshifted
    plot_dist_fancyInc = False
    
    if plot_dist_fancyInc:
    
        # define the datasets
        rvs1all = fancyIncList
        rvs1 = []
        rvs2 = allFancyInclinations
        
        # remove -99 'no-data' values
        for i in rvs1all:
            if float(i) >=0:
                rvs1.append(i)
                
                
        # perform the K-S test
        ans = stats.ks_2samp(rvs1, rvs2)
        print 'KS for fancyIncList vs all: ',ans 
        
        
        # plot the distributions 
        fig = figure()
        
        bins = 15
        
        ax1 = fig.add_subplot(211)
        plot1 = hist(rvs1,bins=bins)
        
        ax2 = fig.add_subplot(212)
        plot1 = hist(rvs2,bins=bins)
        
        show()
        

########################################################################################
########################################################################################

    # plot histograms of the inclinations for both associated galaxies and the 
    # full galaxy data set, combining both redshifted and blueshifted
    plot_dist_fancyCosInc = False
    
    if plot_dist_fancyCosInc:
    
        # define the datasets
        rvs1all = cosFancyIncList
        rvs1 = []
        rvs2 = allCosFancyInclinations
        
        # remove -99 'no-data' values
        for i in rvs1all:
            if float(i) >=0:
                rvs1.append(i)
                
                
        # perform the K-S test
        ans = stats.ks_2samp(rvs1, rvs2)
        print 'KS for cosFancyIncList vs all: ',ans
        
        
        # plot the distributions 
        fig = figure()
        
        bins = 15
        
        ax1 = fig.add_subplot(211)
        plot1 = hist(rvs1,bins=bins)
        
        ax2 = fig.add_subplot(212)
        plot1 = hist(rvs2,bins=bins)
        
        show()


########################################################################################
########################################################################################

    # plot histograms of the inclinations for both associated galaxies and the 
    # full galaxy data set, combining both redshifted and blueshifted
    plot_dist_fancyCosInc_red_blue = False
    
    if plot_dist_fancyCosInc_red_blue:
    
        blues = []
        reds = []
        all = allCosFancyInclinations
        
        # remove null "-99" values and split into red and blue groups
        for i,d in zip(cosFancyIncList,difList):
            # check for != -99
            if i>=0:
                # d = vel_galaxy - vel_absorber --> positive = blue shifted absorber (closer to us)
                if d>=0:
                    blues.append(i)
                if d<0:
                    reds.append(i)
    
                
        # perform the K-S test
        ans1 = stats.ks_2samp(blues, reds)
        ans1a = stats.anderson_ksamp([blues,reds])
        print 'KS for blue vs red: ',ans1
        print 'AD for blue vs red: ',ans1a
        
        ans2 = stats.ks_2samp(blues, all)
        print 'KS for blue vs all: ',ans2
        
        ans3 = stats.ks_2samp(reds, all)
        print 'KS for red vs all: ',ans3
        
        # plot the distributions 
        fig = figure()
        
        bins = 15
        
        ax1 = fig.add_subplot(311)
        plot1 = hist(blues,bins=bins)
        title('blueshifted Cos(fancy_inc)')
        
        ax2 = fig.add_subplot(312)
        plot2 = hist(reds,bins=bins)
        title('redshifted Cos(fancy_inc)')
        
        ax3 = fig.add_subplot(313)
        plot3 = hist(all,bins=bins)
        title('Full galaxy table Cos(fancy_inc)')
        
        show()



########################################################################################
########################################################################################

    # plot histograms of the inclinations for both associated galaxies and the 
    # full galaxy data set, combining both redshifted and blueshifted
    plot_dist_cosInc_red_blue = False
    
    if plot_dist_cosInc_red_blue:
    
        blues = []
        reds = []
        all = allCosInclinations
        
        # remove null "-99" values and split into red and blue groups
        for i,d in zip(cosIncList,difList):
            # check for != -99
            if i>=0:
                # d = vel_galaxy - vel_absorber --> positive = blue shifted absorber (closer to us)
                if d>=0:
                    blues.append(i)
                if d<0:
                    reds.append(i)
                
                
        # perform the K-S test
        ans1 = stats.ks_2samp(blues, reds)
        ans1a = stats.anderson_ksamp([blues,reds])
        print 'KS for blue vs red: ',ans1
        print 'AD for blue vs red: ',ans1a
        
        ans2 = stats.ks_2samp(blues, all)
        print 'KS for blue vs all: ',ans2
        
        ans3 = stats.ks_2samp(reds, all)
        print 'KS for red vs all: ',ans3
        
        # plot the distributions 
        fig = figure(figsize=(8,8))
        subplots_adjust(left=None, bottom=None, right=None, top=None, wspace=None, hspace=0.4)

        bins = 15
        
        ax1 = fig.add_subplot(311)
        plot1 = hist(blues,bins=bins)
        title('blueshifted Cos(inc)')
        
        ax2 = fig.add_subplot(312)
        plot2 = hist(reds,bins=bins)
        title('redshifted Cos(inc)')
        
        ax3 = fig.add_subplot(313)
        plot3 = hist(all,bins=bins)
        title('Full galaxy table Cos(inc)')
        
        show()


########################################################################################
########################################################################################

    # plot histograms of the inclinations for both associated galaxies and the 
    # full galaxy data set, combining both redshifted and blueshifted
    plot_dist_fancy_inc_red_blue = True
    
    if plot_dist_fancy_inc_red_blue:
    
        blues = []
        reds = []
        all = allFancyInclinations
        
        # remove null "-99" values and split into red and blue groups
        for i,d in zip(fancyIncList,difList):
            # check for != -99
            if i>=0:
                # d = vel_galaxy - vel_absorber --> positive = blue shifted absorber (closer to us)
                if d>=0:
                    blues.append(i)
                if d<0:
                    reds.append(i)
                
                
        # perform the K-S test
        ans1 = stats.ks_2samp(blues, reds)
        ans1a = stats.anderson_ksamp([blues,reds])
        print 'KS for blue vs red: ',ans1
        print 'AD for blue vs red: ',ans1a
        
        ans2 = stats.ks_2samp(blues, all)
        print 'KS for blue vs all: ',ans2
        
        ans3 = stats.ks_2samp(reds, all)
        print 'KS for red vs all: ',ans3
        
        # plot the distributions 
        fig = figure(figsize=(8,8))
        subplots_adjust(left=None, bottom=None, right=None, top=None, wspace=None, hspace=0.4)

        bins = 15
        
        ax1 = fig.add_subplot(311)
        plot1 = hist(blues,bins=bins)
        title('blueshifted fancy_inc')
        
        ax2 = fig.add_subplot(312)
        plot2 = hist(reds,bins=bins)
        title('redshifted fancy_inc')
        
        ax3 = fig.add_subplot(313)
        plot3 = hist(all,bins=bins)
        title('Full galaxy table fancy_inc')
        
        show()
Exemple #47
0
def ADTest(x,y):
	try:
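		# index 2 of the result is the approximate significance level (p-value)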
		return anderson_ksamp([x,y])[2]
	except Exception as e:
		print e
		return -1
def main():
        
    if getpass.getuser() == 'David':
        pickleFilename = '/Users/David/Research_Documents/inclination/pilotData.p'
        saveDirectory = '/Users/David/Research_Documents/inclination/pilot_paper/figures'

    elif getpass.getuser() == 'frenchd':
        pickleFilename = '/usr/users/frenchd/inclination/pilotData.p'
        saveDirectory = '/usr/users/frenchd/inclination/pilot_paper/figures'

    else:
        print 'Could not determine username. Exiting.'
        sys.exit()
    
    pickleFile = open(pickleFilename,'rb')  # pickle files should be opened in binary mode
    fullDict = pickle.load(pickleFile)
    
    pickleFile.close()
    
    
    # save each plot?
    save = False
    
    
    # overall structure: fullDict is a dictionary with all the lines and their data in it
    # separated into 'associated' and 'ambiguous' as the two keys. Associated contains
    # all the lists of data for lines associated with a galaxy. Ambiguous contains all
    # the lists of data for lines not unambiguously associated (could be many galaxies
    # or none)
    
##########################################################################################
##########################################################################################
    # all the lists to be used for associated lines
    
    lyaVList = fullDict['lyaVList']
    lyaWList = fullDict['lyaWList']
    lyaErrorList = fullDict['lyaErrorList']
    naList = fullDict['naList']
    bList = fullDict['bList']
    impactList = fullDict['impactList']
    azList = fullDict['azList']
    newAzList = fullDict['newAzList']
    incList = fullDict['incList']
    fancyIncList = fullDict['fancyIncList']
    cosIncList = fullDict['cosIncList']
    fancyCosIncList = fullDict['fancyCosIncList']
    paList = fullDict['paList']
    vcorrList = fullDict['vcorrList']
    majList = fullDict['majList']
    difList = fullDict['difList']
    envList = fullDict['envList']
    morphList = fullDict['morphList']
    galaxyNameList = fullDict['galaxyNameList']
    
    
    lyaV_blue = []
    lyaV_red = []
    lyaW_blue = []
    lyaW_red = []
    lyaErr_blue = []
    lyaErr_red = []
    na_blue = []
    na_red = []
    b_blue = []
    b_red = []
    impact_blue = []
    impact_red = []
    az_blue = []
    az_red = []
    newAz_blue = []
    newAz_red = []
    inc_blue = []
    inc_red = []
    fancyInc_blue = []
    fancyInc_red = []
    cosInc_blue = []
    cosInc_red = []
    fancyCosInc_blue = []
    fancyCosInc_red = []
    pa_blue = []
    pa_red = []
    vcorr_blue = []
    vcorr_red = []
    maj_blue = []
    maj_red = []
    dif_blue = []
    dif_red = []
    env_blue = []
    env_red = []
    morph_blue = []
    morph_red = []
    
    c = -1
    for d in difList:
        c +=1
        if d > 0:
            # blueshifted absorption
            lyaV_blue.append(lyaVList[c])
            lyaW_blue.append(lyaWList[c])
            lyaErr_blue.append(lyaErrorList[c])
            na_blue.append(naList[c])
            b_blue.append(bList[c])
            impact_blue.append(impactList[c])
            az_blue.append(azList[c])
            newAz_blue.append(newAzList[c])
            inc_blue.append(incList[c])
            fancyInc_blue.append(fancyIncList[c])
            cosInc_blue.append(cosIncList[c])
            fancyCosInc_blue.append(fancyCosIncList[c])
            pa_blue.append(paList[c])
            vcorr_blue.append(vcorrList[c])
            maj_blue.append(majList[c])
            dif_blue.append(difList[c])
            env_blue.append(envList[c])
            morph_blue.append(morphList[c])
            
        else:
            # redshifted absorption
            lyaV_red.append(lyaVList[c])
            lyaW_red.append(lyaWList[c])
            lyaErr_red.append(lyaErrorList[c])
            na_red.append(naList[c])
            b_red.append(bList[c])
            impact_red.append(impactList[c])
            az_red.append(azList[c])
            newAz_red.append(newAzList[c])
            inc_red.append(incList[c])
            fancyInc_red.append(fancyIncList[c])
            cosInc_red.append(cosIncList[c])
            fancyCosInc_red.append(fancyCosIncList[c])
            pa_red.append(paList[c])
            vcorr_red.append(vcorrList[c])
            maj_red.append(majList[c])
            dif_red.append(difList[c])
            env_red.append(envList[c])
            morph_red.append(morphList[c])
    
        
        
##########################################################################################
##########################################################################################
    # all the lists to be used for ambiguous lines
    
    lyaVListAmb = fullDict['lyaVListAmb']
    lyaWListAmb = fullDict['lyaWListAmb']
    lyaErrorListAmb = fullDict['lyaErrorListAmb']
    naListAmb = fullDict['naListAmb']
    bListAmb = fullDict['bListAmb']
    impactListAmb = fullDict['impactListAmb']
    azListAmb = fullDict['azListAmb']
    newAzListAmb = fullDict['newAzListAmb']
    incListAmb = fullDict['incListAmb']
    fancyIncListAmb = fullDict['fancyIncListAmb']
    cosIncListAmb = fullDict['cosIncListAmb']
    fancyCosIncListAmb = fullDict['fancyCosIncListAmb']
    paListAmb = fullDict['paListAmb']
    vcorrListAmb = fullDict['vcorrListAmb']
    majListAmb = fullDict['majListAmb']
    difListAmb = fullDict['difListAmb']
    envListAmb = fullDict['envListAmb']
    morphListAmb = fullDict['morphListAmb']
    galaxyNameListAmb = fullDict['galaxyNameListAmb']
    
    lyaV_blueAmb = []
    lyaV_redAmb = []
    lyaW_blueAmb = []
    lyaW_redAmb = []
    lyaErr_blueAmb = []
    lyaErr_redAmb = []
    na_blueAmb = []
    na_redAmb = []
    b_blueAmb = []
    b_redAmb = []
    impact_blueAmb = []
    impact_redAmb = []
    az_blueAmb = []
    az_redAmb = []
    newAz_blueAmb = []
    newAz_redAmb = []
    inc_blueAmb = []
    inc_redAmb = []
    fancyInc_blueAmb = []
    fancyInc_redAmb = []
    cosInc_blueAmb = []
    cosInc_redAmb = []
    fancyCosInc_blueAmb = []
    fancyCosInc_redAmb = []
    pa_blueAmb = []
    pa_redAmb = []
    vcorr_blueAmb = []
    vcorr_redAmb = []
    maj_blueAmb = []
    maj_redAmb = []
    dif_blueAmb = []
    dif_redAmb = []
    env_blueAmb = []
    env_redAmb = []
    morph_blueAmb = []
    morph_redAmb = []
    
    
    c = -1
    for d in difListAmb:
        c +=1
        if d > 0:
            # blueshifted absorption
            lyaV_blueAmb.append(lyaVListAmb[c])
            lyaW_blueAmb.append(lyaWListAmb[c])
            lyaErr_blueAmb.append(lyaErrorListAmb[c])
            na_blueAmb.append(naListAmb[c])
            b_blueAmb.append(bListAmb[c])
            impact_blueAmb.append(impactListAmb[c])
            az_blueAmb.append(azListAmb[c])
            newAz_blueAmb.append(newAzListAmb[c])
            inc_blueAmb.append(incListAmb[c])
            fancyInc_blueAmb.append(fancyIncListAmb[c])
            cosInc_blueAmb.append(cosIncListAmb[c])
            fancyCosInc_blueAmb.append(fancyCosIncListAmb[c])
            pa_blueAmb.append(paListAmb[c])
            vcorr_blueAmb.append(vcorrListAmb[c])
            maj_blueAmb.append(majListAmb[c])
            dif_blueAmb.append(difListAmb[c])
            env_blueAmb.append(envListAmb[c])
            morph_blueAmb.append(morphListAmb[c])
            
        else:
            # redshifted absorption
            lyaV_redAmb.append(lyaVListAmb[c])
            lyaW_redAmb.append(lyaWListAmb[c])
            lyaErr_redAmb.append(lyaErrorListAmb[c])
            na_redAmb.append(naListAmb[c])
            b_redAmb.append(bListAmb[c])
            impact_redAmb.append(impactListAmb[c])
            az_redAmb.append(azListAmb[c])
            newAz_redAmb.append(newAzListAmb[c])
            inc_redAmb.append(incListAmb[c])
            fancyInc_redAmb.append(fancyIncListAmb[c])
            cosInc_redAmb.append(cosIncListAmb[c])
            fancyCosInc_redAmb.append(fancyCosIncListAmb[c])
            pa_redAmb.append(paListAmb[c])
            vcorr_redAmb.append(vcorrListAmb[c])
            maj_redAmb.append(majListAmb[c])
            dif_redAmb.append(difListAmb[c])
            env_redAmb.append(envListAmb[c])
            morph_redAmb.append(morphListAmb[c])
    
    
##########################################################################################
##########################################################################################
    
    # lists for the full galaxy dataset
    
    allPA = fullDict['allPA']
    allInclinations = fullDict['allInclinations']
    allCosInclinations = fullDict['allCosInclinations']
    allFancyInclinations = fullDict['allFancyInclinations']
    allFancyCosInclinations = fullDict['allCosFancyInclinations']
    
    total = 0
    totalNo = 0
    totalYes = 0
    totalIsolated = 0
    totalGroup = 0
    

########################################################################################
########################################################################################

    # plot histograms of the cos(inclinations) for both associated galaxies and the 
    # full galaxy data set, combining both redshifted and blueshifted
    plot_dist_cosInc = False
    
    if plot_dist_cosInc:
    
        '''
        Here's an example:
        
        n1 = 200
        n2 = 300
        
        rvs1 = stats.norm.rvs(size=n1, loc=0., scale=1)
        rvs2 = stats.norm.rvs(size=n2, loc=0.5, scale=1.5)
        
        
        ans = stats.ks_2samp(rvs1, rvs2)
        print 'ans: ',ans
        print
        
        '''
    
        # define the datasets
        rvs1all = cosIncList
        rvs1 = []
        rvs2 = allCosInclinations

        # remove -99 'no-data' values
        for i in rvs1all:
            if float(i) >=0:
                rvs1.append(i)
                
        # do the K-S test and print the results
        ans = stats.ks_2samp(rvs1, rvs2)
        print 'real ans: ',ans 
        
        
        # plot the distributions
        fig = figure()
        
        bins = 15
        
        ax1 = fig.add_subplot(211)
        plot1 = hist(rvs1,bins=bins)
        xlim(0,1)
        
        ax2 = fig.add_subplot(212)
        plot1 = hist(rvs2,bins=bins)
        xlim(0,1)
        
        show()

        

########################################################################################
########################################################################################

    # plot histograms of the inclinations for both associated galaxies and the 
    # full galaxy data set, combining both redshifted and blueshifted
    plot_dist_inc = False
    
    if plot_dist_inc:
    
        # define the datasets
        rvs1all = incList
        rvs1 = []
        rvs2 = allInclinations
        
        # remove -99 'no-data' values
        for i in rvs1all:
            if float(i) >=0:
                rvs1.append(i)
                
                
        # perform the K-S test
        ans = stats.ks_2samp(rvs1, rvs2)
        print 'real ans: ',ans 
        
        
        # plot the distributions 
        fig = figure()
        
        bins = 15
        
        ax1 = fig.add_subplot(211)
        plot1 = hist(rvs1,bins=bins)
        
        ax2 = fig.add_subplot(212)
        plot1 = hist(rvs2,bins=bins)
        
        show()
        


########################################################################################
########################################################################################

    # plot histograms of the inclinations for both associated galaxies and the 
    # full galaxy data set, combining both redshifted and blueshifted
    plot_dist_fancyInc = False
    
    if plot_dist_fancyInc:
    
        # define the datasets
        rvs1all = fancyIncList
        rvs1 = []
        rvs2 = allFancyInclinations
        
        # remove -99 'no-data' values
        for i in rvs1all:
            if float(i) >=0:
                rvs1.append(i)
                
                
        # perform the K-S test
        ans = stats.ks_2samp(rvs1, rvs2)
        print 'real ans: ',ans 
        
        
        # plot the distributions 
        fig = figure()
        
        bins = 15
        
        ax1 = fig.add_subplot(211)
        plot1 = hist(rvs1,bins=bins)
        
        ax2 = fig.add_subplot(212)
        plot1 = hist(rvs2,bins=bins)
        
        show()
        

########################################################################################
########################################################################################

    # plot histograms of the inclinations for both associated galaxies and the 
    # full galaxy data set, combining both redshifted and blueshifted
    plot_dist_fancyCosInc = False
    
    if plot_dist_fancyCosInc:
    
        # define the datasets
        rvs1all = fancyCosIncList
        rvs1 = []
        rvs2 = allFancyCosInclinations
        
        # remove -99 'no-data' values
        for i in rvs1all:
            if float(i) >=0:
                rvs1.append(i)
                
                
        # perform the K-S test
        ans = stats.ks_2samp(rvs1, rvs2)
        print 'real ans: ',ans 
        
        
        # plot the distributions 
        fig = figure()
        
        bins = 15
        
        ax1 = fig.add_subplot(211)
        plot1 = hist(rvs1,bins=bins)
        
        ax2 = fig.add_subplot(212)
        plot1 = hist(rvs2,bins=bins)
        
        show()


########################################################################################
########################################################################################

    # plot histograms of the inclinations for both associated galaxies and the 
    # full galaxy data set, combining both redshifted and blueshifted
    plot_dist_fancyCosInc_red_blue = False
    
    if plot_dist_fancyCosInc_red_blue:
    
        # define the datasets
        rvs1all = fancyCosInc_blue
        rvs1 = []
        rvs2all = fancyCosInc_red
        rvs2 = []
        rvs3 = allFancyCosInclinations
        
        # remove -99 'no-data' values
        for i in rvs1all:
            if float(i) >=0:
                rvs1.append(i)

        for k in rvs2all:
            if float(k) >=0:
                rvs2.append(k)
                
                
        # perform the K-S test
        ans1 = stats.ks_2samp(rvs1, rvs2)
        print 'blue vs red: ',ans1
        
        ans2 = stats.ks_2samp(rvs1, rvs3)
        print 'blue vs all: ',ans2
        
        ans3 = stats.ks_2samp(rvs2, rvs3)
        print 'red vs all: ',ans3
        
        # plot the distributions 
        fig = figure()
        
        bins = 15
        
        ax1 = fig.add_subplot(311)
        plot1 = hist(rvs1,bins=bins)
        title('blueshifted Cos(fancy_inc)')
        
        ax2 = fig.add_subplot(312)
        plot2 = hist(rvs2,bins=bins)
        title('redshifted Cos(fancy_inc)')
        
        ax3 = fig.add_subplot(313)
        plot3 = hist(rvs3,bins=bins)
        title('Full galaxy table Cos(fancy_inc)')
        
        show()



########################################################################################
########################################################################################

    # plot histograms of the inclinations for both associated galaxies and the 
    # full galaxy data set, combining both redshifted and blueshifted
    plot_dist_cosInc_red_blue = False
    
    if plot_dist_cosInc_red_blue:
    
        # define the datasets
        rvs1all = cosInc_blue
        rvs1 = []
        rvs2all = cosInc_red
        rvs2 = []
        rvs3 = allCosInclinations
        
        # remove -99 'no-data' values
        for i in rvs1all:
            if float(i) >=0:
                rvs1.append(i)

        for k in rvs2all:
            if float(k) >=0:
                rvs2.append(k)
                
                
        # perform the K-S test
        ans1 = stats.ks_2samp(rvs1, rvs2)
        print 'blue vs red, KS: ',ans1
        ans1a = stats.anderson_ksamp([rvs1,rvs2])
        print 'blue vs red, A-D:', ans1a
        print
        
        ans2 = stats.ks_2samp(rvs1, rvs3)
        print 'blue vs all, KS: ',ans2
        ans2a = stats.anderson_ksamp([rvs1,rvs3])
        print 'blue vs all, A-D: ',ans2a
        print
        
        ans3 = stats.ks_2samp(rvs2, rvs3)
        print 'red vs all, KS: ',ans3
        ans3a = stats.anderson_ksamp([rvs2,rvs3])
        print 'red vs all, A-D: ',ans3a
        print
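
        # for reference, an A-D result can be unpacked rather than printed as
        # a raw tuple; the order below assumes SciPy's documented return of
        # (statistic, critical_values, approximate significance level). A
        # hedged sketch, not part of the original script:
        ad_stat, ad_crit, ad_p = ans1a
        print 'blue vs red A-D statistic: ', ad_stat
        print 'blue vs red A-D critical values: ', ad_crit
        print 'blue vs red A-D approximate p-value: ', ad_p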
        
        # plot the distributions 
        fig = figure(figsize=(8,8))
        subplots_adjust(left=None, bottom=None, right=None, top=None, wspace=None, hspace=0.4)

        bins = 15
        
        ax1 = fig.add_subplot(311)
        plot1 = hist(rvs1,bins=bins)
        title('blueshifted Cos(inc)')
        
        ax2 = fig.add_subplot(312)
        plot2 = hist(rvs2,bins=bins)
        title('redshifted Cos(inc)')
        
        ax3 = fig.add_subplot(313)
        plot3 = hist(rvs3,bins=bins)
        title('Full galaxy table Cos(inc)')
        
        show()



        
#         fig = figure()
# #         subplots_adjust(hspace=0.200)
#         ax = fig.add_subplot(211)
#         bins = [0,.10,.20,.30,.40,.50,.60,.70,.80,.90]
#         subplots_adjust(left=None, bottom=None, right=None, top=None, wspace=None, hspace=0.4)
# 
#         plot1 = hist(cosIncList,bins=bins,histtype='bar')
#         title('Absorber-associated galaxies cos(inclination)')
#         xlabel('Inclination (deg)')
#         ylabel('Number')
# 
#         ax = fig.add_subplot(212)
#         bins = [0,.10,.20,.30,.40,.50,.60,.70,.80,.90]
#         plot1 = hist(allCosInclinations,bins=bins,histtype='bar')
#         title('Full galaxy sample cos(inclination)')
#         xlabel('Inclination (deg)')
#         ylabel('Number')
# #         tight_layout()
# 
#         if save:
#             savefig('{0}/inc_dist.pdf'.format(saveDirectory),format='pdf')
#         else:
#             show()



########################################################################################
########################################################################################

    # plot histograms of the azimuths of the red vs blue shifted absorber samples
    # conduct KS and AD tests of these distributions
    # 
    
    plot_dist_az_red_blue = False
    
    if plot_dist_az_red_blue:
    
        # define the datasets
        rvs1all = newAz_blue
        rvs1 = []
        rvs2all = newAz_red
        rvs2 = []
        
        # remove -99 'no-data' values
        for i in rvs1all:
            if float(i) >=0:
                rvs1.append(i)

        for k in rvs2all:
            if float(k) >=0:
                rvs2.append(k)
                
                
        # perform the K-S test
        ans1 = stats.ks_2samp(rvs1, rvs2)
        print 'blue vs red, KS: ',ans1
        ans1a = stats.anderson_ksamp([rvs1,rvs2])
        print 'blue vs red, A-D:', ans1a
        print
        
        # plot the distributions
        fig = figure(figsize=(8,8))
        subplots_adjust(left=None, bottom=None, right=None, top=None, wspace=None, hspace=0.4)

        bins = 15
        
        ax1 = fig.add_subplot(211)
        plot1 = hist(rvs1,bins=bins)
        title('blueshifted Azimuths')
        
        ax2 = fig.add_subplot(212)
        plot2 = hist(rvs2,bins=bins)
        title('redshifted Azimuths')
        
        show()
    
    
    
########################################################################################
########################################################################################

    # plot histograms of the Lya equivalent widths of the red vs blue shifted absorber samples
    # conduct KS and AD tests of these distributions
    # 
    
    plot_dist_ew_red_blue = False
    
    if plot_dist_ew_red_blue:
    
        # define the datasets
        rvs1all = lyaW_blue
        rvs1 = []
        rvs2all = lyaW_red
        rvs2 = []
        
        # remove -99 'no-data' values
        for i in rvs1all:
            if float(i) >=0:
                rvs1.append(i)

        for k in rvs2all:
            if float(k) >=0:
                rvs2.append(k)
                
                
        # perform the K-S test
        ans1 = stats.ks_2samp(rvs1, rvs2)
        print 'blue vs red, KS: ',ans1
        ans1a = stats.anderson_ksamp([rvs1,rvs2])
        print 'blue vs red, A-D:', ans1a
        print
        
        # plot the distributions 
        fig = figure(figsize=(8,8))
        subplots_adjust(left=None, bottom=None, right=None, top=None, wspace=None, hspace=0.4)

        bins = 15
        
        ax1 = fig.add_subplot(211)
        plot1 = hist(rvs1,bins=bins)
        title('blueshifted LyaW')
        
        ax2 = fig.add_subplot(212)
        plot2 = hist(rvs2,bins=bins)
        title('redshifted LyaW')
        
        show()
        
        
########################################################################################
########################################################################################

    # plot histograms of the impact parameters of the red vs blue shifted absorber samples
    # conduct KS and AD tests of these distributions
    # 
    
    plot_dist_impact_red_blue = False
    
    if plot_dist_impact_red_blue:
    
        # define the datasets
        rvs1all = impact_blue
        rvs1 = []
        rvs2all = impact_red
        rvs2 = []
        
        # remove -99 'no-data' values
        for i in rvs1all:
            if float(i) >=0:
                rvs1.append(i)

        for k in rvs2all:
            if float(k) >=0:
                rvs2.append(k)
                
                
        # perform the K-S test
        ans1 = stats.ks_2samp(rvs1, rvs2)
        print 'blue vs red, KS: ',ans1
        ans1a = stats.anderson_ksamp([rvs1,rvs2])
        print 'blue vs red, A-D:', ans1a
        print
        
        # plot the distributions 
        fig = figure(figsize=(8,8))
        subplots_adjust(left=None, bottom=None, right=None, top=None, wspace=None, hspace=0.4)

        bins = 15
        
        ax1 = fig.add_subplot(211)
        plot1 = hist(rvs1,bins=bins)
        title('blueshifted impact parameter (kpc)')
        
        ax2 = fig.add_subplot(212)
        plot2 = hist(rvs2,bins=bins)
        title('redshifted impact parameter (kpc)')
        
        show()
        
        

########################################################################################
########################################################################################

    # plot histograms of the b-parameter of the red vs blue shifted absorber samples
    # conduct KS and AD tests of these distributions
    # 
    
    plot_dist_b_red_blue = False
    
    if plot_dist_b_red_blue:
    
        # define the datasets
        rvs1all = b_blue
        rvs1 = []
        rvs2all = b_red
        rvs2 = []
        
        # remove -99 'no-data' values
        for i in rvs1all:
            if float(i) >=0:
                rvs1.append(i)

        for k in rvs2all:
            if float(k) >=0:
                rvs2.append(k)
                
                
        # perform the K-S test
        ans1 = stats.ks_2samp(rvs1, rvs2)
        print 'blue vs red, KS: ',ans1
        ans1a = stats.anderson_ksamp([rvs1,rvs2])
        print 'blue vs red, A-D:', ans1a
        print
        
        # plot the distributions 
        fig = figure(figsize=(8,8))
        subplots_adjust(left=None, bottom=None, right=None, top=None, wspace=None, hspace=0.4)

        bins = 15
        
        ax1 = fig.add_subplot(211)
        plot1 = hist(rvs1,bins=bins)
        title('blueshifted Doppler b-parameter')
        
        ax2 = fig.add_subplot(212)
        plot2 = hist(rvs2,bins=bins)
        title('redshifted Doppler b-parameter')
        
        show()
        
        

########################################################################################
########################################################################################

    # plot histograms of the major axis of the red vs blue shifted absorber samples
    # conduct KS and AD tests of these distributions
    # 
    
    plot_dist_maj_red_blue = False
    
    if plot_dist_maj_red_blue:
    
        # define the datasets
        rvs1all = maj_blue
        rvs1 = []
        rvs2all = maj_red
        rvs2 = []
        
        # remove -99 'no-data' values
        for i in rvs1all:
            if float(i) >=0:
                rvs1.append(i)

        for k in rvs2all:
            if float(k) >=0:
                rvs2.append(k)
                
                
        # perform the K-S test
        ans1 = stats.ks_2samp(rvs1, rvs2)
        print 'blue vs red, KS: ',ans1
        ans1a = stats.anderson_ksamp([rvs1,rvs2])
        print 'blue vs red, A-D:', ans1a
        print
        
        # plot the distributions 
        fig = figure(figsize=(8,8))
        subplots_adjust(left=None, bottom=None, right=None, top=None, wspace=None, hspace=0.4)

        bins = 15
        
        ax1 = fig.add_subplot(211)
        plot1 = hist(rvs1,bins=bins)
        title('blueshifted major axis (kpc)')
        
        ax2 = fig.add_subplot(212)
        plot2 = hist(rvs2,bins=bins)
        title('redshifted major axis (kpc)')
        
        show()


########################################################################################
########################################################################################

    # plot histograms of the morphologies of the red vs blue shifted absorber samples
    # conduct KS and AD tests of these distributions
    # 
    
    plot_dist_morphs = True
    
    if plot_dist_morphs:
    
        # define the datasets
        rvs1all = morph_blue
        rvs1 = []
        rvs2all = morph_red
        rvs2 = []
        
        print 'blue absorbers morphology: ',
        for b in morph_blue:
            print b
        
        print
        print
        print 'red absorbers morphology: ',
        for r in morph_red:
            print r
Exemple #49
0
                 y = w * normpdf(x, m, np.sqrt(c))[0]
                 ax.plot(x, y * (bins[1] - bins[0]), "--", c=colors[k])
                 ax.arrow(float(m), 0, 0, 0.12 * ylims[j],
                          head_width=0.02 * (xlims[j][1] - xlims[j][0]),
                          head_length=0.05 * ylims[j],
                          fc=colors[k], ec=colors[k])
                 # ax.axvline(m, c=colors[k], ls="--", lw=1.5)
                 if l == 0:
                     line = [r"\multirow{{{1}}}{{*}}{{{0}}}".format(parameters[j],
                      len(d.best.means_)), tablab[k], len(v[cond]), round(m, 2),
                             round(np.sqrt(c),2), round(w,2)]
                 else:
                     line = [" ", " ", round(m, 2), round(np.sqrt(c),2),
                             round(w,2)]
             if j in [0,2]:
                 ax.set_ylabel(r"Fraction of total")
             ax.set_ylim(0, ylims[j])
             #     print " & ".join([str(xx) for xx in line]) + "\\\\"
             # print  "\multicolumn{6}{c}{- - - - - -}\\\\"
     vs.append(sp10[j])
     print len(vs[0]), len(vs[1]), len(vs[2])
     print "NE + SYM: ", ks_2samp(vs[0], vs[1])
     print "NE + VIRGO/FORNAX: ", ks_2samp(vs[0], vs[2])
     print "SYM + VIRGO/FORNAX: ", ks_2samp(vs[1], vs[2])
     print "NE + SYM: ", anderson_ksamp((vs[0], vs[1]))
     print  "NE + VIRGO/FORNAX: ", anderson_ksamp((vs[0], vs[2]))
     print "SYM + VIRGO/FORNAX: ", anderson_ksamp((vs[1], vs[2]))
     print
 plt.pause(0.001)
 plt.savefig(os.path.join(os.getcwd(), "figs/hist_outer.png"))
 # plt.show()
Exemple #50
0
 def time_anderson_ksamp(self):
     with warnings.catch_warnings():
         warnings.simplefilter('ignore', UserWarning)
         anderson_ksamp(self.rand)
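
# The benchmark method above references self.rand, which the snippet does not
# define. A plausible ASV-style harness providing it; the class name, seed,
# and sample shapes below are assumptions, not part of the original benchmark:
import warnings
import numpy as np
from scipy.stats import anderson_ksamp

class TimeAndersonKSampSketch(object):
    def setup(self):
        # fixed seed so repeated benchmark runs see identical data
        np.random.seed(12345678)
        self.rand = [np.random.normal(loc=i, size=1000) for i in range(3)]

    def time_anderson_ksamp(self):
        with warnings.catch_warnings():
            warnings.simplefilter('ignore', UserWarning)
            anderson_ksamp(self.rand)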
def main():
    # load the pickled data sets; the file paths depend on the current user
    
    if getpass.getuser() == 'frenchd':
#         pickleFilename = '/Users/frenchd/Research/inclination/git_inclination/pilot_paper_code/pilotData2.p'
#         resultsFilename = '/Users/frenchd/inclination/git_inclination/LG_correlation_combined5_11_25cut_edit4.csv'
#         saveDirectory = '/Users/frenchd/Research/inclination/git_inclination/pilot_paper_code/plots6/'
#         WS09data = '/Users/frenchd/Research/inclination/git_inclination/WS2009_lya_data.tsv'
        
#         pickleFilename = '/Users/frenchd/Research/inclination/git_inclination/rotation_paper/pickleSALT.p'
#         saveDirectory = '/Users/frenchd/Research/inclination/git_inclination/rotation_paper/figures/'

#         pickleFilename = '/Users/frenchd/Research/inclination/git_inclination/picklePilot_plusSALTcut.p'
        pickleFilename = '/Users/frenchd/Research/inclination/git_inclination/picklePilot_plusSALT_14.p'
        gtPickleFilename = '/Users/frenchd/Research/inclination/git_inclination/pickleGT.p'
        saveDirectory = '/Users/frenchd/Research/inclination/git_inclination/plotting_code/figs/'


    else:
        print 'Could not determine username. Exiting.'
        sys.exit()
    
    # use the old pickle file to get the full galaxy dataset info
    pickleFile = open(pickleFilename,'rb')
    fullDict = pickle.load(pickleFile)
    pickleFile.close()
    
    # for the whole galaxy table:
    gtPickleFile = open(gtPickleFilename,'rb')
    gtDict = pickle.load(gtPickleFile)
    gtPickleFile.close()
    
    
    # save each plot?
    save = False
    
#     results = open(resultsFilename,'rU')
#     reader = csv.DictReader(results)
    
#     WS = open(WS09data,'rU')
#     WSreader = csv.DictReader(WS,delimiter=';')
    
    virInclude = False
    cusInclude = False
    finalInclude = 1
    
#     maxEnv = 3000
    minL = 0.001
    maxEnv = 100
    
    # if match, a system is kept only when its include_vir and include_custom
    # flags exactly equal virInclude and cusInclude above (finalInclude is not
    # checked in this mode). Otherwise, a system is kept if ANY include flag
    # enabled above (virInclude, cusInclude, or finalInclude) is also set for it
    match = False
    
    # all the lists to be used for associated lines
    raList = []
    decList = []
    lyaVList = []
    lyaWList = []
    lyaErrList = []
    naList = []
    bList = []
    impactList = []
    azList = []
    incList = []
    fancyIncList = []
    cosIncList = []
    cosFancyIncList = []
    paList = []
    vcorrList = []
    majList = []
    difList = []
    envList = []
    morphList = []
    m15List = []
    virList = []
    likeList = []
    likem15List = []
    nameList = []
    
    # for ambiguous lines
    lyaVAmbList = []
    lyaWAmbList = []
    envAmbList = []
    ambAGNnameList = []
    
    
    # for include = 2 lines
    lyaV_2List = []
    lyaW_2List = []
    env_2List = []
    vir_2List = []
    impact_2List = []
    like_2List = []
    
    # for include = 3 lines
    lyaV_3List = []
    lyaW_3List = []
    env_3List = []
    vir_3List = []
    impact_3List = []
    like_3List = []
    
    
    # for all lines with a galaxy within 500 kpc
    lyaV_nearestList = []
    lyaW_nearestList = []
    env_nearestList = []
    impact_nearestList = []
    diam_nearestList = []
    vir_nearestList = []
    cus_nearestList = []
    
    
    # for all
    lyaV_all = []
    lyaW_all = []
    agnName_all = []
    env_all = []
    AGNnameList = []

    
    # WS lists
#     WSvcorr = []
#     WSdiam = []
#     WSimpact =[]
#     WSew = []
#     WSvel = []
#     WSlya = []
#     WSvel_dif = []
#     WSvir = []
#     WSlike = []
#     
#     l_min = 0.001
# 
#     for w in WSreader:
#         vcorr = w['HV']
#         diam = w['Diam']
#         rho = w['rho']
#         ew = w['EWLya']
#         vel = w['LyaVel']
#         lya = w['Lya']
#         
#         if lya == 'Lya  ' and isNumber(diam) and isNumber(ew) and isNumber(rho):
#             if float(rho) <=500.0:
#                 # this is a single galaxy association
#                 vir = calculateVirialRadius(float(diam))
#                 
#                 vel_dif = float(vcorr) - float(vel)
#     
#                 # try this "sphere of influence" value instead
#                 m15 = float(diam)**1.5
# 
#                 # first for the virial radius
#                 likelihood = math.exp(-(float(rho)/vir)**2) * math.exp(-(vel_dif/200.)**2)
#                 
#                 if vir>= float(rho):
#                     likelihood = likelihood*2
#                     
#                 # then for the second 'virial like' m15 radius
#                 likelihoodm15 = math.exp(-(float(rho)/m15)**2) * math.exp(-(vel_dif/200.)**2)
#                 
#                 if m15>= float(rho):
#                     likelihoodm15 = likelihoodm15*2
#                     
#                 if likelihood <= likelihoodm15:
#                     likelihood = likelihoodm15
#                     
#                 WSlike.append(likelihood)
#                 
# #                 l_min=0
#                 
#                 if likelihood >= l_min:
#                 
#                     WSvcorr.append(float(vcorr))
#                     WSdiam.append(float(diam))
#                     WSvir.append(vir)
#                     WSimpact.append(float(rho))
#                     WSew.append(float(ew))
#                     WSvel.append(float(vel))
#                     WSlya.append(lya)
#                     WSvel_dif.append(vel_dif)
    
    
    
    targetNameL= fullDict['targetName']
    galaxyNameL = fullDict['galaxyName']
    environmentL = fullDict['environment']
    RA_agnL = fullDict['RA_agn']
    Dec_agnL = fullDict['Dec_agn']
    RA_galL = fullDict['RA_gal']
    Dec_galL = fullDict['Dec_gal']
    likelihoodL = fullDict['likelihood']
    likelihood_cusL = fullDict['likelihood_cus']
    virialRadiusL = fullDict['virialRadius']
    cusL = fullDict['cus']
    impactParameterL = fullDict['impact']
    vcorrL = fullDict['vcorr']
    radialVelocityL = fullDict['radialVelocity']
    vel_diffL = fullDict['vel_diff']
    distGalaxyL = fullDict['distGalaxy']
    majorAxisL = fullDict['majorAxis']
    minorAxisL = fullDict['minorAxis']
    inclinationL = fullDict['inclination']
    positionAngleL = fullDict['PA']
    azimuthL = fullDict['azimuth']
    RC3flagL = fullDict['RC3flag']
    RC3typeL = fullDict['RC3type']
    RC3incL = fullDict['RC3inc']
    RC3paL = fullDict['RC3pa']
    final_morphologyL = fullDict['final_morphology']
    includeL = fullDict['include']
    include_virL = fullDict['include_vir']
    include_customL = fullDict['include_custom']
    Lya_vL = fullDict['Lya_v']
    vlimitsL = fullDict['vlimits']
    Lya_WL = fullDict['Lya_W']
    NaL = fullDict['Na']
    bL = fullDict['b']
    identifiedL = fullDict['identified']
    sourceL = fullDict['source']
    
    print 'initial len(Lya_vL): ',len(Lya_vL)
    print

    for i,(include,include_vir,include_cus) in enumerate(zip(includeL,include_virL,include_customL)):
        go = False
        if match:
            if virInclude == include_vir and cusInclude == include_cus:
                go = True
            else:
                go = False
                
        else:
            if virInclude and include_vir:
                go = True
            elif cusInclude and include_cus:
                go = True
            elif finalInclude and include:
                go = True
            else:
                go = False

        galaxyName = galaxyNameL[i]
        targetName = targetNameL[i]
        RA_agn = RA_agnL[i]
        Dec_agn = Dec_agnL[i]
        RA_gal = RA_galL[i]
        Dec_gal = Dec_galL[i]
        lyaV = Lya_vL[i]
        lyaW = Lya_WL[i]
        lyaW_err = lyaW*0.1
        env = environmentL[i]
        impact = impactParameterL[i]
        galaxyDist = distGalaxyL[i]
        pa = positionAngleL[i]
        RC3pa = RC3paL[i]
        morph = final_morphologyL[i]
        vcorr = vcorrL[i]
        maj = majorAxisL[i]
        minor = minorAxisL[i]
        inc = inclinationL[i]
        az = azimuthL[i]
        b = bL[i]
        b_err = b*0.1
        na = NaL[i]
        na_err = na*0.1
        likelihood = likelihoodL[i]
        likelihoodm15 = likelihood_cusL[i]
        virialRadius = virialRadiusL[i]
        m15 = cusL[i]
        vel_diff = vel_diffL[i]
        source = sourceL[i]
        
        lyaV_all.append(float(lyaV))
        lyaW_all.append(float(lyaW))
        env_all.append(int(env))
        AGNnameList.append(targetName)
        
        
        # for ambiguous lines
        if include == 0:
            lyaVAmbList.append(float(lyaV))
            lyaWAmbList.append(float(lyaW))
            envAmbList.append(float(env))
            ambAGNnameList.append(targetName)
            
            
        print 'include = ', include
        if include == 2:
            print 'include2 = ',include
            # for include = 2 lines
            lyaV_2List.append(float(lyaV))
            lyaW_2List.append(float(lyaW))
            env_2List.append(float(env))
            vir_2List.append(float(virialRadius))
            impact_2List.append(float(impact))
            like_2List.append(float(likelihood))
    
        if include == 3:
            # for include = 3 lines
            lyaV_3List.append(float(lyaV))
            lyaW_3List.append(float(lyaW))
            env_3List.append(float(env))
            vir_3List.append(float(virialRadius))
            impact_3List.append(float(impact))
            like_3List.append(float(likelihood))
            
        # for all absorbers with a galaxy within 500kpc
        if isNumber(impact):
            lyaV_nearestList.append(float(lyaV))
            lyaW_nearestList.append(float(lyaW))
            env_nearestList.append(float(env))
            impact_nearestList.append(float(impact))
            diam_nearestList.append(float(maj))
            nameList.append(galaxyName)
            vir_nearestList.append(float(virialRadius))
            cus_nearestList.append(float(m15))            
            
            
#         if go and source == 'salt':
#         if go and source == 'pilot':
        if go and env <=maxEnv:
#         if go:
            if isNumber(RC3pa) and not isNumber(pa):
                pa = RC3pa
            
            if isNumber(inc):
                cosInc = cos(float(inc) * pi/180.)
                
                if isNumber(maj) and isNumber(minor):
                    q0 = 0.2
                    fancyInc = calculateFancyInclination(maj,minor,q0)
                    cosFancyInc = cos(fancyInc * pi/180)
                else:
                    fancyInc = -99
                    cosFancyInc = -99
            else:
                cosInc = -99
                inc = -99
                fancyInc = -99
                cosFancyInc = -99
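            # calculateFancyInclination is defined elsewhere in this project.
            # A plausible sketch of it, assuming the standard flattening-
            # corrected inclination cos^2(i) = ((b/a)^2 - q0^2)/(1 - q0^2):
            #
            # def calculateFancyInclination(major, minor, q0):
            #     ratio = float(minor)/float(major)
            #     cos2i = max((ratio**2 - q0**2)/(1.0 - q0**2), 0.0)
            #     return math.degrees(math.acos(math.sqrt(cos2i)))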
            
            # all the lists to be used for associated lines
            if float(env) <= maxEnv and float(likelihood) >= minL:
                raList.append(RA_gal)
                decList.append(Dec_gal)
                lyaVList.append(float(lyaV))
                lyaWList.append(float(lyaW))
                lyaErrList.append(float(lyaW_err))
                naList.append(na)
                bList.append(float(b))
                impactList.append(float(impact))
                print az
                azList.append(float(az))
                incList.append(float(inc))
                fancyIncList.append(fancyInc)
                cosIncList.append(cosInc)
                cosFancyIncList.append(cosFancyInc)
                paList.append(pa)
                vcorrList.append(vcorr)
                majList.append(maj)
                difList.append(float(vel_diff))
                envList.append(float(env))
                morphList.append(morph)
                m15List.append(m15)
                virList.append(virialRadius)
                likeList.append(likelihood)
                likem15List.append(likelihoodm15)
                nameList.append(galaxyName)

        
    # lists for the full galaxy dataset
    majorAxisL = gtDict['majorAxis']
    incL = gtDict['inc']
    adjustedIncL = gtDict['adjustedInc']
    paL = gtDict['PA']
    BmagL = gtDict['Bmag']
    Bmag_sdssL = gtDict['Bmag_sdss']
    RID_medianL = gtDict['RID_median']
    RID_meanL = gtDict['RID_mean']
    RID_stdL = gtDict['RID_std']
    VhelL = gtDict['Vhel']
    RAdegL = gtDict['RAdeg']
    DEdegL = gtDict['DEdeg']
    NameL= gtDict['Name']
    
    allPA = paL
    allInclinations = []
    allCosInclinations = []

#     print 'type: ',type(incL)
    for i in incL:
        if i != -99:
            i = float(i)
            allInclinations.append(i)
            
            # convert degrees to radians before taking the cosine
            i2 = pi/180. * i
            cosi2 = cos(i2)
            allCosInclinations.append(cosi2)
            
    allFancyInclinations = []
    allCosFancyCosInclinations = []
    for i in adjustedIncL:
        if i != -99:
            i = float(i)

            allFancyInclinations.append(i)
            
            # convert degrees to radians before taking the cosine
            i2 = pi/180. * i
            cosi2 = cos(i2)
            allCosFancyCosInclinations.append(cosi2)
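
    # the two conversion loops above could also be vectorized; a sketch,
    # assuming numpy is available as np (this script imports cos/pi by name):
    # valid = np.array([float(i) for i in adjustedIncL if i != -99])
    # allCosFancyCosInclinations = np.cos(np.radians(valid)).tolist()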
            
    allDiameter = majorAxisL

    print 'finished loading the full galaxy table data'

    total = 0
    totalNo = 0
    totalYes = 0
    totalIsolated = 0
    totalGroup = 0
    

########################################################################################
#########################################################################################
    
    # compile summary lists and print the statistics
    #
    
    # absorber info lists
    blues = []
    reds = []
    blueAbs = []
    redAbs = []
    blueW = []
    redW = []
    blueB = []
    redB = []
    blueErr = []
    redErr = []
    blueV = []
    redV = []
    blueImpact = []
    redImpact = []
    
    # galaxy info lists
    blueInc = []
    redInc = []
    blueFancyInc = []
    redFancyInc = []
    blueAz = []
    redAz = []
    bluePA = []
    redPA = []
    blueVcorr = []
    redVcorr = []
    blueEnv = []
    redEnv = []
    blueVir = []
    redVir = []
    blueLike = []
    redLike = []
    

    # ambiguous stuff
    void = []
    ambig = []
    for v,w,e in zip(lyaVAmbList,lyaWAmbList,envAmbList):
        if e == 0:
            void.append(w)
        else:
            ambig.append(w)
    
    
    # for targets
    finalTargets = {}
    for a in AGNnameList:
        if finalTargets.has_key(a):
            i = finalTargets[a]
            i+=1
            finalTargets[a] = i
            
        else:
            finalTargets[a] = 1
            
    # for ambiguous targets
    ambTargets = {}
    for a in ambAGNnameList:
        if ambTargets.has_key(a):
            i = ambTargets[a]
            i+=1
            ambTargets[a] = i
            
        else:
            ambTargets[a] = 1
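
    # both counting loops above are equivalent to collections.Counter; an
    # idiomatic sketch (these Counter objects are not used further here):
    from collections import Counter
    finalTargetCounts = Counter(AGNnameList)
    ambTargetCounts = Counter(ambAGNnameList)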
        
    
    # for absorbers
    for d,w,e,v,i,b in zip(difList,lyaWList,lyaErrList,lyaVList,impactList,bList):
        if d>=0:
            blues.append(float(d))
            blueW.append(float(w))
            blueErr.append(float(e))
            blueV.append(float(v))
            blueImpact.append(float(i))
            blueAbs.append(abs(d))
            blueB.append(float(b))
        else:
            reds.append(float(d))
            redW.append(float(w))
            redErr.append(float(e))
            redV.append(float(v))
            redImpact.append(float(i))
            redAbs.append(abs(d))
            redB.append(float(b))
            
            
##########################################################################################
    blueSpiralInc = []
    redSpiralInc = []
    spiralIncList = []
    # for spirals only
    for d,inc in zip(difList,fancyIncList):
        spiralIncList.append(float(inc))
        if d>=0:
            blueSpiralInc.append(float(inc))
        else:
            redSpiralInc.append(float(inc))
                
                
    # compile a list of only spiral galaxy inclinations from the full galaxy table
    if getpass.getuser() == 'frenchd':
#         galaxyFile = open('/Users/David/Research_Documents/gt/NewGalaxyTable5.csv','rU')
        galaxyFile = open('/Users/frenchd/Research/gt/FinalGalaxyTable12_filtered.csv','rU')
    else:
        print 'Not on laptop, exiting'
        sys.exit()
        
    reader = csv.DictReader(galaxyFile)
    
    allDiameters = []
    incGT25diam = []
    allSpiralIncList = []
    
    q0 = 0.2
    for i in reader:
#         major,minor = eval(i['linDiameters (kpc)'])
        major = float(i['MajDiam'])
        adjustedInc = float(i['adjustedInc'])

        morph = i['MType'].lower()
        if bfind(morph,'s'):
            if not bfind(morph,'sph') and not bfind(morph,'s0'):
                if major != -99.99:
                    allSpiralIncList.append(adjustedInc)
                    if float(major) >=25.0:
                        incGT25diam.append(adjustedInc)
    
    galaxyFile.close()
    
##########################################################################################
                   
            
    nameDict = {}
    # for galaxies
    for d,inc,finc,az,pa,vcorr,e,vir,l,name in zip(difList,incList,fancyIncList,azList,paList,vcorrList,envList,virList, likeList,nameList):
        if nameDict.has_key(name):
            i = nameDict[name]
            i+=1
            nameDict[name] = i
        else:
            nameDict[name] = 1
        
        if d>=0:
            if inc !=-99:
                blueInc.append(float(inc))
            if finc !=-99:
                blueFancyInc.append(float(finc))
            if az !=-99:
                blueAz.append(float(az))
            if pa !=-99:
                bluePA.append(float(pa))
            if vcorr !=-99:
                blueVcorr.append(float(vcorr))
            blueEnv.append(float(e))
            if vir !=-99:
                blueVir.append(float(vir))
            if l !=-99:
                blueLike.append(float(l))
        else:
            if inc !=-99:
                redInc.append(float(inc))
            if finc !=-99:
                redFancyInc.append(float(finc))
            if az !=-99:
                redAz.append(float(az))
            if pa !=-99:
                redPA.append(float(pa))
            if vcorr !=-99:
                redVcorr.append(float(vcorr))
            redEnv.append(float(e))
            if vir !=-99:
                redVir.append(float(vir))
            if l !=-99:
                redLike.append(float(l))
                
    galaxyNames = nameDict.keys()
                
    # how many absorbers above vs below vel_cut?
    redVelCount200 = 0
    redVelCount100 = 0
    blueVelCount200 = 0
    blueVelCount100 = 0
    
    for b in blues:
        if b >=200:
            blueVelCount200 +=1
        if b >= 100:
            blueVelCount100 +=1
        
    for r in reds:
        if abs(r) >=200:
            redVelCount200 +=1
        if abs(r) >=100:
            redVelCount100 +=1
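
    # equivalent one-line forms of the counts above (a sketch):
    # blueVelCount200 = sum(1 for b in blues if b >= 200)
    # redVelCount200 = sum(1 for r in reds if abs(r) >= 200)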
    

    assocFancyInc = blueFancyInc + redFancyInc
    
    AGNnameDict = {}
    for i in AGNnameList:
        if AGNnameDict.has_key(i):
            c = AGNnameDict[i]
            c +=1
            AGNnameDict[i] = c
        else:
            AGNnameDict[i] = 1
        
    AGN_list = AGNnameDict.keys()

    
    print
    print '------------------------ Pilot Data ------------------------------'
    print
    print ' FOR THE FOLLOWING INCLUDE SET:'
    print ' Virial radius include = ',virInclude
    print ' Custom include =        ',cusInclude
    print ' Final include =         ',finalInclude
    print ' Match =                 ',match
    print
#     print 'total number of lines: ', len(lyaWList) + len(lyaWAmbList)
    print 'total number of lines: ', len(lyaV_all)
    print 'total number of unique galaxies matched: ',len(galaxyNames)
    print 'total number of AGN: ',len(AGN_list)
    print 'total number of associated lines: ',len(difList)
    print 'total number of ambiguous lines: ',len(ambig)
    print 'total number of void lines: ',len(void)
    print '# of redshifted lines: ',len(reds)
    print '# of blueshifted lines: ',len(blues)
    print
    print
    print ' ASSOCIATED TARGETS '
    print
    print 'final target number: ',len(finalTargets.keys())
    for i in finalTargets.keys():
        print i
    print
    print
    print ' AMBIGUOUS TARGETS '
    print
    print 'final ambiguous number: ',len(ambTargets.keys())
    for i in ambTargets.keys():
        print i
    print
    print
    print '----------------------- Absorber info ----------------------------'
    print
    print 'avg blueshifted EW: ',mean(blueW)
    print 'median blueshifted EW: ',median(blueW)
    print 'avg blue err: ',mean(blueErr)
    print 'median blue err: ',median(blueErr)
    print
    print 'std(blue EW): ',std(blueW)
    print 'stats.sem(blue EW): ',stats.sem(blueW)
    print 'stats.describe(blue EW): ',stats.describe(blueW)
    print
    print 'avg blueshifted vel_diff: ',mean(blues)
    print 'median blueshifted vel_diff: ',median(blues)
    print 'std(blueshifted vel_diff): ',std(blues)
    print 'stats.sem(blue vel_dif): ',stats.sem(blues)
    print 'stats.describe(blue vel_dif): ',stats.describe(blues)
    print
    print 'fraction of blueshifted which have vel_diff >= 200 km/s: {0}'.format(float(blueVelCount200)/len(blues))
    print 'total number with abs(vel_diff) >= 200 km/s: {0}'.format(blueVelCount200)
    print 'fraction of blueshifted which have vel_diff >= 100 km/s: {0}'.format(float(blueVelCount100)/len(blues))
    print 'total number with abs(vel_diff) >= 100 km/s: {0}'.format(blueVelCount100)
    print
    
    print 'avg blue velocity: ',mean(blueV)
    print 'median blue velocity: ',median(blueV)
    print 'std(blue Velocity): ',std(blueV)
    print 'avg blue impact: ',mean(blueImpact)
    print 'median blue impact: ',median(blueImpact)
    print 'stats.sem(blue impact): ',stats.sem(blueImpact)
    print 'stats.describe(blue impact): ',stats.describe(blueImpact)

    print
    
    print 'avg redshifted EW: ',mean(redW)
    print 'median redshifted EW: ',median(redW)
    print 'avg red err: ',mean(redErr)
    print 'median red err: ',median(redErr)
    print
    print 'std(red EW): ',std(redW)
    print 'stats.sem(red EW): ',stats.sem(redW)
    print 'stats.describe(red EW): ',stats.describe(redW)

    print
    print 'avg redshifted vel_diff: ',mean(reds)
    print 'median redshifted vel_diff: ',median(reds)
    print 'std(redshifted vel_dif): ',std(reds)
    print 'stats.sem(red vel_dif): ',stats.sem(reds)
    print 'stats.describe(red vel_dif): ',stats.describe(reds)
    print
    print 'fraction of redshifted which have abs(vel_diff) >= 200 km/s: {0}'.format(float(redVelCount200)/len(reds))
    print 'total number with abs(vel_diff) >= 200 km/s: {0}'.format(redVelCount200)
    print 'fraction of redshifted which have abs(vel_diff) >= 100 km/s: {0}'.format(float(redVelCount100)/len(reds))
    print 'total number with abs(vel_diff) >= 100 km/s: {0}'.format(redVelCount100)
    print

    print 'avg red velocity: ',mean(redV)
    print 'median red velocity: ',median(redV)
    print
    print 'avg red impact: ',mean(redImpact)
    print 'median red impact: ',median(redImpact)
    print 'stats.sem(red impact): ',stats.sem(redImpact)
    print 'stats.describe(red impact): ',stats.describe(redImpact)
    print 'std(red impact): ',std(redImpact)



    print
    print '----------------------- Galaxy info ----------------------------'
    print
    
    # regular inclinations
    incCut = 50
    totalBlueInc = len(blueInc)
    totalRedInc = len(redInc)
    
    blueIncCount = 0
    for i in blueInc:
        if i >= incCut:
            blueIncCount +=1
            
    redIncCount = 0
    for i in redInc:
        if i >= incCut:
            redIncCount +=1
            
    totalInc = len(allInclinations)
    totalCount = 0
    for i in allInclinations:
        if i >= incCut:
            totalCount +=1
            
            
    # fancy inclinations
    totalBlueFancyInc = len(blueFancyInc)
    totalRedFancyInc = len(redFancyInc)
    
    blueFancyIncCount = 0
    for i in blueFancyInc:
        if i >= incCut:
            blueFancyIncCount +=1
            
    redFancyIncCount = 0
    for i in redFancyInc:
        if i >= incCut:
            redFancyIncCount +=1
            
    combinedCount = redFancyIncCount + blueFancyIncCount
    totalCombinedCount = totalRedFancyInc + totalBlueFancyInc
            
    totalFancyInc = len(allFancyInclinations)
    totalFancyCount = 0
    for i in allFancyInclinations:
        if i >= incCut:
            totalFancyCount +=1
    
    print
    print ' INCLINATIONS: '
    print 
    print 'Blue: {0:.1%} of associated galaxies have inclination >= {1} deg'.format(float(blueIncCount)/float(totalBlueInc),incCut)
    print 'Red: {0:.1%} of associated galaxies have inclination >= {1} deg'.format(float(redIncCount)/float(totalRedInc),incCut)
    print 'All: {0:.1%} of ALL galaxies have inclination >= {1} deg'.format(float(totalCount)/float(totalInc),incCut)
    print
    print ' FANCY INCLINATIONS: '
    print
    print 'Blue: {0:.1%} of associated galaxies have fancy inclination >= {1} deg'.format(float(blueFancyIncCount)/float(totalBlueFancyInc),incCut)
    print 'Red: {0:.1%} of associated galaxies have fancy inclination >= {1} deg'.format(float(redFancyIncCount)/float(totalRedFancyInc),incCut)
    print 'All: {0:.1%} of ALL galaxies have fancy inclination >= {1} deg'.format(float(totalFancyCount)/float(totalFancyInc),incCut)
    print 'Combined: {0:.1%} of associated galaxies have fancy inclination >= {1} deg'.format(float(combinedCount)/float(totalCombinedCount),incCut)
    print
    print 'Average all fancy inclination: ',mean(allFancyInclinations)
    print 'stats.sem(all): ',stats.sem(allFancyInclinations)
    print    
    print 'avg blue inclination: ',mean(blueInc)
    print 'median blue inclination: ',median(blueInc)
    print 'avg blue fancy inclination: ',mean(blueFancyInc)
    print 'median blue fancy inclination: ',median(blueFancyInc)
    print
    print 'avg red inclination: ',mean(redInc)
    print 'median red inclination: ',median(redInc)
    print 'avg red fancy inclination: ',mean(redFancyInc)
    print 'median red fancy inclination: ',median(redFancyInc)
    
    print
    print 'mean associated: ',mean(assocFancyInc)
    print 'stats.sem(associated): ',stats.sem(assocFancyInc)
    print 'stats.describe(associated): ',stats.describe(assocFancyInc)
    print 'stats.sem(blue): ',stats.sem(blueFancyInc)
    print 'stats.describe(blue): ',stats.describe(blueFancyInc)
    print
    print 'stats.sem(red): ',stats.sem(redFancyInc)
    print 'stats.describe(red): ',stats.describe(redFancyInc)
    
    print
    print "  AZIMUTHS and PA:  "
    print
    print 'avg blue azimuth: ',mean(blueAz)
    print 'median blue azimuth: ',median(blueAz)
    print 'stats.sem(blue az): ',stats.sem(blueAz)
    print 'stats.describe(blue az): ',stats.describe(blueAz)
    print
    print 'avg red azimuth: ',mean(redAz)
    print 'median red azimuth: ',median(redAz)
    print 'stats.sem(red az): ',stats.sem(redAz)
    print 'stats.describe(red az): ',stats.describe(redAz)
    print
    print 'avg blue PA: ',mean(bluePA)
    print 'median blue PA: ',median(bluePA)
    print
    print 'avg red PA: ',mean(redPA)
    print 'median red PA: ',median(redPA)
    
    print
    print ' VCORR : '
    print
    print 'avg blue vcorr: ',mean(blueVcorr)
    print 'median blue vcorr: ',median(blueVcorr)
    print
    print 'avg red vcorr: ',mean(redVcorr)
    print 'median red vcorr: ',median(redVcorr)
    
    print
    print ' ENVIRONMENT: '
    print
    print 'avg blue environment: ',mean(blueEnv)
    print 'median blue environment: ',median(blueEnv)
    print
    print 'avg red environment: ',mean(redEnv)
    print 'median red environment: ',median(redEnv)
    
    print
    print ' R_vir: '
    print
    print 'avg blue R_vir: ',mean(blueVir)
    print 'median blue R_vir: ',median(blueVir)
    print 'stats.sem(blue R_vir): ',stats.sem(blueVir)
    print 'stats.describe(blue R_vir): ',stats.describe(blueVir)
    print
    print 'avg red R_vir: ',mean(redVir)
    print 'median red R_vir: ',median(redVir)
    print 'stats.sem(red R_vir): ',stats.sem(redVir)
    print 'stats.describe(red R_vir): ',stats.describe(redVir)

    print
    print ' LIKELIHOOD: '
    print
    print 'avg blue likelihood: ',mean(blueLike)
    print 'median blue likelihood: ',median(blueLike)
    print
    print 'avg red likelihood: ',mean(redLike)
    print 'median red likelihood: ',median(redLike)
    
    print
    print
    print '-------------------- Distribution analysis ----------------------'
    print
    print
    
    print ' FANCY INCLINATIONS: '
    
    # perform the K-S and AD tests for inclination
    ans1 = stats.ks_2samp(blueFancyInc, redFancyInc)
    ans1a = stats.anderson_ksamp([blueFancyInc,redFancyInc])

    print 'KS for blue vs red fancy inclinations: ',ans1
    print 'AD for blue vs red fancy inclinations: ',ans1a
    
    ans2 = stats.ks_2samp(blueFancyInc, allFancyInclinations)
    print 'KS for blue vs all fancy inclinations: ',ans2
    
    ans3 = stats.ks_2samp(redFancyInc, allFancyInclinations)
    print 'KS for red vs all fancy inclinations: ',ans3
    
    print
    z_statrb, p_valrb = stats.ranksums(blueFancyInc, redFancyInc)
    z_statall, p_valall = stats.ranksums(assocFancyInc, allFancyInclinations)
    print 'ranksum red vs blue p-value: ',p_valrb
    print 'ranksum associated vs all: ',p_valall


    ans4 = stats.ks_2samp(assocFancyInc, allFancyInclinations)
    ans4a = stats.anderson_ksamp([assocFancyInc,allFancyInclinations])

    print 'KS for all associated vs all fancy inclinations: ',ans4
    print 'AD for all associated vs all fancy inclinations: ',ans4a
    
    print

#     ans5 = stats.ks_2samp(spiralIncList, allSpiralIncList)
#     ans5a = stats.anderson_ksamp([spiralIncList,allSpiralIncList])
# 
#     print 'KS for all spiral associated vs all spiral fancy inclinations: ',ans5
#     print 'AD for all spiral associated vs all spiral fancy inclinations: ',ans5a
    
    print
    print ' INCLINATIONS: '
    print
    
    # perform the K-S and AD tests for inclination
    ans1 = stats.ks_2samp(blueInc, redInc)
    ans1a = stats.anderson_ksamp([blueInc,redInc])

    print 'KS for blue vs red inclinations: ',ans1
    print 'AD for blue vs red inclinations: ',ans1a
    
    ans2 = stats.ks_2samp(blueInc, allInclinations)
    print 'KS for blue vs all inclinations: ',ans2
    
    ans3 = stats.ks_2samp(redInc, allInclinations)
    print 'KS for red vs all inclinations: ',ans3
    
    assocInc = blueInc + redInc
    ans4 = stats.ks_2samp(assocInc, allInclinations)
    print 'KS for associated vs all inclinations: ',ans4
    
    print
    print ' EW Distributions: '
    print
    
    # perform the K-S and AD tests for EW
    ans1 = stats.ks_2samp(blueW, redW)
    ans1a = stats.anderson_ksamp([blueW,redW])
    print 'KS for blue vs red EW: ',ans1
    print 'AD for blue vs red EW: ',ans1a
    

    print
    print ' Impact parameter Distributions: '
    print
    
    # perform the K-S and AD tests for impact parameter
    ans1 = stats.ks_2samp(blueImpact, redImpact)
    ans1a = stats.anderson_ksamp([blueImpact,redImpact])
    print 'KS for blue vs red impact parameters: ',ans1
    print 'AD for blue vs red impact parameters: ',ans1a
    
    print
    print ' \Delta v Distributions: '
    print
    
    # perform the K-S and AD tests for \delta v
    ans1 = stats.ks_2samp(blueAbs, redAbs)
    ans1a = stats.anderson_ksamp([blueAbs,redAbs])
    print 'KS for blue vs red \Delta v: ',ans1
    print 'AD for blue vs red \Delta v: ',ans1a
    
    print
    print ' Azimuth Distributions: '
    print
    
    # perform the K-S and AD tests for azimuth
    ans1 = stats.ks_2samp(blueAz, redAz)
    ans1a = stats.anderson_ksamp([blueAz,redAz])
    print 'KS for blue vs red azimuth: ',ans1
    print 'AD for blue vs red azimuth: ',ans1a
    print
    
    # now against a flat distribution
    flatRed = arange(0,90,1)
    flatBlue = arange(0,90,1)

    ans1 = stats.ks_2samp(blueAz, flatBlue)
    ans1a = stats.anderson_ksamp([blueAz,flatBlue])
    print 'KS for blue vs flat azimuth: ',ans1
    print 'AD for blue vs flat azimuth: ',ans1a
    print
    ans1 = stats.ks_2samp(redAz, flatRed)
    ans1a = stats.anderson_ksamp([redAz,flatRed])
    print 'KS for red vs flat azimuth: ',ans1
    print 'AD for red vs flat azimuth: ',ans1a
    print
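    # the sampled 'flat' comparison above depends on the arbitrary 1-degree
    # grid; a one-sample alternative tests against a continuous uniform on
    # [0, 90) instead (a sketch; stats.kstest takes a named distribution with
    # loc/scale args):
    # print 'KS for blue vs uniform azimuth: ', stats.kstest(blueAz, 'uniform', args=(0, 90))
    # print 'KS for red vs uniform azimuth: ', stats.kstest(redAz, 'uniform', args=(0, 90))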
    
            
    print
    print ' Environment Distributions: '
    print
    
    # perform the K-S and AD tests for environment
    ans1 = stats.ks_2samp(blueEnv, redEnv)
    ans1a = stats.anderson_ksamp([blueEnv,redEnv])
    print 'KS for blue vs red environment: ',ans1
    print 'AD for blue vs red environment: ',ans1a
    
    print
    print ' R_vir Distributions: '
    print
    
    # perform the K-S and AD tests for r_vir
    ans1 = stats.ks_2samp(blueVir, redVir)
    ans1a = stats.anderson_ksamp([blueVir,redVir])
    print 'KS for blue vs red R_vir: ',ans1
    print 'AD for blue vs red R_vir: ',ans1a
    
    print
    print ' Doppler parameter Distributions: '
    print
    
    # perform the K-S and AD tests for doppler parameter
    ans1 = stats.ks_2samp(blueB, redB)
    ans1a = stats.anderson_ksamp([blueB,redB])
    print 'KS for blue vs red doppler parameter: ',ans1
    print 'AD for blue vs red doppler parameter: ',ans1a
    
    print
    print ' Likelihood Distributions: '
    print
    
    # perform the K-S and AD tests for likelihood
    ans1 = stats.ks_2samp(blueLike, redLike)
    ans1a = stats.anderson_ksamp([blueLike,redLike])
    print 'KS for blue vs red likelihood: ',ans1
    print 'AD for blue vs red likelihood: ',ans1a
    
    print
    print ' COMPLETED. '