Example #1
def ttest_range(sample, left, right):
    k = len(sample) - 1
    left = stdtr(k, -t_statistic(sample, left))
    print('left:', left)
    right = stdtr(k, t_statistic(sample, right))
    print('right:', right)
    outside = left + right
    print('outside:', outside)
    inside = 1 - outside
    print('inside:', inside)
    return inside
Example #2
def spearman_rs(l1, l2):
    """Compute the Spearman rank correlation coefficient and the corresponding p-value."""

    if len(l1) == 0 or len(l2) == 0:
        print('ERROR: LISTS CONTAIN NO ELEMENTS!')
        return -1.
    elif len(l1) != len(l2):
        print('ERROR: LISTS HAVE TO HAVE THE SAME LENGTH!')
        return -1.
    l1 = rankdata(l1)
    l2 = rankdata(l2)
    l1_mean = sum(l1) / len(l1)
    l2_mean = sum(l2) / len(l2)
    sum1 = 0.
    sum2 = 0.
    numerator = 0.
    # Compute Spearman's rs.
    for i in range(len(l1)):
        numerator += (l1[i] - l1_mean) * (l2[i] - l2_mean)
        sum1 += (l1[i] - l1_mean) ** 2
        sum2 += (l2[i] - l2_mean) ** 2
    denom = sqrt(sum1) * sqrt(sum2)
    rs = numerator / denom
    # Compute Student's t for rs.
    t = len(l1) - 2.
    t /= 1. - rs ** 2
    t = rs * sqrt(t)
    # If t > 0, flip the sign: Student's t is symmetric about zero.
    if t > 0:
        t_help = -t
    else:
        t_help = t
    p = stdtr(len(l1) - 2., t_help)
    return (rs, p)
Example #3
def welch_test(x1_stats, x2_stats):
    # Each stats tuple is (mean, standard deviation, sample size).
    x1bar, x2bar = x1_stats[0], x2_stats[0]
    v1, v2 = x1_stats[1]**2, x2_stats[1]**2
    n1, n2 = x1_stats[2], x2_stats[2]
    # Compute Welch's t-test using the descriptive statistics.
    tf = (x1bar - x2bar) / np.sqrt(v1/n1 + v2/n2)
    dof = (v1/n1 + v2/n2)**2 / (v1**2/(n1**2*(n1-1)) + v2**2/(n2**2*(n2-1)))
    pf = 2*stdtr(dof, -np.abs(tf))
    return float(pf)
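A hypothetical call to welch_test above, only to show the expected argument layout; the numbers and the surrounding imports are assumptions and not part of the original source. Each stats tuple is (mean, sample standard deviation, sample size), which is how the function unpacks its arguments.

# Hypothetical usage (assumed values); the function body expects numpy as np
# and scipy.special.stdtr to be available in its module.
import numpy as np
from scipy.special import stdtr

p = welch_test((5.1, 1.2, 30), (4.6, 1.5, 28))  # (mean, std, n) for each group
print(p)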
Example #4
def one_sample_t(A,mu):
    n = len(A)
    df = n - 1
    z = np.mean(A) - mu 
    z /= unbiased_std(A)
    t = z * np.sqrt(n)
    return t, stdtr(df,t)
Example #5
def two_sample_t(A,B,expected_diff=0):
    diff = (np.mean(A) - np.mean(B) - expected_diff)
    na = len(A)
    nb = len(B)
    df = na + nb - 2
    sum_sq = (var(A)*(na-1) + var(B)*(nb-1))
    f = (1.0/na + 1.0/nb)/df  # float division, so the factor is not truncated under Python 2
    t = diff/np.sqrt(sum_sq*f)  
    return (t, stdtr(df,t))
Example #6
def two_sample_t_test_welch(a,b):
    ''' Welch t-test'''
    from scipy.special import stdtr
    abar = a.mean()
    avar = a.var(ddof=1)
    na = a.size
    adof = na - 1
    bbar = b.mean()
    bvar = b.var(ddof=1)
    nb = b.size
    bdof = nb - 1
    # Compute Welch's t-test using the descriptive statistics.
    tf = (abar - bbar) / np.sqrt(avar/na + bvar/nb)
    dof = (avar/na + bvar/nb)**2 / (avar**2/(na**2*adof) + bvar**2/(nb**2*bdof))
    pf = stdtr(dof, -np.abs(tf))  # one-sided p-value; double it for a two-sided test
    return (tf, pf)
Example #7
    def ttest(self):
        """
        ttest implementation that uses efficient variance computation
        """
        abar = self.a_estimator.mean()
        bbar = self.b_estimator.mean()

        na = self.a_estimator.num_samples()
        adof = na - 1
        nb = self.b_estimator.num_samples()
        bdof = nb - 1

        avar = self.a_estimator.var()
        bvar = self.b_estimator.var()

        tf = (abar - bbar) / np.sqrt(avar/na + bvar/nb)
        dof = (avar / na + bvar / nb) ** 2 / (avar ** 2 / (na ** 2 * adof) + bvar ** 2 / (nb ** 2 * bdof))
        pf = 2*stdtr(dof, -np.abs(tf))
        return pf
Example #8
def t_test_manual(l1,l2):
    l1 = np.asarray(l1)
    l2 = np.asarray(l2)

    l1bar = l1.mean()
    l2bar = l2.mean()

    l1var = l1.var(ddof=1)  # ddof=1 gives the unbiased sample variance (divides by n - 1)
    l2var = l2.var(ddof=1)

    n_l1 = l1.size
    n_l2 = l2.size

    df_l1 = n_l1 - 1
    df_l2 = n_l2 - 1

    # Use the descriptive statistics to compute Welch's t-test.
    tf = (l1bar - l2bar) / np.sqrt(l1var/n_l1+l2var/n_l2)
    dof = (l1var/n_l1 + l2var/n_l2)**2 / (l1var**2/(n_l1**2*df_l1) + l2var**2/(n_l2**2*df_l2))
    pf = 2*stdtr(dof,-np.abs(tf))

    return tf, pf
Example #9
	def character_stats(self):
		for char, cngrams in self.character_ngrams.items():
			for words, cn in cngrams.items():
				n = len(words)
				count_all = self.ngrams[words]['count']
				count_char = cn['count']
				
				# bernoulli!
				char_total = float(self.character_ngram_totals[char][n])
				cn['freq'] = char_p = count_char / char_total
				other_total = self.ngram_totals[n] - char_total
				cn['other_freq'] = other_p = (count_all - count_char) / other_total
				
				if count_all == count_char:
					p_value = 0.0		# only this character ever says it! (also would cause /0)
				else:
					if char_total == 1.0:	# special case to avoid divide by zero
						nu, t = size1special(char_p, other_p, other_p*(1.0-other_p), other_total)
					else:
						nu, t = welch(char_p, char_p*(1.0-char_p), char_total, other_p, other_p*(1.0-other_p), other_total)
					p_value = 1.0 - stdtr(nu, t)
				cn['p_value'] = p_value
Example #10
    def cdf(self, x):
        """
        Computes the cumulative distribution function of the t
        distribution at the point(s) x. For x >= 0 the cdf is defined as:
            F(x | nu) = 1 - 1/2 * I_{z(x)}(nu / 2, 1 / 2)

        where z(x) = nu / (x ** 2 + nu) and I is the regularized incomplete
        beta function.

        Parameters
        ----------
        x: array, dtype=float, shape=(m x n)
            The value(s) at which the user would like the cdf evaluated.
            If an array is passed in, the cdf is evaluated at every point
            in the array and an array of the same size is returned.

        Returns
        -------
        cdf: array, dtype=float, shape=(m x n)
            The cdf at each point in x.
        """
        cdf = stdtr(self.nu, x)

        return cdf
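The docstring above describes the t cdf through the regularized incomplete beta function. Below is a minimal, self-contained sketch checking that identity against stdtr; the degrees of freedom and evaluation point are arbitrary values chosen for illustration.

# Sketch: verify the incomplete-beta form of the Student's t cdf against stdtr.
import numpy as np
from scipy.special import stdtr, betainc

nu, x = 5.0, 1.3                             # arbitrary df and point, x >= 0
z = nu / (x ** 2 + nu)
cdf_beta = 1.0 - 0.5 * betainc(nu / 2.0, 0.5, z)
print(np.isclose(stdtr(nu, x), cdf_beta))    # expected: True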
Example #11
cholsigmainv = np.linalg.cholesky(np.linalg.inv(np.cov(screens.T)))
warped_screens = screens.values @ cholsigmainv
warped_intercept = cholsigmainv.sum(axis=0)

# Then just run linear regression; this implementation is based on 
# https://pingouin-stats.org/generated/pingouin.linear_regression.html

def linear_regression(warped_screens, warped_intercept):
    GLS_coef = np.empty((len(warped_screens), len(warped_screens)))
    GLS_se = np.empty((len(warped_screens), len(warped_screens)))
    ys = warped_screens.T
    for gene_index in range(len(warped_screens)):
        X = np.stack((warped_intercept, warped_screens[gene_index]), axis=1)
        coef, residues = np.linalg.lstsq(X, ys, rcond=None)[:2]
        df = warped_screens.shape[1] - 2
        GLS_coef[gene_index] = coef[1]
        GLS_se[gene_index] = \
            np.sqrt(np.linalg.pinv(X.T @ X)[1, 1] * residues / df)
    return GLS_coef, GLS_se

GLS_coef, GLS_se = linear_regression(warped_screens, warped_intercept)
df = warped_screens.shape[1] - 2
GLS_p = 2 * stdtr(df, -np.abs(GLS_coef / GLS_se))
np.fill_diagonal(GLS_p, 1)

# Save everything

np.save('GLS_p.npy', GLS_p)
np.save('GLS_sign.npy', np.sign(GLS_coef))
screens.index.to_series().to_csv('genes.txt', index=False, header=False)
Example #12
    def _cdf(self, x, df, C, Ci):
        out = special.stdtr(df, numpy.dot(Ci, special.stdtrit(df, x)))
        return out
Example #13
# Create sample data.
a = df0.outcome
b = df1.outcome

# Use scipy.stats.ttest_ind.
t, p = ttest_ind(a, b, equal_var=False)
print("ttest_ind: t = %g  p = %g" % (t, p))
results = ("ttest_ind: t = %g  p = %g" % (t, p))

# Compute the descriptive statistics of a and b.
abar = a.mean()
avar = a.var(ddof=1)
na = a.size
adof = na - 1

bbar = b.mean()
bvar = b.var(ddof=1)
nb = b.size
bdof = nb - 1

# Use scipy.stats.ttest_ind_from_stats.
t2, p2 = ttest_ind_from_stats(abar, np.sqrt(avar), na,
                              bbar, np.sqrt(bvar), nb,
                              equal_var=False)
print("ttest_ind_from_stats: t = %g  p = %g" % (t2, p2))

# Use the formulas directly.
tf = (abar - bbar) / np.sqrt(avar/na + bvar/nb)
dof = (avar/na + bvar/nb)**2 / (avar**2/(na**2*adof) + bvar**2/(nb**2*bdof))
pf = 2*stdtr(dof, -np.abs(tf))
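For reference, a self-contained variant of the comparison above; the sample data are assumed (randomly generated), not taken from the original df0/df1. Both routes should agree to floating-point precision.

# Sketch: the hand-rolled Welch formulas match scipy.stats with equal_var=False.
import numpy as np
from scipy.stats import ttest_ind
from scipy.special import stdtr

rng = np.random.default_rng(0)
a = rng.normal(0.0, 1.0, 40)   # assumed sample data
b = rng.normal(0.3, 1.5, 55)

t, p = ttest_ind(a, b, equal_var=False)

va, vb = a.var(ddof=1), b.var(ddof=1)
na, nb = a.size, b.size
tf = (a.mean() - b.mean()) / np.sqrt(va/na + vb/nb)
dof = (va/na + vb/nb)**2 / (va**2/(na**2*(na-1)) + vb**2/(nb**2*(nb-1)))
pf = 2 * stdtr(dof, -np.abs(tf))

print(np.isclose(t, tf), np.isclose(p, pf))  # expected: True True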
Example #14
def one_sample_t(A,mu):
    n = len(A)
    df = n-1
    z = (np.mean(A) - mu) / std(A)
    t = z * np.sqrt(n)
    return t, stdtr(df,t)
Example #15
    def _cdf(self, x, a, C, Ci, loc):
        x = numpy.dot(Ci, (x.T-loc.T).T)
        return special.stdtr(a, x)
Example #16
def _cdft(x,df):
    return special.stdtr(df, x)
Example #17
    def _ppf(self, q, df, C, Ci):
        out = special.stdtr(df, numpy.dot(C, special.stdtrit(df, q)))
        return out
Example #18
    def _cdf(self, x, df, C, Ci):
        out = special.stdtr(df, numpy.dot(Ci, special.stdtrit(df, x)))
        return out
Example #19
def _cdft(x, df):
    return special.stdtr(df, x)  # pylint: disable=no-member
Example #20
    def _pdf(self, x, df, alpha):
        # 2*normpdf(x)*normcdf(alpha*x)
        return 2.0 * distributions.t._pdf(x, df) * special.stdtr(df + 1, alpha * x * np.sqrt((1 + df) / (x ** 2 + df)))
Example #21
    def _cdf(self, x, a):
        return special.stdtr(a, x)
Example #22
    def conll_to_contexts(self, conll_file, ctxt_out_file, \
                          ctxt_type="syntactic", ctxt_dir="up,down", \
                          pterm_min_freq=1000, ctxt_min_freq=1000, \
                          pterm_pos="ADJ,ADV,NC,V,VIMP,VINF,VPP,VPR,VS", \
                          pterm_cpos="ADJ,ADV,NC,V,VIMP,VINF,VPP,VPR,VS", \
                          pterm_use_lem="ADJ,ADV,NC,V,VIMP,VINF,VPP,VPR,VS", \
                          sterm_pos="ADJ,ADV,NC,V,VIMP,VINF,VPP,VPR,VS", \
                          sterm_cpos="ADJ,ADV,NC,V,VIMP,VINF,VPP,VPR,VS", \
                          sterm_use_lem="ADJ,ADV,NC,V,VIMP,VINF,VPP,VPR,VS", \
                          skip_pos="P,P+D,CC,CS", skip_cpos="", \
                          skip_use_lem="P,P+D,CC,CS", skip_only=False, \
                          use_deplabel=True, weight_fun="pmi"):

        # Read in data.
        print >> sys.stderr, "Extracting context relations from CONLL..."
        t0 = time.time()

        pterm_min_freq = int(pterm_min_freq)
        ctxt_min_freq = int(ctxt_min_freq)
        ctxt_type_set = set(ctxt_type.split(","))
        ctxt_dir_set = set(ctxt_dir.split(","))
        pterm_pos_set = set(pterm_pos.split(","))
        pterm_cpos_set = set(pterm_cpos.split(","))
        pterm_use_lem_set = set(pterm_use_lem.split(","))
        sterm_pos_set = set(sterm_pos.split(","))
        sterm_cpos_set = set(sterm_cpos.split(","))
        sterm_use_lem_set = set(sterm_use_lem.split(","))
        skip_pos_set = set(skip_pos.split(","))
        skip_cpos_set = set(skip_cpos.split(","))
        skip_use_lem_set = set(skip_use_lem.split(","))

        sent = [()] # (LEMMA, CPS, FPS, HEAD, LABEL)
        pterm_cnt = {} # PTERM -> COUNT
        prel_cnt = {} # (PTERM, REL) -> COUNT
        crel_cnt = {} # PTERM -> (REL, STERM) -> COUNT
        ctxt_cnt = {} # (REL, STERM) -> COUNT
        rel_cnt = {} # REL -> COUNT
        sterm_cnt = {} # STERM -> COUNT
        tot_cnt = 0

        conll_f = codecs.open(conll_file, 'r', ENCODING)

        for _,sent in read_conll(conll_f, mode="extract"):

            # Extract context relations from a full sent.
            for i in range(1, len(sent)):
                crels = []
                dep = sent[i]
                deppos = dep[CPS] if dep[FPS] in pterm_cpos_set \
                         else dep[FPS]

                # Linear dependency context relations.
                if "linear" in ctxt_type_set:
                    prv = sent[i-1] if i > 1 else None
                    nxt = sent[i+1] if i < len(sent)-1 else None

                    # Store previous token relation.
                    if prv and \
                           "prev" in ctxt_dir_set and \
                           dep[FPS] in pterm_pos_set and \
                           prv[FPS] in sterm_pos_set:
                        prvpos = prv[CPS] if prv[FPS] in sterm_cpos_set \
                                 else prv[FPS]
                        rel = tuple(["*p*"])
                        pterm = deppos,"<"+deppos+">"
                        if dep[FPS] in pterm_use_lem_set:
                            pterm = deppos,dep[LEM]
                        # Store up to two relations, depending on sterm lex
                        sterm = prvpos,"<"+prvpos+">"
                        crels.append((pterm,sterm,rel))
                        if prv[FPS] in sterm_use_lem_set:
                            sterm = prvpos,prv[LEM]
                            crels.append((pterm,sterm,rel))

                    # Store next token relation.
                    if nxt and \
                           "next" in ctxt_dir_set and \
                           dep[FPS] in pterm_pos_set and \
                           nxt[FPS] in sterm_pos_set:
                        nxtpos = nxt[CPS] if nxt[FPS] in sterm_cpos_set \
                                 else nxt[FPS]
                        rel = tuple(["*n*"])
                        pterm = deppos,"<"+deppos+">"
                        if dep[FPS] in pterm_use_lem_set:
                            pterm = deppos,dep[LEM]
                        # Store up to two relations, depending on sterm lex
                        sterm = nxtpos,"<"+nxtpos+">"
                        crels.append((pterm,sterm,rel))
                        if nxt[FPS] in sterm_use_lem_set:
                            sterm = nxtpos,nxt[LEM]
                            crels.append((pterm,sterm,rel))

                # Syntactic dependency context relations.
                if "syntactic" in ctxt_type_set:
                    gov = sent[dep[GOV]]
                    path = []
                    if use_deplabel:
                        path.append(dep[LAB])
                        
                    # Skip at most one time to next governor up.
                    skipped = False
                    if len(gov) > 0 and gov[FPS] in skip_pos_set:
                        govpos = gov[CPS] if gov[FPS] in skip_cpos_set else \
                                 gov[FPS]
                        if gov[FPS] in skip_use_lem_set:
                            path.append(govpos+"|"+gov[LEM])
                        else:
                            path.append(govpos)
                        if use_deplabel:
                            path.append(gov[LAB])
                        gov = sent[gov[GOV]]
                        if len(gov) > 0:
                            if gov[FPS] in skip_pos_set: # Can't skip twice
                                gov = []
                            else:
                                skipped = True
                            
                    if len(gov) > 0 and (skipped or not skip_only):
                        # Store upward relation.
                        if "up" in ctxt_dir_set and \
                               dep[FPS] in pterm_pos_set and \
                               gov[FPS] in sterm_pos_set:
                            govpos = gov[CPS] if gov[FPS] in sterm_cpos_set \
                                     else gov[FPS]
                            rel = tuple(["*u*"] + path)
                            pterm = deppos,"<"+deppos+">"
                            if dep[FPS] in pterm_use_lem_set:
                                pterm = deppos,dep[LEM]
                            # Store up to two relations, depending on sterm lex
                            sterm = govpos,"<"+govpos+">"
                            crels.append((pterm,sterm,rel))
                            if gov[FPS] in sterm_use_lem_set:
                                sterm = govpos,gov[LEM]
                                crels.append((pterm,sterm,rel))

                        # Store downward relation.
                        if "down" in ctxt_dir_set and \
                               gov[FPS] in pterm_pos_set and \
                               dep[FPS] in sterm_pos_set:
                            govpos = gov[CPS] if gov[FPS] in pterm_cpos_set \
                                     else gov[FPS]
                            path.reverse()
                            rel = tuple(["*d*"] + path)
                            pterm = govpos,"<"+govpos+">"
                            if gov[FPS] in pterm_use_lem_set:
                                pterm = govpos,gov[LEM]
                            # Store up to two relations, depending on sterm lex
                            sterm = deppos,"<"+deppos+">"
                            crels.append((pterm,sterm,rel))
                            if dep[FPS] in sterm_use_lem_set:
                                sterm = deppos,dep[LEM]
                                crels.append((pterm,sterm,rel))

                # Store relevant pterm, context, context relation counts.
                for pterm,sterm,rel in crels:
                    ctxt = rel, sterm
                    prel = pterm, rel
                    crel = pterm, rel, sterm
                    pterm_cnt[pterm] = pterm_cnt.get(pterm, 0) + 1
                    ctxt_cnt[ctxt] = ctxt_cnt.get(ctxt, 0) + 1
                    if pterm not in crel_cnt:
                        crel_cnt[pterm] = {}
                    crel_cnt[pterm][ctxt] = crel_cnt[pterm].get(ctxt, 0) + 1
                    tot_cnt += 1

            sent = [()]
        conll_f.close()

        # print >> sys.stderr, "# crel occurrences:", tot_cnt

        # Retain and weight frequent pterm and ctxt only 
        fctxt = open(ctxt_out_file, "wb")

        # Store vocabularies
        id_to_pterm = []
        unk_pos = {}
        cnt = 0
        for pterm in sorted(pterm_cnt.keys()):
            pterm_pos, pterm_lem = pterm
            if pterm_pos.startswith("V") and pterm_lem in STOP:
                del pterm_cnt[pterm]
                del crel_cnt[pterm]
                continue
            if pterm_cnt[pterm] >= pterm_min_freq:
                id_to_pterm.append(pterm)
                cnt += 1
            else:
                unk = (pterm_pos, "<UNK>")
                if pterm_pos not in unk_pos:
                    unk_pos[pterm_pos] = True
                    pterm_cnt[unk] = 0
                    crel_cnt[unk] = {}
                    id_to_pterm.append(unk)
                    cnt += 1
                pterm_cnt[unk] += pterm_cnt[pterm]
                for ctxt in crel_cnt[pterm].keys():
                    crel_cnt[unk][ctxt] = crel_cnt[unk].get(ctxt, 0) + \
                                          crel_cnt[pterm][ctxt]
                del pterm_cnt[pterm]
                del crel_cnt[pterm]
                
        id_to_pterm = tuple(id_to_pterm)
        print >> sys.stderr, "# pterm found:", cnt
        id_to_ctxt = []
        cnt = 0
        for ctxt in sorted(ctxt_cnt.keys()):
            if ctxt_cnt[ctxt] >= ctxt_min_freq:
                id_to_ctxt.append(ctxt)
                cnt += 1
            else:
                del ctxt_cnt[ctxt]
        id_to_ctxt = tuple(id_to_ctxt)
        print >> sys.stderr, "# ctxt found:", cnt
        cPickle.dump(id_to_pterm, fctxt, -1)
        cPickle.dump(id_to_ctxt, fctxt, -1)

        # Reusable extremum weights, given the total count n
        n = tot_cnt
        n2 = n**2

        # PMI min: c1=c2=n/2, c12=1
        pmi_min = log(4.0/n, 2)

        # PMI max: c1=pterm_min_freq, c2=ctxt_min_freq, c12=min(c1,c2)
        maxmin_cut = float(max(pterm_min_freq, ctxt_min_freq))
        pmi_max = log(n/maxmin_cut, 2)

        # LRATIO min: useful only if p1 < p2, so min is 0
        lratio_min = 0.0

        # LRATIO max: c1=c2=c12=n/x, where x is optimal divisor
        x = 3.9215536345675
        x2 = x**2
        lratio_max  = -2*((n/x)*log(1/x2) + (n-n/x)*log(1-1/x2) - \
                          (n/x)*log(1/x) - (n-n/x)*log(1-1/x))

        for ptermid in xrange(len(id_to_pterm)):
            curvector = [None]*len(id_to_ctxt)
            pterm = id_to_pterm[ptermid]

            for ctxtid in xrange(len(id_to_ctxt)):
                ctxt = id_to_ctxt[ctxtid]

                # Quick values for math
                c12 = crel_cnt[pterm].get(ctxt, 0)
                c1 = pterm_cnt[pterm]
                c2 = ctxt_cnt[ctxt]
                p1 = float(c1 * c2) / n2 # Null hypothesis
                p2 = float(c12) / n # Alternative hypothesis
                cont = [[c12,c2-c12],[c1-c12,n+c12-c1-c2]]
                
                if weight_fun == "relfreq": # [0,1] proportion
                    wgt = float(c12) / c1

                elif weight_fun == "chisq": # [-1,1] p-value
                    _,pval,_,_ = stats.chi2_contingency(cont)
                    wgt = 1 - pval if p1 < p2 else pval - 1

                elif weight_fun == "ttest": # [-1,1] p-value
                    wgt = -1
                    if c12 > 0:
                        tval = (c12 - float(c1*c2)/n) /\
                               sqrt(c12 * (1 - float(c12) / n))
                        if tval < 0:
                            wgt = -1 + 2*special.stdtr(n, tval)
                        else:
                            wgt = 1 - 2*special.stdtr(n, -tval)

                elif weight_fun == "binom": # [-1,1] Exact one-sided test
                    wgt = 0.0
                    if p1 < p2: # implied that c12 > 0
                        wgt = stats.binom.cdf(c12-1,n,p1)
                    else:
                        wgt = stats.binom.cdf(c12,n,p1) - 1

                elif weight_fun == "pmi": # [-1,1] information
                    # Transform from [pmi_min,pmi_max]
                    wgt = pmi_min
                    #wgt = -1
                    if c12 > 0:
                        wgt = log(float(c12 * n) / (c1 * c2), 2)
                        #if pmi < 0:
                        #    wgt = -pmi / pmi_min
                        #else:
                        #    wgt = pmi / pmi_max

                elif weight_fun == "lratio": # [0, 1]
                    # Transform from [0, lratio_max]
                    wgt = lratio_min
                    if p1 < p2:
                        wgt = -2*(c12*log(p1) + (n-c12)*log(1-p1) - \
                                  c12*log(p2) - (n-c12)*log(1-p2))
                        wgt /= lratio_max

                curvector[ctxtid] = wgt
            cPickle.dump(curvector, fctxt, -1)
        fctxt.close()
        print >> sys.stderr, "Done in %s sec." %(time.time()-t0)
Example #23
    def _pdf(self, x, df, alpha):
        # 2*normpdf(x)*normcdf(alpha*x)
        return 2.0*distributions.t._pdf(x, df) * special.stdtr(df+1, alpha*x*np.sqrt((1+df)/(x**2+df)))
Example #24
    def _ppf(self, q, df, C, Ci):
        out = special.stdtr(df, numpy.dot(C, special.stdtrit(df, q)))
        return out
Example #25
def one_sample_t(A, mu):
    n = len(A)
    df = n - 1
    z = (np.mean(A) - mu) / std(A)
    t = z * np.sqrt(n)
    return t, stdtr(df, t)
Example #26
    def _cdf(self, x, a, C, Ci, loc):
        x = np.dot(Ci, (x.T-loc.T).T)
        return special.stdtr(a, x)
Example #27
def _cdft(x, df):
    return special.stdtr(df, x)
Example #28
    for i in range(m + 1, len(accuracy_df.index)):
        mean1 = accuracy_df["2. Average Accuracy"][m]
        mean2 = accuracy_df["2. Average Accuracy"][i]
        N1 = n
        N2 = n
        sample_std1 = accuracy_df["3. Accuracy Standard Deviation"][m]
        sample_variance1 = (sample_std1) ** 2
        sample_std2 = accuracy_df["3. Accuracy Standard Deviation"][i]
        sample_variance2 = (sample_std2) ** 2
        if mean1 > mean2:
            T_numerator = mean1 - mean2

        else:
            T_numerator = mean2 - mean1

        T_denominator = math.sqrt(sample_variance1 / N1 + sample_variance2 / N2)

        T = T_numerator / T_denominator

        deg_fre_numerator = (sample_variance1 / N1 + sample_variance2 / N2) ** 2
        deg_fre_denominator = (((sample_variance1) / N1) ** 2) / (N1 - 1) + (((sample_variance2) / N2) ** 2) / (N2 - 1)

        deg_fre = deg_fre_numerator / deg_fre_denominator
        pf = 2 * stdtr(deg_fre, -np.abs(T))
        print(t_test_df[t_test_df.columns.values[0]])
        t_test_df[t_test_df.columns.values[m + 1]][i] = pf

writer = pd.ExcelWriter("accuracy_t_test.xlsx", engine="xlsxwriter")
t_test_df.to_excel(writer)
writer.save()
Example #29
    def _cdf(self, x, a):
        return special.stdtr(a, x)