def TwoSamples_tTest(x,y, SignificanceLevel=0.05):
    """Two-sided, two-sample Student t-test with a pooled variance estimate.

    Parameters
    ----------
    x, y : sequences of numbers
        The two samples. Their sizes are taken with ``len``.
    SignificanceLevel : float, optional
        Significance level alpha; the intervals returned have coverage
        ``1 - SignificanceLevel``.

    Returns
    -------
    T : float
        Test statistic ``(mean(x) - mean(y)) / (S_pool * sqrt(1/n + 1/m))``.
    p : float
        Two-sided p-value of ``T`` under a t distribution with
        ``n + m - 2`` degrees of freedom.
    RejectionRange : ndarray, shape (2, 2)
        ``[[-inf, t_low], [t_high, inf]]``: the values of ``T`` outside the
        central ``1 - SignificanceLevel`` interval of the t distribution.
    MeansInterval : ndarray, shape (2,)
        Confidence interval for the difference of the means.

    Raises
    ------
    ZeroDivisionError
        If ``n + m - 2`` is zero or one of the samples is empty.
    """
    from scipy.stats.distributions import t

    # Analyze data
    n = len(x)
    m = len(y)
    s_x = np.std(x, ddof=1)
    s_y = np.std(y, ddof=1)
    x_bar = np.mean(x)
    y_bar = np.mean(y)

    # Perform test statistic. Float literals keep the reciprocals from
    # truncating to zero under Python 2 integer division.
    DOFs = n + m - 2
    S_pool = np.sqrt(1.0/DOFs * ((n - 1)*s_x**2 + (m - 1)*s_y**2))
    StdErr = S_pool * np.sqrt(1.0/n + 1.0/m)
    T = (x_bar - y_bar) / StdErr

    # Compute p value from the survival function of |T|. Unlike
    # 1 - cdf(T), this does not cancel to exactly 0 for large positive T,
    # and it treats both signs of T identically.
    p = 2 * t.sf(abs(T), DOFs)

    # Central interval of the t distribution and the complementary
    # rejection range for the test statistic
    T_Interval = np.array(t.interval(1 - SignificanceLevel, DOFs))
    RejectionRange = np.array([[-np.inf, T_Interval[0]], [T_Interval[1], np.inf]])

    # Compute CI for difference in means
    MeansInterval = (x_bar - y_bar) + T_Interval * StdErr

    return T, p, RejectionRange, MeansInterval
Exemple #2
0
	def run(self, seqGroup1, seqGroup2, parentSeqGroup1, parentSeqGroup2, confIntervMethod, coverage):
		"""Compare the mean proportions of two groups with an unequal-variance t-test.

		The proportion of sample i is seqGroup[i] / parentSeqGroup[i]; a sample
		whose parent count is not positive contributes a proportion of 0.0 and
		sets a note. The test statistic divides the difference in mean
		proportions by sqrt(var1/n1 + var2/n2), and the degrees of freedom are
		computed from the two per-group variance terms.

		confIntervMethod is accepted but not used by this method.
		coverage is the confidence-interval coverage (e.g. 0.95).

		Returns a tuple:
			(1 - pValue, two-sided p-value, lower CI * 100, upper CI * 100,
			 difference in mean proportions * 100, note)
		where pValue = t.cdf(T, dof). When either group has fewer than two
		samples, or both variances are zero, fixed fallback values are
		returned and note describes the degenerate case.
		"""
		note = ''

		n1 = len(seqGroup1)
		n2 = len(seqGroup2)

		if n1 >= 2 and n2 >= 2:
			# calculate proportions; guard against a zero parent count
			propGroup1 = []
			for i in range(n1):
				if parentSeqGroup1[i] > 0:
					propGroup1.append(float(seqGroup1[i]) / parentSeqGroup1[i])
				else:
					propGroup1.append(0.0)
					note = 'degenerate case: parent group had a count of zero'

			propGroup2 = []
			for i in range(n2):
				if parentSeqGroup2[i] > 0:
					propGroup2.append(float(seqGroup2[i]) / parentSeqGroup2[i])
				else:
					propGroup2.append(0.0)
					note = 'degenerate case: parent group had a count of zero'

			# calculate p-value, effect size, and CI
			meanG1 = float(sum(propGroup1)) / n1
			meanG2 = float(sum(propGroup2)) / n2
			dp = meanG1 - meanG2

			varG1 = variance(propGroup1, meanG1)
			varG2 = variance(propGroup2, meanG2)

			normVarG1 = varG1 / n1
			normVarG2 = varG2 / n2
			unpooledVar = normVarG1 + normVarG2
			sqrtUnpooledVar = math.sqrt(unpooledVar)

			if unpooledVar != 0:
				# p-value
				T_statistic = dp / sqrtUnpooledVar
				dof = (unpooledVar*unpooledVar) / ( (normVarG1*normVarG1)/(n1-1) + (normVarG2*normVarG2)/(n2-1) )
				pValue = t.cdf(T_statistic, dof)

				# CI
				tCritical = t.isf(0.5 * (1.0-coverage), dof) # 0.5 factor accounts from symmetric nature of distribution
				lowerCI = dp - tCritical*sqrtUnpooledVar
				upperCI = dp + tCritical*sqrtUnpooledVar
			else:
				if meanG1 != meanG2:
					pValue = 0.0 # the difference (at least according to these samples) must be true as there is no variance
				else:
					pValue = 0.5

				lowerCI = dp
				upperCI = dp

				note = 'degenerate case: variance of both groups is zero'
		else:
			pValue = 0.5
			lowerCI = 0.0
			upperCI = 0.0
			dp = 0.0
			note = 'degenerate case: both groups must contain at least 2 samples'

		return 1.0 - pValue, 2*min(pValue, 1.0 - pValue), lowerCI*100, upperCI*100, dp*100, note
Exemple #3
0
	def run(self, seqGroup1, seqGroup2, parentSeqGroup1, parentSeqGroup2, confIntervMethod, coverage):
		"""Compare the mean proportions of two groups with a pooled-variance t-test.

		The proportion of sample i is seqGroup[i] / parentSeqGroup[i]; a sample
		whose parent count is not positive contributes a proportion of 0.0 and
		sets a note. The test uses n1 + n2 - 2 degrees of freedom and a pooled
		variance estimate.

		confIntervMethod is accepted but not used by this method.
		coverage is the confidence-interval coverage (e.g. 0.95).

		Returns a tuple:
			(1 - pValue, two-sided p-value, lower CI * 100, upper CI * 100,
			 difference in mean proportions * 100, note)
		where pValue = t.cdf(T, dof). The two degenerate cases (a group with
		fewer than two samples, or zero pooled variance) are handled by
		explicit branches, so note is always a string and each case gets its
		own fallback values; any other error propagates to the caller.
		"""
		note = ''

		n1 = len(seqGroup1)
		n2 = len(seqGroup2)

		if n1 < 2 or n2 < 2:
			pValue = 0.5
			lowerCI = 0.0
			upperCI = 0.0
			dp = 0.0
			note = 'degenerate case: both groups must contain at least 2 samples'
		else:
			# calculate proportions; guard against a zero parent count
			propGroup1 = []
			for i in range(n1):
				if parentSeqGroup1[i] > 0:
					propGroup1.append(float(seqGroup1[i]) / parentSeqGroup1[i])
				else:
					propGroup1.append(0.0)
					note = 'degenerate case: parent group had a count of zero'

			propGroup2 = []
			for i in range(n2):
				if parentSeqGroup2[i] > 0:
					propGroup2.append(float(seqGroup2[i]) / parentSeqGroup2[i])
				else:
					propGroup2.append(0.0)
					note = 'degenerate case: parent group had a count of zero'

			# calculate statistics
			meanG1 = float(sum(propGroup1)) / n1
			meanG2 = float(sum(propGroup2)) / n2
			dp = meanG1 - meanG2

			varG1 = variance(propGroup1, meanG1)
			varG2 = variance(propGroup2, meanG2)

			dof = n1 + n2 - 2
			pooledVar = ((n1 - 1)*varG1 + (n2 - 1)*varG2) / dof
			denom = math.sqrt(pooledVar) * math.sqrt(1.0/n1 + 1.0/n2)

			if denom != 0:
				# p-value
				T_statistic = dp / denom
				pValue = t.cdf(T_statistic, dof)

				# CI
				tCritical = t.isf(0.5 * (1.0-coverage), dof) # 0.5 factor accounts from symmetric nature of distribution
				lowerCI = dp - tCritical*denom
				upperCI = dp + tCritical*denom
			else:
				# no variance in either group: the test statistic is undefined
				if meanG1 != meanG2:
					pValue = 0.0 # the difference (at least according to these samples) must be true as there is no variance
				else:
					pValue = 0.5

				lowerCI = dp
				upperCI = dp
				note = 'degenerate case: variance of both groups is zero'

		return 1.0 - pValue, 2*min(pValue, 1.0 - pValue), lowerCI*100, upperCI*100, dp*100, note
Exemple #4
0
    def build_aggregate_model(self):
        """Build one aggregate model from the per-fold models in self.modelData.

        Descriptive attributes are copied from the first fold. The STRFs (raw
        and smoothed with gaussian_2d_kernel(1)) are averaged across folds,
        and a p-value map is computed from |smoothed mean / smoothed std|
        using tdist with (number of folds - 1) degrees of freedom. The output
        nonlinearities are interpolated onto a common grid and averaged.
        The result is stored in self.aggregateModel; nothing is returned.
        """
        agg = SingleModelData()
        folds = self.modelData
        agg.nfolds = len(folds)
        agg.aggregate = True
        agg.unit = self.unit

        # Descriptive attributes are taken from the first fold.
        copied_attrs = (
            'preprocFile', 'stimClass', 'freqs', 'numChans', 'timeLags',
            'high_freq', 'low_freq', 'is_surprise', 'is_count_response',
        )
        for attr_name in copied_attrs:
            setattr(agg, attr_name, getattr(folds[0], attr_name))
        agg.output_nls = [fold.output_nl for fold in folds]

        # Mean and standard deviation of the raw STRFs across folds.
        fold_strfs = np.array([fold.strf for fold in folds])
        agg.strf = fold_strfs.mean(axis=0).squeeze()
        agg.strfStd = fold_strfs.std(axis=0).squeeze()

        # Same statistics after smoothing each fold's STRF.
        kernel = gaussian_2d_kernel(1)
        smoothed = np.array(
            [convolve2d(fold_strf, kernel, mode='same') for fold_strf in fold_strfs]
        )
        agg.smoothedStrf = smoothed.mean(axis=0).squeeze()
        agg.smoothedStrfStd = smoothed.std(axis=0).squeeze()

        # Two-sided p-values for the smoothed STRF.
        tstat = np.abs(agg.smoothedStrf / agg.smoothedStrfStd)
        dof = len(folds) - 1
        agg.smoothedStrfPvals = (1 - tdist.cdf(tstat, dof)) * 2

        # Common domain of the output nonlinearities: largest minimum to
        # smallest maximum across folds.
        lower = np.array([np.min(nl.domain) for nl in agg.output_nls]).max()
        upper = np.array([np.max(nl.domain) for nl in agg.output_nls]).min()

        # 200 grid points, dropping the first one and the last two.
        grid = np.linspace(lower, upper, 200)[1:-2]
        curves = np.zeros([len(agg.output_nls), len(grid)])

        # Rows stay zero when the folds share no common domain, or when a
        # fold's domain/range squeezes down to a scalar.
        if lower < upper:
            for row, nl in enumerate(agg.output_nls):
                xs = nl.domain.squeeze()
                ys = nl.range.squeeze()
                if len(xs.shape) == 0 or len(ys.shape) == 0:
                    continue
                curves[row, :] = interp1d(xs, ys)(grid)

        mean_nl = OutputNL()
        mean_nl.domain = grid
        mean_nl.range = curves.mean(axis=0)
        mean_nl.range_std = curves.std(axis=0)
        agg.output_nl = mean_nl

        self.aggregateModel = agg
Exemple #5
0
	def run(self, seqGroup1, seqGroup2, parentSeqGroup1, parentSeqGroup2, confIntervMethod, coverage):
		"""Compare the mean proportions of two groups with a pooled-variance t-test.

		The proportion of sample i is seqGroup[i] / parentSeqGroup[i]; a sample
		whose parent count is not positive contributes a proportion of 0.0 and
		sets a note. The test uses n1 + n2 - 2 degrees of freedom and a pooled
		variance estimate.

		confIntervMethod is accepted but not used by this method.
		coverage is the confidence-interval coverage (e.g. 0.95).

		Returns a tuple:
			(1 - pValue, two-sided p-value, lower CI * 100, upper CI * 100,
			 difference in mean proportions * 100, note)
		where pValue = t.cdf(T, dof). The two degenerate cases (a group with
		fewer than two samples, or zero pooled variance) are handled by
		explicit branches, so note is always a string and each case gets its
		own fallback values; any other error propagates to the caller.
		"""
		note = ''

		n1 = len(seqGroup1)
		n2 = len(seqGroup2)

		if n1 < 2 or n2 < 2:
			pValue = 0.5
			lowerCI = 0.0
			upperCI = 0.0
			dp = 0.0
			note = 'degenerate case: both groups must contain at least 2 samples'
		else:
			# calculate proportions; guard against a zero parent count
			propGroup1 = []
			for i in range(n1):
				if parentSeqGroup1[i] > 0:
					propGroup1.append(float(seqGroup1[i]) / parentSeqGroup1[i])
				else:
					propGroup1.append( 0.0 )
					note = 'degenerate case: parent group had a count of zero'

			propGroup2 = []
			for i in range(n2):
				if parentSeqGroup2[i] > 0:
					propGroup2.append(float(seqGroup2[i]) / parentSeqGroup2[i])
				else:
					propGroup2.append( 0.0 )
					note = 'degenerate case: parent group had a count of zero'

			# calculate statistics
			meanG1 = float(sum(propGroup1)) / n1
			meanG2 = float(sum(propGroup2)) / n2
			dp = meanG1 - meanG2

			varG1 = variance(propGroup1, meanG1)
			varG2 = variance(propGroup2, meanG2)

			dof = n1 + n2 - 2
			pooledVar = ((n1 - 1)*varG1 + (n2 - 1)*varG2) / dof
			denom = math.sqrt(pooledVar) * math.sqrt(1.0/n1 + 1.0/n2)

			if denom != 0:
				# p-value
				T_statistic = dp / denom
				pValue = t.cdf(T_statistic, dof)

				# CI
				tCritical = t.isf(0.5 * (1.0-coverage), dof) # 0.5 factor accounts from symmetric nature of distribution
				lowerCI = dp - tCritical*denom
				upperCI = dp + tCritical*denom
			else:
				# no variance in either group: the test statistic is undefined
				if meanG1 != meanG2:
					pValue = 0.0 # the difference (at least according to these samples) must be true as there is no variance
				else:
					pValue = 0.5

				lowerCI = dp
				upperCI = dp
				note = 'degenerate case: variance of both groups is zero'

		return 1.0 - pValue, 2*min(pValue, 1.0 - pValue), lowerCI*100, upperCI*100, dp*100, note
Exemple #6
0
# Degrees of freedom: (n1 - 1) + (n2 - 1).
# NOTE(review): n1, n2, SE, x1 and x2 are defined outside this excerpt —
# presumably the two sample sizes, the standard error of the difference and
# the two sample means; confirm against the preceding code.
DF = (n1 - 1) + (n2 - 1)

print('SE=', SE, 'DF=', DF)

# calculate t-score: |x1 - x2| (minus a hypothesized difference of 0) in units of SE

tscore = np.abs(((x1 - x2) - 0) / SE)
print(tscore)

# calculate the critical t-value

from scipy.stats.distributions import t

# set the confidence level c1 to 0.95 and take the upper 1 - alpha/2 quantile
c1 = 0.95
alpha = 1 - c1
t95 = t.ppf(1.0 - alpha / 2.0, DF)

print(t95)

# repeat with confidence level c1 = 0.94
# (the result overwrites t95, although it is no longer the 95% value)
c1 = 0.94
alpha = 1 - c1
t95 = t.ppf(1.0 - alpha / 2.0, DF)

print(t95)

# probability that a t-distributed variable with DF degrees of freedom
# falls between -tscore and +tscore
f = t.cdf(tscore, DF) - t.cdf(-tscore, DF)
print(f)
Exemple #7
0
# Unexplained variation: row-wise sum of squared values of rd divided by
# (number of columns of dx - 4).
# NOTE(review): rd, dx, vt, s, params and xmat come from outside this
# excerpt — presumably rd holds residuals and vt, s an SVD of the design
# matrix; confirm against the preceding code.
uv = (rd**2).sum(1) / (dx.shape[1] - 4)

# (x'x)^{-1} = (vs^2v')^{-1}
xtx = np.dot(vt.T / s**2, vt)

# Standard error for the interaction term (coefficient index 3)
se = np.sqrt(uv * xtx[3, 3])

# Z-scores for the interaction term; missing values are dropped
zs = params[:, 3] / se
zs = zs.dropna()
zsa = np.abs(zs)

# P-values for the interaction term: lower-tail probability at -|z|,
# i.e. one tail only (not doubled).
# NOTE(review): student_t is presumably scipy's Student t distribution.
pv = student_t.cdf(-np.abs(zs), xmat.shape[0] - xmat.shape[1])

# Bonferroni threshold: normal quantile at 1 - 0.025 / (number of z-scores)
bt = norm.ppf(1 - 0.025 / zs.shape[0])

# Calculate the FDR for 20 evenly spaced thresholds from 0 to 6.
# Each row of fdr is [threshold, estimated FDR, number of |z| above threshold].
# NOTE(review): d can be 0 at high thresholds, making the ratio a division
# by zero; the loop variable is also named t, which would shadow any
# module-level name t.
fdr = []
n = len(zs)
for t in np.linspace(0, 6, 20):
    d = np.sum(zsa > t)
    f = 2 * n * norm.cdf(-t) / d
    fdr.append([t, f, d])
fdr = np.asarray(fdr)

# Plots relating to FDR
plt.clf()
Exemple #8
0
	def run(self, seqGroup1, seqGroup2, parentSeqGroup1, parentSeqGroup2, confIntervMethod, coverage):
		"""Compare the mean proportions of two groups with an unequal-variance t-test.

		The proportion of sample i is seqGroup[i] / parentSeqGroup[i]; a sample
		whose parent count is not positive contributes a proportion of 0.0 and
		sets a note. Variances are computed with var(..., ddof=1). The test
		statistic divides the difference in mean proportions by
		sqrt(var1/n1 + var2/n2), and the degrees of freedom are computed from
		the two per-group variance terms.

		confIntervMethod is accepted but not used by this method.
		coverage is the confidence-interval coverage (e.g. 0.95).

		Returns a tuple:
			(1 - pValue, two-sided p-value, lower CI * 100, upper CI * 100,
			 difference in mean proportions * 100, note)
		where pValue = t.cdf(T, dof). When either group has fewer than two
		samples, or both variances are zero, fixed fallback values are
		returned and note describes the degenerate case.
		"""
		note = ''

		n1 = len(seqGroup1)
		n2 = len(seqGroup2)

		if n1 >= 2 and n2 >= 2:
			# calculate proportions (range works on both Python 2 and 3)
			propGroup1 = []
			for i in range(n1):
				if parentSeqGroup1[i] > 0:
					propGroup1.append(float(seqGroup1[i]) / parentSeqGroup1[i])
				else:
					propGroup1.append( 0.0 )
					note = 'degenerate case: parent group had a count of zero'

			propGroup2 = []
			for i in range(n2):
				if parentSeqGroup2[i] > 0:
					propGroup2.append(float(seqGroup2[i]) / parentSeqGroup2[i])
				else:
					propGroup2.append( 0.0 )
					note = 'degenerate case: parent group had a count of zero'

			# calculate p-value, effect size, and CI
			meanG1 = float(sum(propGroup1)) / n1
			meanG2 = float(sum(propGroup2)) / n2
			dp = meanG1 - meanG2

			varG1 = var(propGroup1, ddof=1)
			varG2 = var(propGroup2, ddof=1)

			normVarG1 = varG1 / n1
			normVarG2 = varG2 / n2
			unpooledVar = normVarG1 + normVarG2
			sqrtUnpooledVar = math.sqrt(unpooledVar)

			if unpooledVar != 0:
				# p-value
				T_statistic = dp / sqrtUnpooledVar
				dof = (unpooledVar*unpooledVar) / ( (normVarG1*normVarG1)/(n1-1) + (normVarG2*normVarG2)/(n2-1) )
				pValue = t.cdf(T_statistic, dof)

				# CI
				tCritical = t.isf(0.5 * (1.0-coverage), dof) # 0.5 factor accounts from symmetric nature of distribution
				lowerCI = dp - tCritical*sqrtUnpooledVar
				upperCI = dp + tCritical*sqrtUnpooledVar
			else:
				if meanG1 != meanG2:
					pValue = 0.0 # the difference (at least according to these samples) must be true as there is no variance
				else:
					pValue = 0.5

				lowerCI = dp
				upperCI = dp

				note = 'degenerate case: variance of both groups is zero'
		else:
			pValue = 0.5
			lowerCI = 0.0
			upperCI = 0.0
			dp = 0.0
			note = 'degenerate case: both groups must contain at least 2 samples'

		return 1.0 - pValue, 2*min(pValue, 1.0 - pValue), lowerCI*100, upperCI*100, dp*100, note