Beispiel #1
0
def calc_twosample_ts(propGroup1, propGroup2):
    """Compute an unpooled two-sample t-statistic for every feature.

    Args:
        propGroup1: Sequence of rows, one per feature; each row holds the
            per-sample values of group 1.
        propGroup2: Same layout for group 2. Row ``r`` is compared with
            row ``r`` of ``propGroup1``.

    Returns:
        A tuple ``(T_statistics, effectSizes, notes)`` of three lists with
        one entry per feature: the t-statistic, the difference between the
        two group means multiplied by 100, and a note that is the empty
        string unless the feature was degenerate. All three lists are empty
        when ``propGroup1`` contains no features.
    """
    numFeatures = len(propGroup1)
    if numFeatures == 0:
        # With no features there is no first row to read the sample counts
        # from, so report "nothing to compare" instead of raising IndexError.
        return [], [], []

    # The sample counts are taken from the first row and reused for all rows.
    n1 = len(propGroup1[0])
    n2 = len(propGroup2[0])

    T_statistics = []
    effectSizes = []
    notes = []
    for r, rowG1 in enumerate(propGroup1):
        rowG2 = propGroup2[r]

        # `variance` is a helper defined elsewhere; it receives the row and
        # the row's mean.
        meanG1 = float(sum(rowG1)) / n1
        sqStdErrG1 = variance(rowG1, meanG1) / n1

        meanG2 = float(sum(rowG2)) / n2
        sqStdErrG2 = variance(rowG2, meanG2) / n2

        dp = meanG1 - meanG2
        effectSizes.append(dp * 100)

        denom = math.sqrt(sqStdErrG1 + sqStdErrG2)

        if denom == 0:
            # Both variance terms are zero: divide by a small constant
            # instead so the statistic stays finite.
            notes.append(
                'degenerate case: zero variance for both groups; variance set to 1e-6.'
            )
            T_statistics.append(dp / 1e-6)
        else:
            notes.append('')
            T_statistics.append(dp / denom)

    return T_statistics, effectSizes, notes
Beispiel #2
0
def calc_twosample_ts(propGroup1, propGroup2):
	"""Compute an unpooled two-sample t-statistic for every feature.

	Args:
		propGroup1: Sequence of rows, one per feature; each row holds the
			per-sample values of group 1.
		propGroup2: Same layout for group 2. Row ``r`` is compared with
			row ``r`` of ``propGroup1``.

	Returns:
		A tuple ``(T_statistics, effectSizes, notes)`` of three lists with
		one entry per feature: the t-statistic, the difference between the
		two group means multiplied by 100, and a note that is the empty
		string unless the feature was degenerate. All three lists are empty
		when ``propGroup1`` contains no features.
	"""
	numFeatures = len(propGroup1)
	if numFeatures == 0:
		# With no features there is no first row to read the sample counts
		# from, so report "nothing to compare" instead of raising IndexError.
		return [], [], []

	# The sample counts are taken from the first row and reused for all rows.
	n1 = len(propGroup1[0])
	n2 = len(propGroup2[0])

	T_statistics = []
	effectSizes = []
	notes = []
	for r, rowG1 in enumerate(propGroup1):
		rowG2 = propGroup2[r]

		# `variance` is a helper defined elsewhere; it receives the row and
		# the row's mean.
		meanG1 = float(sum(rowG1)) / n1
		sqStdErrG1 = variance(rowG1, meanG1) / n1

		meanG2 = float(sum(rowG2)) / n2
		sqStdErrG2 = variance(rowG2, meanG2) / n2

		dp = meanG1 - meanG2
		effectSizes.append(dp*100)

		denom = math.sqrt(sqStdErrG1 + sqStdErrG2)

		if denom == 0:
			# Both variance terms are zero: divide by a small constant
			# instead so the statistic stays finite.
			notes.append('degenerate case: zero variance for both groups; variance set to 1e-6.')
			T_statistics.append(dp/1e-6)
		else:
			notes.append('')
			T_statistics.append(dp/denom)

	return T_statistics, effectSizes, notes
Beispiel #3
0
	def run(self, seqGroup1, seqGroup2, parentSeqGroup1, parentSeqGroup2, confIntervMethod, coverage):
		"""Compare two groups of proportions with an unequal-variance t-test.

		Each sample's proportion is its count divided by the matching parent
		count. The group means are compared using the unpooled variance and
		the Welch-Satterthwaite degrees of freedom.

		Args:
			seqGroup1: Per-sample counts for group 1.
			seqGroup2: Per-sample counts for group 2.
			parentSeqGroup1: Per-sample parent counts for group 1, indexed
				like ``seqGroup1``.
			parentSeqGroup2: Per-sample parent counts for group 2, indexed
				like ``seqGroup2``.
			confIntervMethod: Not used by this method.
			coverage: Coverage of the confidence interval as a fraction;
				``(1 - coverage) / 2`` is used as the upper-tail probability.

		Returns:
			A 6-tuple: one minus the t CDF at the test statistic, the
			two-sided p-value, the lower and upper confidence bounds on the
			difference in mean proportions (in percent), the difference in
			mean proportions (in percent), and a note string that is empty
			unless a degenerate case was encountered.
		"""
		note = ''

		n1 = len(seqGroup1)
		n2 = len(seqGroup2)

		if n1 < 2 or n2 < 2:
			# Neutral result: the test needs at least 2 samples per group.
			return 0.5, 1.0, 0.0, 0.0, 0.0, 'degenerate case: both groups must contain at least 2 samples'

		# calculate proportions; a sample whose parent count is not positive
		# gets a proportion of 0.0 instead of raising ZeroDivisionError
		propGroup1 = []
		for i, count in enumerate(seqGroup1):
			if parentSeqGroup1[i] > 0:
				propGroup1.append(float(count) / parentSeqGroup1[i])
			else:
				propGroup1.append(0.0)
				note = 'degenerate case: parent group had a count of zero'

		propGroup2 = []
		for i, count in enumerate(seqGroup2):
			if parentSeqGroup2[i] > 0:
				propGroup2.append(float(count) / parentSeqGroup2[i])
			else:
				propGroup2.append(0.0)
				note = 'degenerate case: parent group had a count of zero'

		# calculate p-value, effect size, and CI
		meanG1 = float(sum(propGroup1)) / n1
		meanG2 = float(sum(propGroup2)) / n2
		dp = meanG1 - meanG2

		# `variance` is a helper defined elsewhere; it receives the values
		# and their mean.
		varG1 = variance(propGroup1, meanG1)
		varG2 = variance(propGroup2, meanG2)

		normVarG1 = varG1 / n1
		normVarG2 = varG2 / n2
		unpooledVar = normVarG1 + normVarG2
		sqrtUnpooledVar = math.sqrt(unpooledVar)

		if unpooledVar != 0:
			# p-value
			T_statistic = dp / sqrtUnpooledVar
			dof = (unpooledVar*unpooledVar) / ( (normVarG1*normVarG1)/(n1-1) + (normVarG2*normVarG2)/(n2-1) )
			pValue = t.cdf(T_statistic, dof)

			# CI
			tCritical = t.isf(0.5 * (1.0-coverage), dof) # 0.5 factor accounts for the symmetric nature of the distribution
			lowerCI = dp - tCritical*sqrtUnpooledVar
			upperCI = dp + tCritical*sqrtUnpooledVar
		else:
			if meanG1 != meanG2:
				pValue = 0.0 # the difference (at least according to these samples) must be true as there is no variance
			else:
				pValue = 0.5

			# without variance the interval collapses onto the point estimate
			lowerCI = dp
			upperCI = dp

			note = 'degenerate case: variance of both groups is zero'

		return 1.0 - pValue, 2*min(pValue, 1.0 - pValue), lowerCI*100, upperCI*100, dp*100, note
Beispiel #4
0
	def run(self, seqGroup1, seqGroup2, parentSeqGroup1, parentSeqGroup2, confIntervMethod, coverage):
		"""Compare two groups of proportions with a pooled-variance t-test.

		Each sample's proportion is its count divided by the matching parent
		count. The group means are compared using the pooled variance with
		``n1 + n2 - 2`` degrees of freedom.

		Args:
			seqGroup1: Per-sample counts for group 1.
			seqGroup2: Per-sample counts for group 2.
			parentSeqGroup1: Per-sample parent counts for group 1, indexed
				like ``seqGroup1``.
			parentSeqGroup2: Per-sample parent counts for group 2, indexed
				like ``seqGroup2``.
			confIntervMethod: Not used by this method.
			coverage: Coverage of the confidence interval as a fraction;
				``(1 - coverage) / 2`` is used as the upper-tail probability.

		Returns:
			A 6-tuple: one minus the t CDF at the test statistic, the
			two-sided p-value, the lower and upper confidence bounds on the
			difference in mean proportions (in percent), the difference in
			mean proportions (in percent), and a note string that is empty
			unless a degenerate case was encountered.
		"""
		n1 = len(seqGroup1)
		n2 = len(seqGroup2)

		if n1 < 2 or n2 < 2:
			# Neutral result: the test needs at least 2 samples per group.
			return 0.5, 1.0, 0.0, 0.0, 0.0, 'degenerate case: both groups must contain at least 2 samples'

		# calculate proportions
		try:
			propGroup1 = [float(count) / parentSeqGroup1[i] for i, count in enumerate(seqGroup1)]
			propGroup2 = [float(count) / parentSeqGroup2[i] for i, count in enumerate(seqGroup2)]
		except ZeroDivisionError:
			# A parent count of zero leaves a proportion undefined; report the
			# neutral result with a readable note.
			return 0.5, 1.0, 0.0, 0.0, 0.0, 'degenerate case: parent group had a count of zero'

		# calculate statistics
		meanG1 = float(sum(propGroup1)) / n1
		meanG2 = float(sum(propGroup2)) / n2
		dp = meanG1 - meanG2

		# `variance` is a helper defined elsewhere; it receives the values
		# and their mean.
		varG1 = variance(propGroup1, meanG1)
		varG2 = variance(propGroup2, meanG2)

		dof = n1 + n2 - 2
		pooledVar = ((n1 - 1)*varG1 + (n2 - 1)*varG2) / (n1 + n2 - 2)
		denom = math.sqrt(pooledVar) * math.sqrt(1.0/n1 + 1.0/n2)

		note = ''
		if denom == 0:
			# Checked explicitly rather than by catching ZeroDivisionError, so
			# this branch cannot be shadowed by a broader exception handler.
			if meanG1 != meanG2:
				pValue = 0.0 # the difference (at least according to these samples) must be true as there is no variance
			else:
				pValue = 0.5

			# without variance the interval collapses onto the point estimate
			lowerCI = dp
			upperCI = dp
			note = 'degenerate case: variance of both groups is zero'
		else:
			# p-value
			T_statistic = dp / denom
			pValue = t.cdf(T_statistic, dof)

			# CI
			tCritical = t.isf(0.5 * (1.0-coverage), dof) # 0.5 factor accounts for the symmetric nature of the distribution
			lowerCI = dp - tCritical*denom
			upperCI = dp + tCritical*denom

		return 1.0 - pValue, 2*min(pValue, 1.0 - pValue), lowerCI*100, upperCI*100, dp*100, note
Beispiel #5
0
#=======================================================================
Beispiel #6
0
	def run(self, seqGroup1, seqGroup2, parentSeqGroup1, parentSeqGroup2, confIntervMethod, coverage):
		"""Compare two groups of proportions with a pooled-variance t-test.

		Each sample's proportion is its count divided by the matching parent
		count; a sample whose parent count is not positive gets a proportion
		of 0.0. The group means are compared using the pooled variance with
		``n1 + n2 - 2`` degrees of freedom.

		Args:
			seqGroup1: Per-sample counts for group 1.
			seqGroup2: Per-sample counts for group 2.
			parentSeqGroup1: Per-sample parent counts for group 1, indexed
				like ``seqGroup1``.
			parentSeqGroup2: Per-sample parent counts for group 2, indexed
				like ``seqGroup2``.
			confIntervMethod: Not used by this method.
			coverage: Coverage of the confidence interval as a fraction;
				``(1 - coverage) / 2`` is used as the upper-tail probability.

		Returns:
			A 6-tuple: one minus the t CDF at the test statistic, the
			two-sided p-value, the lower and upper confidence bounds on the
			difference in mean proportions (in percent), the difference in
			mean proportions (in percent), and a note string that is empty
			unless a degenerate case was encountered.
		"""
		note = ''

		n1 = len(seqGroup1)
		n2 = len(seqGroup2)

		if n1 < 2 or n2 < 2:
			# Neutral result: the test needs at least 2 samples per group.
			return 0.5, 1.0, 0.0, 0.0, 0.0, 'degenerate case: both groups must contain at least 2 samples'

		# calculate proportions
		propGroup1 = []
		for i, count in enumerate(seqGroup1):
			if parentSeqGroup1[i] > 0:
				propGroup1.append(float(count) / parentSeqGroup1[i])
			else:
				propGroup1.append( 0.0 )
				note = 'degenerate case: parent group had a count of zero'

		propGroup2 = []
		for i, count in enumerate(seqGroup2):
			if parentSeqGroup2[i] > 0:
				propGroup2.append(float(count) / parentSeqGroup2[i])
			else:
				propGroup2.append( 0.0 )
				note = 'degenerate case: parent group had a count of zero'

		# calculate statistics
		meanG1 = float(sum(propGroup1)) / n1
		meanG2 = float(sum(propGroup2)) / n2
		dp = meanG1 - meanG2

		# `variance` is a helper defined elsewhere; it receives the values
		# and their mean.
		varG1 = variance(propGroup1, meanG1)
		varG2 = variance(propGroup2, meanG2)

		dof = n1 + n2 - 2
		pooledVar = ((n1 - 1)*varG1 + (n2 - 1)*varG2) / (n1 + n2 - 2)
		denom = math.sqrt(pooledVar) * math.sqrt(1.0/n1 + 1.0/n2)

		if denom == 0:
			# Checked explicitly rather than by catching ZeroDivisionError, so
			# this branch cannot be shadowed by a broader exception handler.
			if meanG1 != meanG2:
				pValue = 0.0 # the difference (at least according to these samples) must be true as there is no variance
			else:
				pValue = 0.5

			# without variance the interval collapses onto the point estimate
			lowerCI = dp
			upperCI = dp
			note = 'degenerate case: variance of both groups is zero'
		else:
			# p-value
			T_statistic = dp / denom
			pValue = t.cdf(T_statistic, dof)

			# CI
			tCritical = t.isf(0.5 * (1.0-coverage), dof) # 0.5 factor accounts for the symmetric nature of the distribution
			lowerCI = dp - tCritical*denom
			upperCI = dp + tCritical*denom

		return 1.0 - pValue, 2*min(pValue, 1.0 - pValue), lowerCI*100, upperCI*100, dp*100, note
Beispiel #7
0
#=======================================================================
Beispiel #8
0
#=======================================================================
Beispiel #9
0
    def run(self, seqGroup1, seqGroup2, parentSeqGroup1, parentSeqGroup2,
            confIntervMethod, coverage):
        """Run an unequal-variance t-test on two groups of proportions.

        Every sample contributes its count divided by the matching parent
        count (0.0 when the parent count is not positive). The means of the
        two groups are compared using the unpooled variance and the
        Welch-Satterthwaite degrees of freedom.

        Args:
            seqGroup1: Per-sample counts for group 1.
            seqGroup2: Per-sample counts for group 2.
            parentSeqGroup1: Per-sample parent counts for group 1.
            parentSeqGroup2: Per-sample parent counts for group 2.
            confIntervMethod: Not used by this method.
            coverage: Coverage of the confidence interval as a fraction.

        Returns:
            A 6-tuple: one minus the t CDF at the test statistic, the
            two-sided p-value, the lower and upper confidence bounds (in
            percent), the difference in mean proportions (in percent), and
            a note string describing any degenerate case.
        """

        def _proportions(counts, parents):
            # Per-sample proportions, plus a flag telling whether any parent
            # count was not positive.
            fractions = []
            sawEmptyParent = False
            for idx, count in enumerate(counts):
                if parents[idx] > 0:
                    fractions.append(float(count) / parents[idx])
                else:
                    fractions.append(0.0)
                    sawEmptyParent = True
            return fractions, sawEmptyParent

        remark = ''
        size1 = len(seqGroup1)
        size2 = len(seqGroup2)

        if size1 < 2 or size2 < 2:
            # too few samples: report a neutral result
            tailProb = 0.5
            ciLow = 0.0
            ciHigh = 0.0
            meanDiff = 0.0
            remark = 'degenerate case: both groups must contain at least 2 samples'
        else:
            fractions1, emptyParent1 = _proportions(seqGroup1, parentSeqGroup1)
            fractions2, emptyParent2 = _proportions(seqGroup2, parentSeqGroup2)
            if emptyParent1 or emptyParent2:
                remark = 'degenerate case: parent group had a count of zero'

            # group means and their difference (the effect size)
            avg1 = float(sum(fractions1)) / size1
            avg2 = float(sum(fractions2)) / size2
            meanDiff = avg1 - avg2

            # per-group variance scaled by the group size
            scaledVar1 = variance(fractions1, avg1) / size1
            scaledVar2 = variance(fractions2, avg2) / size2
            totalVar = scaledVar1 + scaledVar2
            stdErr = math.sqrt(totalVar)

            if totalVar == 0:
                # no variance at all: any difference between the means is
                # taken at face value
                tailProb = 0.0 if avg1 != avg2 else 0.5
                ciLow = meanDiff
                ciHigh = meanDiff
                remark = 'degenerate case: variance of both groups is zero'
            else:
                # test statistic and degrees of freedom
                tStat = meanDiff / stdErr
                dofDenominator = ((scaledVar1 * scaledVar1) / (size1 - 1) +
                                  (scaledVar2 * scaledVar2) / (size2 - 1))
                degFreedom = (totalVar * totalVar) / dofDenominator
                tailProb = t.cdf(tStat, degFreedom)

                # confidence interval; the tail probability is halved
                # because the distribution is symmetric
                critical = t.isf(0.5 * (1.0 - coverage), degFreedom)
                halfWidth = critical * stdErr
                ciLow = meanDiff - halfWidth
                ciHigh = meanDiff + halfWidth

        return (1.0 - tailProb, 2 * min(tailProb, 1.0 - tailProb),
                ciLow * 100, ciHigh * 100, meanDiff * 100, remark)
Beispiel #10
0
#=======================================================================