Esempio n. 1
0
	def estimate_Jarne2006(self, data_matrix):
		"""
		2007-08-14
			method based on Jarne2006

		Compute the FIS vector and the selfing-rate vector for data_matrix
		and return them on a fresh s_estimate_result (attributes FIS_vector
		and selfing_rate_vector).
		"""
		sys.stderr.write("Jarne2006 method ...\n")
		result = s_estimate_result()

		# imported locally, as elsewhere in this class
		from EstimateSelfingGeneration import EstimateSelfingGeneration
		allele_prob_vector = EstimateSelfingGeneration().cal_locus_allele_prob_vector(data_matrix)
		heterozygosity_vector = self.cal_observed_heterozygosity_vector(data_matrix)

		# FIS comes from the allele probabilities and the observed
		# heterozygosity; the selfing rate is then derived from FIS.
		fis = self.cal_FIS_vector(allele_prob_vector, heterozygosity_vector)
		selfing_rates = self.cal_selfing_rate_vector(fis)

		result.FIS_vector = fis
		result.selfing_rate_vector = selfing_rates
		return result
Esempio n. 2
0
    def estimate_Jarne2006(self, data_matrix):
        """
        2007-08-14
            method based on Jarne2006

        Compute the FIS vector and the selfing-rate vector for data_matrix
        and return them on a fresh s_estimate_result (attributes FIS_vector
        and selfing_rate_vector).
        """
        sys.stderr.write("Jarne2006 method ...\n")
        result = s_estimate_result()

        # imported locally, as elsewhere in this class
        from EstimateSelfingGeneration import EstimateSelfingGeneration
        allele_prob_vector = EstimateSelfingGeneration(
        ).cal_locus_allele_prob_vector(data_matrix)
        heterozygosity_vector = self.cal_observed_heterozygosity_vector(
            data_matrix)

        # FIS comes from the allele probabilities and the observed
        # heterozygosity; the selfing rate is then derived from FIS.
        fis = self.cal_FIS_vector(allele_prob_vector, heterozygosity_vector)
        selfing_rates = self.cal_selfing_rate_vector(fis)

        result.FIS_vector = fis
        result.selfing_rate_vector = selfing_rates
        return result
Esempio n. 3
0
    def estimate_David2007_g2(self, data_matrix):
        """
        2007-08-15
            David2007

        Estimate g2 from the heterozygosity calls in data_matrix and, when
        g2 is non-zero, derive the selfing rate s, FIS and the per-locus
        genotyping error rates.

        data_matrix: 2D array indexed [strain, snp]. A value of 0 is NA and
            a value > 4 is heterozygous; NA is treated as homozygous.

        Returns an s_estimate_result. g2_David2007 is set only when the
        denominator is positive; s_g2_David2007, FIS_vector and
        genotyping_error_rate_vector are set only when g2 is non-zero.
        """
        sys.stderr.write("David2007 g2 method ...\n")
        no_of_strains, no_of_snps = data_matrix.shape
        s_estimate_result_instance = s_estimate_result()

        # Per-locus NA / heterozygote flags, read from the matrix only once.
        strain_indices = range(no_of_strains)
        na_flags = []
        het_flags = []
        for i in range(no_of_snps):
            na_flags.append(
                [bool(data_matrix[k, i] == 0) for k in strain_indices])
            het_flags.append(
                [bool(data_matrix[k, i] > 4) for k in strain_indices])
        # Mi: number of individuals with NA at locus i. Counted directly, so
        # no rescaling by (no_of_snps - 1) is needed; that divisor is zero
        # for a single-locus matrix.
        Mi = [float(sum(flags)) for flags in na_flags]
        # Hi: number of heterozygous individuals at locus i
        Hi = [float(sum(flags)) for flags in het_flags]

        numerator = 0.0
        denominator = 0.0
        for i in range(no_of_snps):
            for j in range(no_of_snps):
                if j == i:
                    continue
                Mij = 0.0  #number of individuals with NA at both locus i and j
                Hijk = 0.0  #\sum_k Hik*Hjk
                for k in strain_indices:
                    if na_flags[i][k] and na_flags[j][k]:
                        Mij += 1
                    if het_flags[i][k] and het_flags[j][k]:
                        Hijk += 1
                # \sum_k \sum_{l!=k} Hik*Hjl
                #   = (\sum_k Hik) * (\sum_l Hjl) - \sum_k Hik*Hjk,
                # which removes the loop over every (k, l) strain pair.
                Hijkl = Hi[i] * Hi[j] - Hijk

                # Both divisors are zero only when every strain is NA at
                # both loci; Hijk and Hijkl are then zero too, so the pair
                # is skipped rather than dividing by zero.
                same_strain_divisor = no_of_strains - Mij
                if same_strain_divisor:
                    numerator += 1.0 / same_strain_divisor * Hijk
                cross_strain_divisor = (no_of_strains * (no_of_strains - 1) -
                                        Mi[i] * Mi[j] + Mij)
                if cross_strain_divisor:
                    denominator += 1.0 / cross_strain_divisor * Hijkl

        # NOTE(review): the published g2 estimator is this ratio minus 1;
        # no subtraction happens here. Confirm against David et al. (2007)
        # before relying on s.
        if denominator > 0.0:
            g2 = numerator / denominator
            s_estimate_result_instance.g2_David2007 = g2
        else:
            g2 = None
        if g2:  #g2=0 causes ZeroDivisionError: float division in calculating s
            s = (1 + 5 * g2 - math.sqrt(1 + 10 * g2 + 9 * g2 * g2)) / (2 * g2)
            FIS = s / (2 - s)

            from EstimateSelfingGeneration import EstimateSelfingGeneration
            EstimateSelfingGeneration_instance = EstimateSelfingGeneration()
            locus_allele_prob_vector = EstimateSelfingGeneration_instance.cal_locus_allele_prob_vector(
                data_matrix)
            observed_heterozygosity_vector = self.cal_observed_heterozygosity_vector(
                data_matrix)
            FIS_vector = self.cal_FIS_vector(locus_allele_prob_vector,
                                             observed_heterozygosity_vector)

            # A list (not map()) so the result is reusable on Python 3 too.
            genotyping_error_rate_vector = [(x - FIS) / (1 - FIS)
                                            for x in FIS_vector]

            s_estimate_result_instance.s_g2_David2007 = s
            s_estimate_result_instance.FIS_vector = FIS_vector
            s_estimate_result_instance.genotyping_error_rate_vector = genotyping_error_rate_vector

        return s_estimate_result_instance
Esempio n. 4
0
	def estimate_David2007_g2(self, data_matrix):
		"""
		2007-08-15
			David2007

		Estimate g2 from the heterozygosity calls in data_matrix and, when
		g2 is non-zero, derive the selfing rate s, FIS and the per-locus
		genotyping error rates.

		data_matrix: 2D array indexed [strain, snp]. A value of 0 is NA and
			a value > 4 is heterozygous; NA is treated as homozygous.

		Returns an s_estimate_result. g2_David2007 is set only when the
		denominator is positive; s_g2_David2007, FIS_vector and
		genotyping_error_rate_vector are set only when g2 is non-zero.
		"""
		sys.stderr.write("David2007 g2 method ...\n")
		no_of_strains, no_of_snps = data_matrix.shape
		s_estimate_result_instance = s_estimate_result()

		# Per-locus NA / heterozygote flags, read from the matrix only once.
		strain_indices = range(no_of_strains)
		na_flags = []
		het_flags = []
		for i in range(no_of_snps):
			na_flags.append([bool(data_matrix[k, i] == 0) for k in strain_indices])
			het_flags.append([bool(data_matrix[k, i] > 4) for k in strain_indices])
		# Mi: number of individuals with NA at locus i. Counted directly, so
		# no rescaling by (no_of_snps - 1) is needed; that divisor is zero
		# for a single-locus matrix.
		Mi = [float(sum(flags)) for flags in na_flags]
		# Hi: number of heterozygous individuals at locus i
		Hi = [float(sum(flags)) for flags in het_flags]

		numerator = 0.0
		denominator = 0.0
		for i in range(no_of_snps):
			for j in range(no_of_snps):
				if j == i:
					continue
				Mij = 0.0	#number of individuals with NA at both locus i and j
				Hijk = 0.0	#\sum_k Hik*Hjk
				for k in strain_indices:
					if na_flags[i][k] and na_flags[j][k]:
						Mij += 1
					if het_flags[i][k] and het_flags[j][k]:
						Hijk += 1
				# \sum_k \sum_{l!=k} Hik*Hjl
				#   = (\sum_k Hik) * (\sum_l Hjl) - \sum_k Hik*Hjk,
				# which removes the loop over every (k, l) strain pair.
				Hijkl = Hi[i] * Hi[j] - Hijk

				# Both divisors are zero only when every strain is NA at
				# both loci; Hijk and Hijkl are then zero too, so the pair
				# is skipped rather than dividing by zero.
				same_strain_divisor = no_of_strains - Mij
				if same_strain_divisor:
					numerator += 1.0 / same_strain_divisor * Hijk
				cross_strain_divisor = no_of_strains * (no_of_strains - 1) - Mi[i] * Mi[j] + Mij
				if cross_strain_divisor:
					denominator += 1.0 / cross_strain_divisor * Hijkl

		# NOTE(review): the published g2 estimator is this ratio minus 1;
		# no subtraction happens here. Confirm against David et al. (2007)
		# before relying on s.
		if denominator > 0.0:
			g2 = numerator / denominator
			s_estimate_result_instance.g2_David2007 = g2
		else:
			g2 = None
		if g2:	#g2=0 causes ZeroDivisionError: float division in calculating s
			s = (1 + 5 * g2 - math.sqrt(1 + 10 * g2 + 9 * g2 * g2)) / (2 * g2)
			FIS = s / (2 - s)

			from EstimateSelfingGeneration import EstimateSelfingGeneration
			EstimateSelfingGeneration_instance = EstimateSelfingGeneration()
			locus_allele_prob_vector = EstimateSelfingGeneration_instance.cal_locus_allele_prob_vector(data_matrix)
			observed_heterozygosity_vector = self.cal_observed_heterozygosity_vector(data_matrix)
			FIS_vector = self.cal_FIS_vector(locus_allele_prob_vector, observed_heterozygosity_vector)

			# A list (not map()) so the result is reusable on Python 3 too.
			genotyping_error_rate_vector = [(x - FIS) / (1 - FIS) for x in FIS_vector]

			s_estimate_result_instance.s_g2_David2007 = s
			s_estimate_result_instance.FIS_vector = FIS_vector
			s_estimate_result_instance.genotyping_error_rate_vector = genotyping_error_rate_vector

		return s_estimate_result_instance