def test_distances_individuals_missing_data(self):
    """Check pairwise individual distances on ``self.data2`` and ``self.data3``.

    The same fixture (``self.data2``) is loaded once as diploid and once as
    haploid data, and the "lm" and "ap" distances between individuals 0 and 1
    are compared with hand-computed values.  ``self.data3`` is then loaded as
    haploid data to check the "lm" distance, including the -1 value expected
    for the pair (0, 2).
    """
    # Diploid reading of the fixture.
    diploid = eden.MicrosatelliteData(self.data2)

    diploid_lm = diploid.getDistanceMatrix(distance="lm")
    assert diploid_lm[0, 1] == 2.0, (
        "Diploid data: Error calculating the linear manhattan distance.")

    diploid_ap = diploid.getDistanceMatrix(distance="ap")
    assert diploid_ap[0, 1] == 2.0, (
        "Diploid data: Error calculating the allele parsimony distance.")

    # Haploid reading of the same fixture.
    haploid = eden.MicrosatelliteDataHaploid(self.data2)

    haploid_lm = haploid.getDistanceMatrix(distance="lm")
    expected_lm = (3 * 1.0 + 29.0) / 4.0
    assert haploid_lm[0, 1] == expected_lm, (
        "Haploid data: Error calculating the linear manhattan distance.")

    haploid_ap = haploid.getDistanceMatrix(distance="ap")
    assert haploid_ap[0, 1] == 1.0, (
        "Haploid data: Error calculating the allele parsimony distance.")

    # Haploid fixture with large amounts of missing data.
    sparse = eden.MicrosatelliteDataHaploid(self.data3)
    sparse_lm = sparse.getDistanceMatrix(distance="lm")
    assert sparse_lm[0, 1] == 0.0, "Missing data: Error."
    assert sparse_lm[0, 2] == -1, (
        "Missing data: No matchind data fields should lead to -1 distance.")
def test_distances_allele_freqs(self):
    """Check the error message raised when building an allele frequency table.

    Loads ``self.data_nonnumeric2`` as haploid data and initialises an
    ``eden.AlleleFrequencyTable`` from it with the groups ``[[0, 1], [2, 3]]``.
    If ``eden.EDENException`` is raised, its message must contain
    ``"input data has"``.
    """
    ms_data = eden.MicrosatelliteDataHaploid(self.data_nonnumeric2)
    freq_table = eden.AlleleFrequencyTable()
    try:
        freq_table.init_msData(ms_data, [[0, 1], [2, 3]])
    # "except X as e" is valid on Python 2.6+ and Python 3, unlike the
    # comma form, which is a SyntaxError on Python 3.
    except eden.EDENException as e:
        assert "input data has" in str(e)
    # NOTE(review): the test also passes when no exception is raised at all.
    # If the exception is mandatory for this fixture, an explicit failure in
    # an ``else`` branch would be needed -- confirm the intended contract.
def test_distances_populations_missing_data(self):
    """Check group-wise distances between two single-member groups.

    ``self.data2`` is loaded as diploid and as haploid data; for each, the
    "goldstein_d1" and "goldstein" distances between the groups ``[0]`` and
    ``[1]`` are compared with values written out by hand below.
    """
    pair_of_groups = [[0], [1]]

    # Diploid reading of the fixture.
    diploid = eden.MicrosatelliteData(self.data2)

    diploid_d1 = diploid.getGroupwiseDistanceMatrix(
        groups=pair_of_groups, distance="goldstein_d1")
    expected_diploid_d1 = (
        0.5 * 1 + 0.25 * (1 + 21 * 21 + 19 * 19 + 1) + 0.5 * (29 * 29 + 1)
    ) / 3.0
    assert diploid_d1[0, 1] == expected_diploid_d1, (
        "Diploid data: Error calculating the goldstein D1 distance.")

    diploid_goldstein = diploid.getGroupwiseDistanceMatrix(
        groups=pair_of_groups, distance="goldstein")
    # One squared difference of averaged values per term, three terms.
    expected_diploid_goldstein = (
        ((100 + 101) / 2. - (101) / 1.)**2
        + ((200 + 220) / 2. - (201 + 221) / 2.)**2
        + ((330) / 1. - (301 + 331) / 2.)**2
    ) / 3.
    assert diploid_goldstein[0, 1] == expected_diploid_goldstein, (
        "Diploid data: Error calculating the goldstein distance.")

    # Haploid reading of the same fixture.
    haploid = eden.MicrosatelliteDataHaploid(self.data2)

    haploid_d1 = haploid.getGroupwiseDistanceMatrix(
        groups=pair_of_groups, distance="goldstein_d1")
    assert haploid_d1[0, 1] == (1 + 1 + 1 + 29 * 29) / 4.0, (
        "Haploid data: Error calculating the goldstein D1 distance.")

    haploid_goldstein = haploid.getGroupwiseDistanceMatrix(
        groups=pair_of_groups, distance="goldstein")
    assert haploid_goldstein[0, 1] == (1 + 1 + 1 + 29 * 29) / 4.0, (
        "Haploid data: Error calculating the goldstein distance.")
def test_distances_individuals(self):
    """Check pairwise individual distances on ``self.data1``.

    The fixture is loaded as diploid and as haploid data, and the "lm" and
    "ap" distances between individuals 0 and 1 are compared with
    hand-computed values.
    """
    # Diploid reading of the fixture.
    diploid = eden.MicrosatelliteData(self.data1)

    diploid_lm = diploid.getDistanceMatrix(distance="lm")
    assert diploid_lm[0, 1] == 2.0, (
        "Diploid data: Error calculating the linear manhattan distance.")

    diploid_ap = diploid.getDistanceMatrix(distance="ap")
    expected_diploid_ap = (1.0 + 2.0 + 2.0) / 3.0
    assert diploid_ap[0, 1] == expected_diploid_ap, (
        "Diploid data: Error calculating the allele parsimony distance.")

    # Haploid reading of the same fixture.
    haploid = eden.MicrosatelliteDataHaploid(self.data1)

    haploid_lm = haploid.getDistanceMatrix(distance="lm")
    expected_haploid_lm = (4 * 1.0 + 29.0 + 31.0) / 6.0
    assert haploid_lm[0, 1] == expected_haploid_lm, (
        "Haploid data: Error calculating the linear manhattan distance.")

    haploid_ap = haploid.getDistanceMatrix(distance="ap")
    assert haploid_ap[0, 1] == 1.0, (
        "Haploid data: Error calculating the allele parsimony distance.")
def __init__(self, data, groups=True, ind_distance="lm",
             group_distance="goldstein_d1", ploidity=2):
    """Parse microsatellite input and precompute distance matrices and networks.

    Parameters:
        data: either a path (``str`` or ``unicode``), which is opened for
            reading here, or an object that is handed to ``splitMSFile``
            as-is.
        groups: when true, the input is split into locations, names and
            data, ``self.pops`` is built with ``eden.getGoldsteinLists`` and
            the group-level matrix/network are computed in addition to the
            individual-level ones.  When false, ``splitMSFile`` is called
            with ``locations=False`` and only names and data are kept.
        ind_distance: stored as ``self.ind_distance``.
        group_distance: stored as ``self.group_distance``.
        ploidity: 2 selects ``eden.MicrosatelliteData``, 1 selects
            ``eden.MicrosatelliteDataHaploid``.

    Raises:
        Exception: with the message "invalid ploidity" when ``ploidity`` is
            neither 1 nor 2.
    """
    self.tests = self.tests_slow + self.tests_fast

    # Remember a handle opened here so that it can be closed again; handles
    # passed in by the caller remain the caller's responsibility.
    owned_handle = None
    if isinstance(data, str) or isinstance(data, unicode):
        owned_handle = open(data, 'r')
        data = owned_handle

    try:
        self.ind_distance = ind_distance
        self.group_distance = group_distance

        if groups:
            locs, names, data = splitMSFile(data)
            self.data = data
            self.locs = locs
            self.names = names
            self.pops, names = eden.getGoldsteinLists(locs)
        else:
            names, data = splitMSFile(data, locations=False)
            self.data = data
            self.names = names

        if ploidity == 2:
            self.ms = eden.MicrosatelliteData(self.data)
        elif ploidity == 1:
            self.ms = eden.MicrosatelliteDataHaploid(self.data)
        else:
            raise Exception("invalid ploidity")
    finally:
        # Previously the handle was leaked, also on the error path above.
        # NOTE(review): this assumes splitMSFile and the eden constructors
        # have finished reading from the file by this point -- confirm.
        if owned_handle is not None:
            owned_handle.close()

    self.ms_noclones = self.ms.getUniqueSubset()

    if groups:
        self.dm_groups = self.get_dm_groups()
        self.thnet_groups = self.auto_threshold_groups()

    self.dm_ind = self.get_dm_ind()
    self.thnet_ind = self.auto_threshold_ind()
def test_distances_abundances(self):
    """Check the "czekanowski" distance on ``self.abundance_data1``.

    The fixture is loaded as haploid data and the distances for the pairs
    (0, 1), (0, 2) and (1, 2) are compared with hand-computed values.
    """
    abundances = eden.MicrosatelliteDataHaploid(self.abundance_data1)
    distances = abundances.getDistanceMatrix(distance="czekanowski")

    # Checked in the same order as the original chained condition.
    assert distances[0, 1] == 1 - 2 / 3.0
    assert distances[0, 2] == 1 - 0.5
    assert distances[1, 2] == 1 - 0.4
def test_distances_individuals_nonnumeric(self):
    """Check the "ap" distance on the ``self.data_nonnumeric1`` fixture.

    The fixture is loaded as haploid data and the distance between
    individuals 0 and 1 is compared with a hand-computed value.
    """
    haploid = eden.MicrosatelliteDataHaploid(self.data_nonnumeric1)
    ap_matrix = haploid.getDistanceMatrix(distance="ap")

    expected = 1. - 3. / 5.
    assert ap_matrix[0, 1] == expected, (
        "Error calculating the allele parsimony distance for non-numeric data.")