Beispiel #1
0
    def test_distances_individuals_missing_data(self):
        """Check pairwise individual distances on the missing-data fixtures.

        Covers the "lm" (linear manhattan) and "ap" (allele parsimony)
        distances for ``self.data2`` read both as diploid and as haploid
        data, and the "lm" distance for ``self.data3`` read as haploid data.
        """
        # Diploid reading of data2.
        diploid = eden.MicrosatelliteData(self.data2)
        diploid_lm = diploid.getDistanceMatrix(distance="lm")
        assert diploid_lm[0, 1] == 2.0, (
            "Diploid data: Error calculating the linear manhattan distance.")
        diploid_ap = diploid.getDistanceMatrix(distance="ap")
        assert diploid_ap[0, 1] == 2.0, (
            "Diploid data: Error calculating the allele parsimony distance.")

        # Haploid reading of the same data2 fixture.
        haploid = eden.MicrosatelliteDataHaploid(self.data2)
        haploid_lm = haploid.getDistanceMatrix(distance="lm")
        expected_haploid_lm = (3 * 1.0 + 29.0) / 4.0
        assert haploid_lm[0, 1] == expected_haploid_lm, (
            "Haploid data: Error calculating the linear manhattan distance.")
        haploid_ap = haploid.getDistanceMatrix(distance="ap")
        assert haploid_ap[0, 1] == 1.0, (
            "Haploid data: Error calculating the allele parsimony distance.")

        # Haploid reading of data3, which has large amounts of missing data.
        sparse = eden.MicrosatelliteDataHaploid(self.data3)
        sparse_lm = sparse.getDistanceMatrix(distance="lm")
        assert sparse_lm[0, 1] == 0.0, "Missing data: Error."
        # A pair with no data fields in common is expected to yield -1.
        assert sparse_lm[0, 2] == -1, (
            "Missing data: No matchind data fields should lead to -1 distance.")
Beispiel #2
0
 def test_distances_allele_freqs(self):
     """Check the error text raised when initialising an allele frequency table.

     Builds haploid microsatellite data from ``self.data_nonnumeric2`` and
     passes it to ``AlleleFrequencyTable.init_msData`` with two groups.  If
     an ``eden.EDENException`` is raised, its message must contain
     "input data has".

     NOTE(review): the test also passes when no exception is raised at
     all -- confirm whether the exception is supposed to be mandatory.
     """
     ms1 = eden.MicrosatelliteDataHaploid(self.data_nonnumeric2)
     af1 = eden.AlleleFrequencyTable()
     try:
         af1.init_msData(ms1, [[0, 1], [2, 3]])
     # "except X as e" is valid on Python 2.6+ and Python 3; the older
     # "except X, e" form is a SyntaxError on Python 3.
     except eden.EDENException as e:
         assert "input data has" in str(e)
Beispiel #3
0
    def test_distances_populations_missing_data(self):
        """Check group-wise Goldstein distances on the ``self.data2`` fixture.

        Each of the first two samples forms its own group.  Both the
        "goldstein_d1" and the "goldstein" distance are checked, first with
        the data read as diploid and then as haploid.
        """
        single_sample_groups = [[0], [1]]

        # Diploid
        diploid = eden.MicrosatelliteData(self.data2)

        diploid_d1 = diploid.getGroupwiseDistanceMatrix(
            groups=single_sample_groups, distance="goldstein_d1")
        expected_diploid_d1 = (
            0.5 * 1
            + 0.25 * (1 + 21 * 21 + 19 * 19 + 1)
            + 0.5 * (29 * 29 + 1)
        ) / 3.0
        assert diploid_d1[0, 1] == expected_diploid_d1, (
            "Diploid data: Error calculating the goldstein D1 distance.")

        diploid_goldstein = diploid.getGroupwiseDistanceMatrix(
            groups=single_sample_groups, distance="goldstein")
        # Three squared differences of means, summed left to right and
        # averaged.
        term_1 = ((100 + 101) / 2. - (101) / 1.)**2
        term_2 = ((200 + 220) / 2. - (201 + 221) / 2.)**2
        term_3 = ((330) / 1. - (301 + 331) / 2.)**2
        expected_diploid_goldstein = (term_1 + term_2 + term_3) / 3.
        assert diploid_goldstein[0, 1] == expected_diploid_goldstein, (
            "Diploid data: Error calculating the goldstein distance.")

        # Haploid
        haploid = eden.MicrosatelliteDataHaploid(self.data2)
        # Both haploid distances are compared against the same value.
        expected_haploid = (1 + 1 + 1 + 29 * 29) / 4.0

        haploid_d1 = haploid.getGroupwiseDistanceMatrix(
            groups=single_sample_groups, distance="goldstein_d1")
        assert haploid_d1[0, 1] == expected_haploid, (
            "Haploid data: Error calculating the goldstein D1 distance.")

        haploid_goldstein = haploid.getGroupwiseDistanceMatrix(
            groups=single_sample_groups, distance="goldstein")
        assert haploid_goldstein[0, 1] == expected_haploid, (
            "Haploid data: Error calculating the goldstein distance.")
Beispiel #4
0
    def test_distances_individuals(self):
        """Check pairwise individual distances on the ``self.data1`` fixture.

        The "lm" (linear manhattan) and "ap" (allele parsimony) distances
        between the first two samples are checked with the data read as
        diploid and as haploid.
        """
        # Diploid
        diploid = eden.MicrosatelliteData(self.data1)
        diploid_lm = diploid.getDistanceMatrix(distance="lm")
        assert diploid_lm[0, 1] == 2.0, (
            "Diploid data: Error calculating the linear manhattan distance.")
        diploid_ap = diploid.getDistanceMatrix(distance="ap")
        expected_diploid_ap = (1.0 + 2.0 + 2.0) / 3.0
        assert diploid_ap[0, 1] == expected_diploid_ap, (
            "Diploid data: Error calculating the allele parsimony distance.")

        # Haploid
        haploid = eden.MicrosatelliteDataHaploid(self.data1)
        haploid_lm = haploid.getDistanceMatrix(distance="lm")
        expected_haploid_lm = (4 * 1.0 + 29.0 + 31.0) / 6.0
        assert haploid_lm[0, 1] == expected_haploid_lm, (
            "Haploid data: Error calculating the linear manhattan distance.")
        haploid_ap = haploid.getDistanceMatrix(distance="ap")
        assert haploid_ap[0, 1] == 1.0, (
            "Haploid data: Error calculating the allele parsimony distance.")
Beispiel #5
0
    def __init__(self,
                 data,
                 groups=True,
                 ind_distance="lm",
                 group_distance="goldstein_d1",
                 ploidity=2):
        """Load microsatellite data and precompute distance matrices.

        Parameters:
            data: either a string, which is opened as a file for reading,
                or an object that is handed to ``splitMSFile`` unchanged.
            groups: when true, ``splitMSFile`` is also expected to return
                locations, and the group-level distance matrix and
                thresholded network are built in addition to the
                individual-level ones.
            ind_distance: stored as ``self.ind_distance``.
            group_distance: stored as ``self.group_distance``.
            ploidity: 2 selects ``eden.MicrosatelliteData``, 1 selects
                ``eden.MicrosatelliteDataHaploid``.

        Raises:
            Exception: if ``ploidity`` is neither 1 nor 2.
        """
        self.tests = self.tests_slow + self.tests_fast

        # A string argument is treated as a file name.
        # NOTE(review): the handle opened here is never closed explicitly;
        # confirm whether splitMSFile consumes the file eagerly before
        # adding a close.
        is_path = isinstance(data, str) or isinstance(data, unicode)
        source = open(data, 'r') if is_path else data

        self.ind_distance = ind_distance
        self.group_distance = group_distance

        if groups:
            self.locs, self.names, self.data = splitMSFile(source)
            # Only the first value returned by getGoldsteinLists is kept.
            self.pops, _ = eden.getGoldsteinLists(self.locs)
        else:
            self.names, self.data = splitMSFile(source, locations=False)

        # Pick the container class that matches the requested ploidity.
        if ploidity == 2:
            ms_class = eden.MicrosatelliteData
        elif ploidity == 1:
            ms_class = eden.MicrosatelliteDataHaploid
        else:
            raise Exception("invalid ploidity")
        self.ms = ms_class(self.data)

        self.ms_noclones = self.ms.getUniqueSubset()

        # Group-level results are computed before the individual-level ones.
        if groups:
            self.dm_groups = self.get_dm_groups()
            self.thnet_groups = self.auto_threshold_groups()
        self.dm_ind = self.get_dm_ind()
        self.thnet_ind = self.auto_threshold_ind()
Beispiel #6
0
 def test_distances_abundances(self):
     """Check the "czekanowski" distance on the ``self.abundance_data1`` fixture.

     The three pairwise distances among the first three samples are
     compared, in order, against their expected values.
     """
     abundances = eden.MicrosatelliteDataHaploid(self.abundance_data1)
     distances = abundances.getDistanceMatrix(distance="czekanowski")
     expected_by_pair = (
         ((0, 1), 1 - 2 / 3.0),
         ((0, 2), 1 - 0.5),
         ((1, 2), 1 - 0.4),
     )
     # Stops at the first mismatch, like a short-circuiting "and" chain.
     for (row, col), expected in expected_by_pair:
         assert distances[row, col] == expected
Beispiel #7
0
 def test_distances_individuals_nonnumeric(self):
     """Check the "ap" (allele parsimony) distance on non-numeric data.

     Uses the ``self.data_nonnumeric1`` fixture read as haploid data and
     compares the distance between the first two samples.
     """
     nonnumeric = eden.MicrosatelliteDataHaploid(self.data_nonnumeric1)
     ap_distances = nonnumeric.getDistanceMatrix(distance="ap")
     expected = 1. - 3. / 5.
     assert ap_distances[0, 1] == expected, (
         "Error calculating the allele parsimony distance for non-numeric data.")