Ejemplo n.º 1
0
 def test_make_unifrac_metric2(self):
     """Empty and identical samples should get sensible UniFrac distances.

     The OTU table below contains two all-zero samples (rows 0 and 13) and
     two identical samples (rows 12 and 15).  The distance matrix built by
     make_unifrac_metric(False, unifrac, True) is expected to give:

     * 0 between a sample and itself,
     * 0 between the two all-zero samples,
     * 0 between the two identical samples,
     * 1 between an all-zero sample and a non-empty one.
     """
     tree = parse_newick(self.l19_treestr, PhyloNode)
     unif = make_unifrac_metric(False, unifrac, True)
     otu_data = numpy.array([
         [0, 0, 0, 0, 0, 0, 0, 0, 0],  # sam1 zeros
         [4, 2, 0, 0, 0, 1, 0, 0, 0],
         [2, 4, 0, 0, 0, 1, 0, 0, 0],
         [1, 7, 0, 0, 0, 0, 0, 0, 0],
         [0, 8, 0, 0, 0, 0, 0, 0, 0],
         [0, 7, 1, 0, 0, 0, 0, 0, 0],
         [0, 4, 2, 0, 0, 0, 2, 0, 0],
         [0, 2, 4, 0, 0, 0, 1, 0, 0],
         [0, 1, 7, 0, 0, 0, 0, 0, 0],
         [0, 0, 8, 0, 0, 0, 0, 0, 0],
         [0, 0, 7, 1, 0, 0, 0, 0, 0],
         [0, 0, 4, 2, 0, 0, 0, 3, 0],
         [0, 0, 2, 4, 0, 0, 0, 1, 0],
         [0, 0, 0, 0, 0, 0, 0, 0, 0],  # sam14 zeros
         [0, 0, 0, 8, 0, 0, 0, 0, 0],
         [0, 0, 2, 4, 0, 0, 0, 1, 0],  # sam 16 now like sam 13
         [0, 0, 0, 4, 2, 0, 0, 0, 4],
         [0, 0, 0, 2, 4, 0, 0, 0, 1],
         [0, 0, 0, 1, 7, 0, 0, 0, 0],
     ])
     # catch_warnings() restores the previous filter list on exit, even if
     # the call raises, so the 'ignore' filter cannot leak into other tests
     # and filters installed elsewhere are not wiped out.
     with warnings.catch_warnings():
         warnings.simplefilter('ignore')
         res = unif(otu_data, self.l19_taxon_names, tree,
                    self.l19_sample_names)
     self.assertEqual(res[0, 0], 0)
     self.assertEqual(res[0, 13], 0.0)
     self.assertEqual(res[12, 15], 0.0)
     self.assertEqual(res[0, 1], 1.0)
Ejemplo n.º 2
0
    def result(data, taxon_names, tree, sample_names, one_sample_name,
               **kwargs):
        """Return the UniFrac distances for a single sample.

        Builds the envs dict from the OTU table, runs
        fast_unifrac_one_sample for ``one_sample_name`` and hands the
        outcome to _reorder_unifrac_res_one_sample together with
        ``sample_names``.

        If fast_unifrac_one_sample raises a ValueError whose message
        contains 'one_sample_name not found', a warning is issued and a
        self-only result (distance 0.0 to itself) is substituted; any other
        ValueError is re-raised.

            sample_names: list of unique strings
        """
        env_counts = make_envs_dict(data, sample_names, taxon_names)
        try:
            one_sample_res = fast_unifrac_one_sample(
                one_sample_name, tree, env_counts,
                weighted=weighted, metric=metric, **kwargs)
        except ValueError as e:
            if 'one_sample_name not found' not in str(e):
                raise e
            warnings.warn(
                'unifrac had no information on sample '
                + one_sample_name
                + ". Distances involving that sample aren't meaningful")
            # self only
            one_sample_res = (numpy.array([0.0]), [one_sample_name])
        return _reorder_unifrac_res_one_sample(one_sample_res, sample_names)
Ejemplo n.º 3
0
 def test_make_envs_dict(self):
     """Per-taxon totals in the envs dict must match the source matrix.

     For every taxon key returned by make_envs_dict, the sum of its
     per-sample counts has to equal the sum of that taxon's column."""
     taxa = self.l19_taxon_names
     envs = make_envs_dict(self.l19_data, self.l19_sample_names, taxa)
     for taxon in envs.keys():
         column_total = self.l19_data[:, taxa.index(taxon)].sum()
         dict_total = sum(envs[taxon].values())
         self.assertEqual(dict_total, column_total)
Ejemplo n.º 4
0
 def test_make_envs_dict(self):
     """Abundance per taxon in the envs dict equals the matrix column sum.

     Each key of the dict is looked up in l19_taxon_names to find the
     matching column of l19_data."""
     otu_table = self.l19_data
     taxon_names = self.l19_taxon_names
     envs = make_envs_dict(otu_table, self.l19_sample_names, taxon_names)
     for key in envs.keys():
         col_idx = taxon_names.index(key)
         counts_for_taxon = envs[key].values()
         self.assertEqual(sum(counts_for_taxon),
                          otu_table[:, col_idx].sum())
Ejemplo n.º 5
0
 def test_make_unifrac_metric(self):
     """The unweighted unifrac metric wrapper should agree with fast_unifrac.

     The matrix from make_unifrac_metric is compared with the
     "distance_matrix" output of a direct fast_unifrac call, reordered to
     l19_sample_names, and a few individual cells are spot-checked."""
     sample_names = self.l19_sample_names
     taxon_names = self.l19_taxon_names
     tree = parse_newick(self.l19_treestr, PhyloNode)

     # Distance matrix via the wrapper under test.
     metric_fn = make_unifrac_metric(False, unifrac, True)
     res = metric_fn(self.l19_data, taxon_names, tree, sample_names)

     # Same computation by calling fast_unifrac directly.
     envs = make_envs_dict(self.l19_data, sample_names, taxon_names)
     direct = fast_unifrac(tree, envs, modes=["distance_matrix"])
     unifrac_mat, unifrac_names = direct["distance_matrix"]
     expected = _reorder_unifrac_res([unifrac_mat, unifrac_names],
                                     sample_names)

     self.assertFloatEqual(res, expected)
     self.assertEqual(res[0, 0], 0)
     self.assertEqual(res[0, 3], 0.0)
     self.assertNotEqual(res[0, 1], 1.0)
Ejemplo n.º 6
0
    def result(data, taxon_names, tree, sample_names, one_sample_name,**kwargs):
        """Return the UniFrac distances for a single sample.

        Converts the OTU table into an envs dict, runs
        fast_unifrac_one_sample for ``one_sample_name`` (extra keyword
        arguments are forwarded) and passes the outcome, together with
        ``sample_names``, to _reorder_unifrac_res_one_sample.

            sample_names: list of unique strings
        """
        env_counts = make_envs_dict(data, sample_names, taxon_names)
        one_sample_res = fast_unifrac_one_sample(
            one_sample_name,
            tree,
            env_counts,
            weighted=weighted,
            metric=metric,
            **kwargs)
        return _reorder_unifrac_res_one_sample(one_sample_res, sample_names)
Ejemplo n.º 7
0
    def result(data, taxon_names, tree, sample_names, **kwargs):
        """Return the UniFrac distance matrix for the given OTU table.

        Runs fast_unifrac in "distance_matrix" mode on the envs dict built
        from ``data`` (extra keyword arguments are forwarded) and passes the
        matrix output, together with ``sample_names``, to
        _reorder_unifrac_res.

            sample_names: list of unique strings
        """
        env_counts = make_envs_dict(data, sample_names, taxon_names)
        all_modes = fast_unifrac(
            tree,
            env_counts,
            weighted=weighted,
            metric=metric,
            is_symmetric=is_symmetric,
            modes=["distance_matrix"],
            **kwargs
        )
        matrix_and_names = all_modes["distance_matrix"]
        return _reorder_unifrac_res(matrix_and_names, sample_names)
Ejemplo n.º 8
0
    def test_make_unifrac_row_metric3(self):
        """Row metric should match the matrix metric on a pruned tree.

        The tree used here lacks three taxa ("taxa 1,2,3 removed"), so some
        samples end up with no usable data.  The full matrix is spot-checked
        first; then every row produced by make_unifrac_row_metric is
        compared cell by cell with the matching row of the full matrix,
        skipping the samples listed in ``no_data``.
        """
        treestr = ('((((tax7:0.1):.98,tax8:.3, tax4:.3):.4, '
                   '((tax6:.09):0.43):0.5):.2,'
                   '(tax9:0.3, endbigtaxon:.08));')  # taxa 1,2,3 removed
        tree = parse_newick(treestr, PhyloNode)

        otu_data = numpy.array([
            [7, 1, 0, 0, 0, 0, 0, 0, 0],  # 1 now zeros
            [4, 2, 0, 0, 0, 1, 0, 0, 0],
            [2, 4, 0, 0, 0, 1, 0, 0, 0],
            [1, 7, 0, 0, 0, 0, 0, 0, 0],  # 4 now zeros
            [0, 8, 0, 0, 0, 0, 0, 0, 0],
            [0, 7, 1, 0, 0, 0, 0, 0, 0],
            [0, 4, 2, 0, 0, 0, 2, 0, 0],
            [0, 2, 4, 0, 0, 0, 1, 0, 0],
            [0, 1, 7, 0, 0, 0, 0, 0, 0],
            [0, 0, 8, 0, 0, 0, 0, 0, 0],
            [0, 0, 7, 1, 0, 0, 0, 0, 0],
            [0, 0, 4, 2, 0, 0, 0, 3, 0],
            [0, 0, 2, 4, 0, 0, 0, 1, 0],
            [0, 0, 1, 7, 0, 0, 0, 0, 0],
            [0, 0, 0, 8, 0, 0, 0, 0, 0],
            [0, 0, 0, 7, 1, 0, 0, 0, 0],
            [0, 0, 0, 4, 2, 0, 0, 0, 4],
            [0, 0, 0, 2, 4, 0, 0, 0, 1],
            [0, 0, 0, 1, 7, 0, 0, 0, 0],
        ])

        # Rows whose counts sit only in the first three columns; presumably
        # those columns are the taxa missing from the tree (depends on the
        # order of l19_taxon_names).  These have no data and are warned
        # "meaningless".  I would prefer if they matched res anyway though.
        no_data = (0, 3, 4, 5, 8, 9)

        unif = make_unifrac_metric(False, unifrac, True)
        # catch_warnings() restores the previous filters on exit, even when
        # an assertion fails, instead of wiping them with resetwarnings().
        with warnings.catch_warnings():
            warnings.simplefilter('ignore')
            res = unif(otu_data, self.l19_taxon_names, tree,
                       self.l19_sample_names)
        self.assertEqual(res[0, 0], 0)
        self.assertEqual(res[0, 3], 0.0)
        self.assertEqual(res[0, 1], 1.0)

        with warnings.catch_warnings():
            warnings.simplefilter('ignore')
            unif_row = make_unifrac_row_metric(False, unifrac, True)
            for i, sam_name in enumerate(self.l19_sample_names):
                if i in no_data:
                    continue
                res_row = unif_row(otu_data, self.l19_taxon_names, tree,
                                   self.l19_sample_names, sam_name)
                for j in range(len(self.l19_sample_names)):
                    if j in no_data:
                        continue  # ok if meaningless number in zero sample
                    # The row and matrix values come from different code
                    # paths, so compare with a tolerance rather than
                    # requiring bit-identical floats.
                    self.assertAlmostEqual(res_row[j], res[i, j])
Ejemplo n.º 9
0
    def result(data, taxon_names, tree, sample_names, **kwargs):
        """Return the UniFrac distance matrix for the given OTU table.

        The OTU table is turned into an envs dict, fast_unifrac is run in
        'distance_matrix' mode (extra keyword arguments are forwarded), and
        that output is passed with ``sample_names`` to _reorder_unifrac_res.

            sample_names: list of unique strings
        """
        requested_modes = ["distance_matrix"]
        env_counts = make_envs_dict(data, sample_names, taxon_names)
        unifrac_out = fast_unifrac(
            tree, env_counts,
            weighted=weighted,
            metric=metric,
            is_symmetric=is_symmetric,
            modes=requested_modes,
            **kwargs)
        return _reorder_unifrac_res(unifrac_out['distance_matrix'],
                                    sample_names)
Ejemplo n.º 10
0
    def test_make_unifrac_row_metric3(self):
        """Row metric should match the matrix metric on a pruned tree.

        The tree used here lacks three taxa ("taxa 1,2,3 removed"), so some
        samples end up with no usable data.  The full matrix is spot-checked
        first; then every row produced by make_unifrac_row_metric is
        compared (to within assertAlmostEqual tolerance) with the matching
        row of the full matrix, skipping the samples listed in ``no_data``.
        """
        treestr = ('((((tax7:0.1):.98,tax8:.3, tax4:.3):.4, '
                   '((tax6:.09):0.43):0.5):.2,'
                   '(tax9:0.3, endbigtaxon:.08));')  # taxa 1,2,3 removed
        tree = parse_newick(treestr, PhyloNode)

        otu_data = numpy.array([
            [7, 1, 0, 0, 0, 0, 0, 0, 0],  # 1 now zeros
            [4, 2, 0, 0, 0, 1, 0, 0, 0],
            [2, 4, 0, 0, 0, 1, 0, 0, 0],
            [1, 7, 0, 0, 0, 0, 0, 0, 0],  # 4 now zeros
            [0, 8, 0, 0, 0, 0, 0, 0, 0],
            [0, 7, 1, 0, 0, 0, 0, 0, 0],
            [0, 4, 2, 0, 0, 0, 2, 0, 0],
            [0, 2, 4, 0, 0, 0, 1, 0, 0],
            [0, 1, 7, 0, 0, 0, 0, 0, 0],
            [0, 0, 8, 0, 0, 0, 0, 0, 0],
            [0, 0, 7, 1, 0, 0, 0, 0, 0],
            [0, 0, 4, 2, 0, 0, 0, 3, 0],
            [0, 0, 2, 4, 0, 0, 0, 1, 0],
            [0, 0, 1, 7, 0, 0, 0, 0, 0],
            [0, 0, 0, 8, 0, 0, 0, 0, 0],
            [0, 0, 0, 7, 1, 0, 0, 0, 0],
            [0, 0, 0, 4, 2, 0, 0, 0, 4],
            [0, 0, 0, 2, 4, 0, 0, 0, 1],
            [0, 0, 0, 1, 7, 0, 0, 0, 0],
        ])

        # Rows whose counts sit only in the first three columns; presumably
        # those columns are the taxa missing from the tree (depends on the
        # order of l19_taxon_names).  These have no data and are warned
        # "meaningless".  I would prefer if they matched res anyway though.
        no_data = (0, 3, 4, 5, 8, 9)

        unif = make_unifrac_metric(False, unifrac, True)
        # catch_warnings() restores the previous filters on exit, even when
        # an assertion fails, instead of wiping them with resetwarnings().
        with warnings.catch_warnings():
            warnings.simplefilter('ignore')
            res = unif(otu_data, self.l19_taxon_names, tree,
                       self.l19_sample_names)
        self.assertEqual(res[0, 0], 0)
        self.assertEqual(res[0, 3], 0.0)
        self.assertEqual(res[0, 1], 1.0)

        with warnings.catch_warnings():
            warnings.simplefilter('ignore')
            unif_row = make_unifrac_row_metric(False, unifrac, True)
            for i, sam_name in enumerate(self.l19_sample_names):
                if i in no_data:
                    continue
                res_row = unif_row(otu_data, self.l19_taxon_names, tree,
                                   self.l19_sample_names, sam_name)
                for j in range(len(self.l19_sample_names)):
                    if j in no_data:
                        continue  # ok if meaningless number in zero sample
                    self.assertAlmostEqual(res_row[j], res[i, j])
Ejemplo n.º 11
0
    def result(data, taxon_names, tree, sample_names, one_sample_name):
        """Return the UniFrac distances for a single sample.

        Converts the OTU table into an envs dict, runs
        fast_unifrac_one_sample for ``one_sample_name`` and passes the
        outcome, together with ``sample_names``, to
        _reorder_unifrac_res_one_sample.

            sample_names: list of unique strings
        """
        env_counts = make_envs_dict(data, sample_names, taxon_names)
        one_sample_res = fast_unifrac_one_sample(
            one_sample_name, tree, env_counts,
            weighted=weighted, metric=metric)
        return _reorder_unifrac_res_one_sample(one_sample_res, sample_names)
Ejemplo n.º 12
0
    def test_make_unifrac_row_metric2(self):
        """Row metric should match the matrix metric with empty samples.

        The OTU table contains two all-zero samples (rows 0 and 13) and two
        identical samples (rows 12 and 15).  The full matrix is spot-checked
        first; then each row from make_unifrac_row_metric is compared with
        the matching row of the full matrix, skipping sample 0 both as a
        row and as a column.
        """
        tree = parse_newick(self.l19_treestr, PhyloNode)
        unif = make_unifrac_metric(False, unifrac, True)
        otu_data = numpy.array([
            [0, 0, 0, 0, 0, 0, 0, 0, 0],  # sam1 zeros
            [4, 2, 0, 0, 0, 1, 0, 0, 0],
            [2, 4, 0, 0, 0, 1, 0, 0, 0],
            [1, 7, 0, 0, 0, 0, 0, 0, 0],
            [0, 8, 0, 0, 0, 0, 0, 0, 0],
            [0, 7, 1, 0, 0, 0, 0, 0, 0],
            [0, 4, 2, 0, 0, 0, 2, 0, 0],
            [0, 2, 4, 0, 0, 0, 1, 0, 0],
            [0, 1, 7, 0, 0, 0, 0, 0, 0],
            [0, 0, 8, 0, 0, 0, 0, 0, 0],
            [0, 0, 7, 1, 0, 0, 0, 0, 0],
            [0, 0, 4, 2, 0, 0, 0, 3, 0],
            [0, 0, 2, 4, 0, 0, 0, 1, 0],
            [0, 0, 0, 0, 0, 0, 0, 0, 0],  # sam14 zeros
            [0, 0, 0, 8, 0, 0, 0, 0, 0],
            [0, 0, 2, 4, 0, 0, 0, 1, 0],  # sam 16 now like sam 13
            [0, 0, 0, 4, 2, 0, 0, 0, 4],
            [0, 0, 0, 2, 4, 0, 0, 0, 1],
            [0, 0, 0, 1, 7, 0, 0, 0, 0],
        ])
        # catch_warnings() restores the previous filters on exit.  The old
        # filterwarnings/resetwarnings pair left the 'ignore' filter
        # installed whenever an assertion failed in between, and otherwise
        # wiped every filter configured elsewhere.
        with warnings.catch_warnings():
            warnings.simplefilter('ignore')
            res = unif(otu_data, self.l19_taxon_names, tree,
                       self.l19_sample_names)
        self.assertEqual(res[0, 0], 0)
        self.assertEqual(res[0, 13], 0.0)
        self.assertEqual(res[12, 15], 0.0)
        self.assertEqual(res[0, 1], 1.0)

        with warnings.catch_warnings():
            warnings.simplefilter('ignore')
            unif_row = make_unifrac_row_metric(False, unifrac, True)
            for i, sam_name in enumerate(self.l19_sample_names):
                # Sample 0 has no data and is warned "meaningless".
                # I would prefer if it matched res anyway though.
                if i == 0:
                    continue
                res_row = unif_row(otu_data, self.l19_taxon_names, tree,
                                   self.l19_sample_names, sam_name)
                for j in range(len(self.l19_sample_names)):
                    if j == 0:
                        continue  # ok if meaningless number in zero sample
                    self.assertEqual(res_row[j], res[i, j])
Ejemplo n.º 13
0
    def test_make_unifrac_row_metric2(self):
        """Row metric should match the matrix metric with empty samples.

        The OTU table contains two all-zero samples (rows 0 and 13) and two
        identical samples (rows 12 and 15).  The full matrix is spot-checked
        first; then each row from make_unifrac_row_metric is compared with
        the matching row of the full matrix, skipping sample 0 both as a
        row and as a column.
        """
        tree = parse_newick(self.l19_treestr, PhyloNode)
        unif = make_unifrac_metric(False, unifrac, True)
        otu_data = numpy.array([
            [0, 0, 0, 0, 0, 0, 0, 0, 0],  # sam1 zeros
            [4, 2, 0, 0, 0, 1, 0, 0, 0],
            [2, 4, 0, 0, 0, 1, 0, 0, 0],
            [1, 7, 0, 0, 0, 0, 0, 0, 0],
            [0, 8, 0, 0, 0, 0, 0, 0, 0],
            [0, 7, 1, 0, 0, 0, 0, 0, 0],
            [0, 4, 2, 0, 0, 0, 2, 0, 0],
            [0, 2, 4, 0, 0, 0, 1, 0, 0],
            [0, 1, 7, 0, 0, 0, 0, 0, 0],
            [0, 0, 8, 0, 0, 0, 0, 0, 0],
            [0, 0, 7, 1, 0, 0, 0, 0, 0],
            [0, 0, 4, 2, 0, 0, 0, 3, 0],
            [0, 0, 2, 4, 0, 0, 0, 1, 0],
            [0, 0, 0, 0, 0, 0, 0, 0, 0],  # sam14 zeros
            [0, 0, 0, 8, 0, 0, 0, 0, 0],
            [0, 0, 2, 4, 0, 0, 0, 1, 0],  # sam 16 now like sam 13
            [0, 0, 0, 4, 2, 0, 0, 0, 4],
            [0, 0, 0, 2, 4, 0, 0, 0, 1],
            [0, 0, 0, 1, 7, 0, 0, 0, 0],
        ])
        # Scope the 'ignore' filter with catch_warnings(): it is undone on
        # exit whether or not anything raises, and filters set up outside
        # this test survive (resetwarnings() used to discard them all).
        with warnings.catch_warnings():
            warnings.simplefilter('ignore')
            res = unif(otu_data, self.l19_taxon_names, tree,
                       self.l19_sample_names)
        self.assertEqual(res[0, 0], 0)
        self.assertEqual(res[0, 13], 0.0)
        self.assertEqual(res[12, 15], 0.0)
        self.assertEqual(res[0, 1], 1.0)

        with warnings.catch_warnings():
            warnings.simplefilter('ignore')
            unif_row = make_unifrac_row_metric(False, unifrac, True)
            for i, sam_name in enumerate(self.l19_sample_names):
                # Sample 0 has no data and is warned "meaningless".
                # I would prefer if it matched res anyway though.
                if i == 0:
                    continue
                res_row = unif_row(otu_data, self.l19_taxon_names, tree,
                                   self.l19_sample_names, sam_name)
                for j in range(len(self.l19_sample_names)):
                    if j == 0:
                        continue  # ok if meaningless number in zero sample
                    self.assertEqual(res_row[j], res[i, j])
Ejemplo n.º 14
0
    def getResult(self,
                  data_path,
                  taxon_names=None,
                  sample_names=None,
                  tree_path=None):
        """Returns per-sample diversity from incidence matrix and optional tree.

        Parameters:

        data_path: can be either a file path or an array,
        if array: either numpy array or list of numpy arrays where each row is a
        sample, contents are counts of each taxon, must be dense to allow
        phylogenetic calcs (where the taxon you have matters).
        must be 2d.  for one sample just do [sample_array]

        taxon_names: list of names of taxa, same order as in row (required for
        phylogenetic methods)

        sample_names: list of sample names; used by phylogenetic methods to
        build the envs dict and to order the output (required for
        phylogenetic methods, where its length sizes the result)

        tree_path: passed to self.getTree (the original notes describe it as
        a cogent.tree.PhyloNode object, or file path)

        output:
        1d/2d array containing diversity of each sample, preserving order from
        input data  sample by (metric name or metric.return_name)
        1d: [(metric on sample1), (metric on sample2),...
        2d: [(return val 1 from sample1),(return val 2)...]
            [(return val 1 on sample2),...]

        For phylogenetic metrics, a sample that the metric does not report
        is left at 0.0 in the output.
        """
        data = self.getData(data_path)
        if not self.IsPhylogenetic:
            # Materialize the per-row values in a list before building the
            # array: on Python 3 ``map`` returns a lazy iterator and
            # numpy.array(map(...)) would wrap the iterator itself in a
            # 0-d object array instead of holding one entry per sample.
            return numpy.array(
                [self.Metric(row, **self.Params) for row in data])

        tree = self.getTree(tree_path)
        envs = make_envs_dict(data, sample_names, taxon_names)
        new_sample_names, result = self.Metric(tree, envs, **self.Params)
        ordered_res = numpy.zeros(len(sample_names), 'float')
        for i, sample in enumerate(sample_names):
            try:
                # idx is sample's index in result from metric
                idx = new_sample_names.index(sample)
                ordered_res[i] = result[idx]
            except ValueError:
                pass  # already is zero
        return ordered_res
Ejemplo n.º 15
0
 def test_make_unifrac_metric(self):
     """The unweighted unifrac metric wrapper should run and match fast_unifrac.

     The wrapper's matrix is compared with the 'distance_matrix' output of
     a direct fast_unifrac call after reordering it to l19_sample_names."""
     names = self.l19_sample_names
     taxa = self.l19_taxon_names
     tree = parse_newick(self.l19_treestr, PhyloNode)

     wrapper = make_unifrac_metric(False, unifrac, True)
     res = wrapper(self.l19_data, taxa, tree, names)

     envs = make_envs_dict(self.l19_data, names, taxa)
     raw = fast_unifrac(tree, envs, modes=['distance_matrix'])
     unifrac_mat, unifrac_names = raw['distance_matrix']
     reordered = _reorder_unifrac_res([unifrac_mat, unifrac_names], names)

     self.assertFloatEqual(res, reordered)
     self.assertEqual(res[0, 0], 0)
     self.assertEqual(res[0, 3], 0.0)
     self.assertNotEqual(res[0, 1], 1.0)
Ejemplo n.º 16
0
    def getResult(self, data_path, taxon_names=None, sample_names=None,
        tree_path=None):
        """Returns per-sample diversity from incidence matrix and optional tree.

        Parameters:

        data_path: can be either a file path or an array,
        if array: either numpy array or list of numpy arrays where each row is a
        sample, contents are counts of each taxon, must be dense to allow
        phylogenetic calcs (where the taxon you have matters).
        must be 2d.  for one sample just do [sample_array]

        taxon_names: list of names of taxa, same order as in row (required for
        phylogenetic methods)

        sample_names: list of sample names; used by phylogenetic methods to
        build the envs dict and to order the output (required for
        phylogenetic methods, where its length sizes the result)

        tree_path: passed to self.getTree (the original notes describe it as
        a cogent.tree.PhyloNode object, or file path)

        output:
        1d/2d array containing diversity of each sample, preserving order from
        input data  sample by (metric name or metric.return_name)
        1d: [(metric on sample1), (metric on sample2),...
        2d: [(return val 1 from sample1),(return val 2)...]
            [(return val 1 on sample2),...]

        For phylogenetic metrics, a sample that the metric does not report
        is left at 0.0 in the output.
        """
        data = self.getData(data_path)
        if self.IsPhylogenetic:
            tree = self.getTree(tree_path)
            envs = make_envs_dict(data, sample_names, taxon_names)
            new_sample_names, result = self.Metric(tree, envs, **self.Params)
            ordered_res = numpy.zeros(len(sample_names), 'float')
            for i, sample in enumerate(sample_names):
                try:
                    # idx is sample's index in result from metric
                    idx = new_sample_names.index(sample)
                    ordered_res[i] = result[idx]
                except ValueError:
                    pass  # already is zero
            return ordered_res

        # A list comprehension (rather than ``map``) guarantees a concrete
        # sequence: on Python 3 ``map`` is lazy, and numpy.array() of a map
        # object gives a 0-d object array instead of one value per sample.
        per_sample = [self.Metric(row, **self.Params) for row in data]
        return numpy.array(per_sample)
Ejemplo n.º 17
0
    def result(data, taxon_names, tree, sample_names, one_sample_name,**kwargs):
        """Return the UniFrac distances for a single sample.

        Runs fast_unifrac_one_sample on the envs dict built from ``data``
        and passes the outcome, with ``sample_names``, to
        _reorder_unifrac_res_one_sample.  When the one-sample call fails
        with a ValueError mentioning 'one_sample_name not found', a warning
        is emitted and a self-only result (0.0 to itself) is used instead;
        other ValueErrors propagate.

            sample_names: list of unique strings
        """
        not_found_marker = 'one_sample_name not found'
        env_counts = make_envs_dict(data, sample_names, taxon_names)
        try:
            row_res = fast_unifrac_one_sample(
                one_sample_name,
                tree,
                env_counts,
                weighted=weighted,
                metric=metric,
                **kwargs)
        except ValueError as e:
            if not_found_marker not in str(e):
                raise e
            message = ('unifrac had no information on sample '
                       + one_sample_name
                       + ". Distances involving that sample aren't meaningful")
            warnings.warn(message)
            row_res = (numpy.array([0.0]), [one_sample_name])  # self only
        return _reorder_unifrac_res_one_sample(row_res, sample_names)
Ejemplo n.º 18
0
    def test_make_unifrac_metric3(self):
        """Matrix metric should cope with samples that have no taxa in the tree.

        The tree lacks three taxa ("taxa 1,2,3 removed").  The resulting
        matrix is expected to give 0 for a sample against itself, 0 between
        samples 0 and 3 (both marked "now zeros" below) and 1 between
        sample 0 and sample 1.
        """
        treestr = ('((((tax7:0.1):.98,tax8:.3, tax4:.3):.4, '
                   '((tax6:.09):0.43):0.5):.2,'
                   '(tax9:0.3, endbigtaxon:.08));')  # taxa 1,2,3 removed
        tree = parse_newick(treestr, PhyloNode)

        otu_data = numpy.array([
            [7, 1, 0, 0, 0, 0, 0, 0, 0],  # 1 now zeros
            [4, 2, 0, 0, 0, 1, 0, 0, 0],
            [2, 4, 0, 0, 0, 1, 0, 0, 0],
            [1, 7, 0, 0, 0, 0, 0, 0, 0],  # 4 now zeros
            [0, 8, 0, 0, 0, 0, 0, 0, 0],
            [0, 7, 1, 0, 0, 0, 0, 0, 0],
            [0, 4, 2, 0, 0, 0, 2, 0, 0],
            [0, 2, 4, 0, 0, 0, 1, 0, 0],
            [0, 1, 7, 0, 0, 0, 0, 0, 0],
            [0, 0, 8, 0, 0, 0, 0, 0, 0],
            [0, 0, 7, 1, 0, 0, 0, 0, 0],
            [0, 0, 4, 2, 0, 0, 0, 3, 0],
            [0, 0, 2, 4, 0, 0, 0, 1, 0],
            [0, 0, 1, 7, 0, 0, 0, 0, 0],
            [0, 0, 0, 8, 0, 0, 0, 0, 0],
            [0, 0, 0, 7, 1, 0, 0, 0, 0],
            [0, 0, 0, 4, 2, 0, 0, 0, 4],
            [0, 0, 0, 2, 4, 0, 0, 0, 1],
            [0, 0, 0, 1, 7, 0, 0, 0, 0],
        ])

        unif = make_unifrac_metric(False, unifrac, True)
        # catch_warnings() restores the previous filters on exit, even if
        # the call raises, instead of wiping them with resetwarnings().
        with warnings.catch_warnings():
            warnings.simplefilter('ignore')
            res = unif(otu_data, self.l19_taxon_names, tree,
                       self.l19_sample_names)
        self.assertEqual(res[0, 0], 0)
        self.assertEqual(res[0, 3], 0.0)
        self.assertEqual(res[0, 1], 1.0)
Ejemplo n.º 19
0
    def test_make_unifrac_metric3(self):
        """Matrix metric should cope with samples that have no taxa in the tree.

        The tree lacks three taxa ("taxa 1,2,3 removed").  The resulting
        matrix is expected to give 0 for a sample against itself, 0 between
        samples 0 and 3 (both marked "now zeros" below) and 1 between
        sample 0 and sample 1.
        """
        treestr = ('((((tax7:0.1):.98,tax8:.3, tax4:.3):.4, '
                   '((tax6:.09):0.43):0.5):.2,'
                   '(tax9:0.3, endbigtaxon:.08));')  # taxa 1,2,3 removed
        tree = parse_newick(treestr, PhyloNode)

        otu_data = numpy.array([
            [7, 1, 0, 0, 0, 0, 0, 0, 0],  # 1 now zeros
            [4, 2, 0, 0, 0, 1, 0, 0, 0],
            [2, 4, 0, 0, 0, 1, 0, 0, 0],
            [1, 7, 0, 0, 0, 0, 0, 0, 0],  # 4 now zeros
            [0, 8, 0, 0, 0, 0, 0, 0, 0],
            [0, 7, 1, 0, 0, 0, 0, 0, 0],
            [0, 4, 2, 0, 0, 0, 2, 0, 0],
            [0, 2, 4, 0, 0, 0, 1, 0, 0],
            [0, 1, 7, 0, 0, 0, 0, 0, 0],
            [0, 0, 8, 0, 0, 0, 0, 0, 0],
            [0, 0, 7, 1, 0, 0, 0, 0, 0],
            [0, 0, 4, 2, 0, 0, 0, 3, 0],
            [0, 0, 2, 4, 0, 0, 0, 1, 0],
            [0, 0, 1, 7, 0, 0, 0, 0, 0],
            [0, 0, 0, 8, 0, 0, 0, 0, 0],
            [0, 0, 0, 7, 1, 0, 0, 0, 0],
            [0, 0, 0, 4, 2, 0, 0, 0, 4],
            [0, 0, 0, 2, 4, 0, 0, 0, 1],
            [0, 0, 0, 1, 7, 0, 0, 0, 0],
        ])

        unif = make_unifrac_metric(False, unifrac, True)
        # Scope the 'ignore' filter: the previous filter list comes back on
        # exit whether or not the call raises, and filters configured
        # elsewhere are no longer discarded by resetwarnings().
        with warnings.catch_warnings():
            warnings.simplefilter('ignore')
            res = unif(otu_data, self.l19_taxon_names, tree,
                       self.l19_sample_names)
        self.assertEqual(res[0, 0], 0)
        self.assertEqual(res[0, 3], 0.0)
        self.assertEqual(res[0, 1], 1.0)
Ejemplo n.º 20
0
 def test_make_unifrac_metric2(self):
     """Empty and identical samples should get sensible UniFrac distances.

     The OTU table below contains two all-zero samples (rows 0 and 13) and
     two identical samples (rows 12 and 15).  The distance matrix built by
     make_unifrac_metric(False, unifrac, True) is expected to give:

     * 0 between a sample and itself,
     * 0 between the two all-zero samples,
     * 0 between the two identical samples,
     * 1 between an all-zero sample and a non-empty one.
     """
     tree = parse_newick(self.l19_treestr, PhyloNode)
     unif = make_unifrac_metric(False, unifrac, True)
     otu_data = numpy.array([
         [0, 0, 0, 0, 0, 0, 0, 0, 0],  # sam1 zeros
         [4, 2, 0, 0, 0, 1, 0, 0, 0],
         [2, 4, 0, 0, 0, 1, 0, 0, 0],
         [1, 7, 0, 0, 0, 0, 0, 0, 0],
         [0, 8, 0, 0, 0, 0, 0, 0, 0],
         [0, 7, 1, 0, 0, 0, 0, 0, 0],
         [0, 4, 2, 0, 0, 0, 2, 0, 0],
         [0, 2, 4, 0, 0, 0, 1, 0, 0],
         [0, 1, 7, 0, 0, 0, 0, 0, 0],
         [0, 0, 8, 0, 0, 0, 0, 0, 0],
         [0, 0, 7, 1, 0, 0, 0, 0, 0],
         [0, 0, 4, 2, 0, 0, 0, 3, 0],
         [0, 0, 2, 4, 0, 0, 0, 1, 0],
         [0, 0, 0, 0, 0, 0, 0, 0, 0],  # sam14 zeros
         [0, 0, 0, 8, 0, 0, 0, 0, 0],
         [0, 0, 2, 4, 0, 0, 0, 1, 0],  # sam 16 now like sam 13
         [0, 0, 0, 4, 2, 0, 0, 0, 4],
         [0, 0, 0, 2, 4, 0, 0, 0, 1],
         [0, 0, 0, 1, 7, 0, 0, 0, 0],
     ])
     # Scope the 'ignore' filter with catch_warnings(): it is undone on
     # exit whether or not anything raises, and filters installed outside
     # this test survive (resetwarnings() used to discard them all).
     with warnings.catch_warnings():
         warnings.simplefilter('ignore')
         res = unif(otu_data, self.l19_taxon_names, tree,
                    self.l19_sample_names)
     self.assertEqual(res[0, 0], 0)
     self.assertEqual(res[0, 13], 0.0)
     self.assertEqual(res[12, 15], 0.0)
     self.assertEqual(res[0, 1], 1.0)
Ejemplo n.º 21
0
def unifrac_recursive_test(ref_tree, tree, sample_names,
                           taxon_names, data, permutations=1000):  # , metric=weighted):
    """Performs UniFrac recursively over a tree.

    For each node visited while traversing ``tree``, runs fast_unifrac in
    "cluster_envs" mode (always with weighted=False) on that node and
    compares the resulting cluster tree to ``ref_tree`` using the tip-to-tip
    distances and the subset distances. Assumption is that if the two trees
    match, the node represents a group in which evolution has mirrored the
    evolution of the reference tree.

    The same two comparisons are repeated against ``permutations``
    label-permuted copies of ``ref_tree``; for each node the fraction of
    permuted trees whose score is less than or equal to the observed score
    is recorded.

    Parameters:

    ref_tree: reference tree that the clustering is supposed to match.
    tree: the tree on which UniFrac will be performed recursively.
    sample_names, taxon_names, data: OTU table inputs. ``data.T`` is passed
        to make_envs_dict with these names, and rows of ``data`` are
        selected by position in ``taxon_names``, so ``data`` is indexed
        (taxon, sample) here.
    permutations: number of label-permuted copies of ref_tree to build.
        NOTE(review): with permutations=0 the per-node division by
        len(permuted_trees) raises ZeroDivisionError, which is not caught.

    Returns a tuple (results_dict, acc_dict); only nodes whose cluster tree
    has more than two tips are recorded:

    results_dict: 'p_vals' (fraction of permuted trees with subset score
        <= observed), 's_tips' (number of taxon_names found under the node),
        'h_tips' (number of columns with any presence among those taxa),
        's_nodes' (the nodes of ``tree``), 'h_nodes' (the cluster trees).
    acc_dict: 'lengths' (tip counts of the cluster trees), 'dists',
        'sets' (observed scores), 'dist_below' (fraction of permuted trees
        with tip-distance score <= observed).

    Typically, will want to estimate significance by comparing the actual
    values from ref_tree to values obtained with one or more shuffled versions
    of ref_tree (can make these with permute_tip_labels).


    Note from Jon:

    I've modified this code a bit to test each node against a set of label-
    permuted host trees, and return some additional information about each node.

    It doesn't appear to give sensible results, not sure why. Almost none of the
    resulting permutations yield any other than zero or the number of permuta-
    tions. In other words, every permutation yields either a better or worse
    match than the true tree.
    """
    UNIFRAC_CLUST_ENVS = "cluster_envs"

    # Parallel accumulators: one entry per recorded node, appended together
    # at the end of the loop body so they stay the same length.
    lengths, dists, sets, s_nodes, h_nodes, dist_below, sets_below, h_tips, s_tips = [
    ], [], [], [], [], [], [], [], []

    # Permute host tips and keep the permuted trees themselves.
    # (An earlier variant stored them as tree strings and re-parsed each
    # one with DndParser inside the node loop.)

    permuted_trees = []
    host_names = ref_tree.getTipNames()
    # Second call so that shuffling random_names leaves host_names alone
    # (assumes getTipNames returns a fresh list each time -- TODO confirm).
    random_names = ref_tree.getTipNames()
    for i in range(permutations):
        # random_names is shuffled in place, so each permutation starts
        # from the previous shuffled order.
        shuffle(random_names)
        permute_dict = dict(zip(host_names, random_names))
        permuted_subtree = ref_tree.copy()
        permuted_subtree.reassignNames(permute_dict)
        permuted_trees.append(permuted_subtree)

    # Presence/absence version of the table (counts clipped to 0..1).
    interaction = data.clip(0, 1)
    # Parse OTU table data into Unifrac-compatible envs dict

    envs = make_envs_dict(data.T, sample_names, taxon_names)

    # Pass host tree, new OTU tree, and envs to recursive unifrac

    for node in tree.traverse(self_before=True, self_after=False):

        try:
            result = fast_unifrac(
                node, envs, weighted=False, modes=set([UNIFRAC_CLUST_ENVS]))
            curr_tree = result[UNIFRAC_CLUST_ENVS]
        except ValueError:
            # hit a single node?
            continue
        except AttributeError:
            # hit a zero branch length
            continue
        if curr_tree is None:
            # hit single node?
            continue
        try:
            l = len(curr_tree.tips())
            d = curr_tree.compareByTipDistances(ref_tree)
            # NOTE(review): the observed subset score passes True as the
            # second argument, but the permuted-tree calls below omit it;
            # confirm the two scores are meant to be computed differently.
            s = curr_tree.compareBySubsets(ref_tree, True)

            d_b = 0.0
            s_b = 0.0

            # Count permuted trees whose score is <= the observed score.
            for rand_tree in permuted_trees:
                if d >= curr_tree.compareByTipDistances(rand_tree):
                    d_b += 1
                if s >= curr_tree.compareBySubsets(rand_tree):
                    s_b += 1

            # Turn the counts into fractions of the permuted trees.
            d_b = d_b / float(len(permuted_trees))
            s_b = s_b / float(len(permuted_trees))

            # The following section generates s_tips and h_tips variables
            # get just OTUs in this node
            otu_subset = node.getTipNames()
            s_tips_tmp = 0
            h_tips_tmp = 0
            s_vec = []
            # find positional index (from OTU table) for each cOTU represented
            # in this node:
            for i in range(len(taxon_names)):
                if taxon_names[i] in otu_subset:
                    s_tips_tmp += 1
                    s_vec.append(i)

            # slice interaction matrix down to only cOTUs in this node
            i_s_slice = interaction[numpy.ix_(s_vec)]

            # count the columns (hosts) that have at least one of this
            # node's cOTUs present, i.e. a non-zero column sum in the slice
            for j in range(i_s_slice.shape[1]):
                if i_s_slice[:, j].sum():
                    h_tips_tmp += 1

            # want to calculate all values before appending so we can bail out
            # if any of the calculations fails: this ensures that the lists
            # remain synchronized.

            """
            print curr_tree.asciiArt()
            print ref_tree.asciiArt()
            print l
            print d
            print d_b
            print s
            print s_b
            print node
            
            pause = raw_input("pause!")
            """

            # Only cluster trees with more than two tips are recorded.
            if l > 2:
                lengths.append(l)
                dists.append(d)
                sets.append(s)
                s_nodes.append(node)
                h_nodes.append(curr_tree)
                dist_below.append(d_b)
                sets_below.append(s_b)
                h_tips.append(h_tips_tmp)
                s_tips.append(s_tips_tmp)
        except ValueError:
            # no common taxa
            continue
    results_dict = {'p_vals': sets_below, 's_tips': s_tips,
                    'h_tips': h_tips, 's_nodes': s_nodes, 'h_nodes': h_nodes}

    acc_dict = {'lengths': lengths, 'dists': dists,
                'sets': sets, 'dist_below': dist_below}

    return (results_dict, acc_dict)