Esempio n. 1
0
    def single_object_beta(self,
                           otu_table,
                           metric,
                           tree_string,
                           missing_sams=None):
        """Check that row-wise single_object_beta output matches the full run.

        For every metric name returned by
        list_known_nonphylogenetic_metrics() and
        list_known_phylogenetic_metrics(), the full distance matrix is
        computed once and then recomputed one sample at a time through
        ``rowids``. Each row-wise value must agree with the matching entry
        of the full matrix. Unless the metric name contains 'full_tree',
        the comparison is repeated with ``full_tree=True``, first row by
        row and then for the whole matrix.

        Parameters
        ----------
        otu_table
            Passed unchanged to the module-level single_object_beta.
        metric
            Unused. Kept so existing callers keep working; all known
            metrics are exercised regardless of this value.
        tree_string
            Passed unchanged to the module-level single_object_beta.
        missing_sams : list, optional
            Sample ids for which no row-wise run is performed.
        """
        if missing_sams is None:
            missing_sams = []

        metrics = list_known_nonphylogenetic_metrics()
        metrics.extend(list_known_phylogenetic_metrics())

        # new metrics that don't trivially parallelize must be dealt with
        # carefully
        not_parallelized = ('dissimilarity %s is not parallelized, '
                            'calculating the whole matrix...')
        for name in ('binary_dist_chisq', 'dist_chisq', 'dist_gower',
                     'dist_hellinger'):
            warnings.filterwarnings('ignore', not_parallelized % name)
        warnings.filterwarnings('ignore',
                                'unifrac had no information for sample M*')

        def assert_rows_match_full(metric_name, sams, dmtx, full_tree):
            """Recompute each sample's row and compare it against dmtx."""
            for sample_id in sams:
                if sample_id in missing_sams:
                    continue
                r_out = single_object_beta(otu_table,
                                           metric_name,
                                           tree_string,
                                           rowids=sample_id,
                                           full_tree=full_tree)
                col_sams, row_sams, row_dmtx = parse_matrix(r_out)

                n_rows = len(sample_id.split(','))
                self.assertEqual(row_dmtx.shape, (n_rows, len(sams)))

                # make sure rows same as full
                for j in range(n_rows):
                    full_row = sams.index(row_sams[j])
                    for k in range(len(sams)):
                        npt.assert_almost_equal(
                            row_dmtx[j, k],
                            dmtx[full_row, sams.index(col_sams[k])])

        # A distinct loop name avoids shadowing the (unused) ``metric``
        # parameter.
        for metric_name in metrics:
            # reference: the whole matrix in one go
            beta_out = single_object_beta(otu_table,
                                          metric_name,
                                          tree_string,
                                          rowids=None,
                                          full_tree=False)
            sams, dmtx = parse_distmat(beta_out)

            # do it by rows
            assert_rows_match_full(metric_name, sams, dmtx, full_tree=False)

            # metric names containing 'full_tree' skip the full_tree=True
            # runs below
            if 'full_tree' in str(metric_name).lower():
                continue

            # do it by rows with full tree; rowids must be the single
            # sample here, otherwise the (1, n) shape check cannot hold
            assert_rows_match_full(metric_name, sams, dmtx, full_tree=True)

            # do it with full tree
            r_out = single_object_beta(otu_table,
                                       metric_name,
                                       tree_string,
                                       rowids=None,
                                       full_tree=True)
            sams_ft, dmtx_ft = parse_distmat(r_out)
            self.assertEqual(sams_ft, sams)
            npt.assert_almost_equal(dmtx_ft, dmtx)
Esempio n. 2
0
    def single_object_beta(self, otu_table, metric, tree_string,
                            missing_sams=None):
        """Check that row-wise single_object_beta output matches the full run.

        For every metric name returned by
        list_known_nonphylogenetic_metrics() and
        list_known_phylogenetic_metrics(), the full distance matrix is
        computed once and then recomputed one sample at a time through
        ``rowids``. Each row-wise value must agree with the matching entry
        of the full matrix. Unless the metric name contains 'full_tree',
        the comparison is repeated with ``full_tree=True``, first row by
        row and then for the whole matrix.

        otu_table and tree_string are passed unchanged to the module-level
        single_object_beta. ``metric`` is unused and kept only so existing
        callers keep working; all known metrics are exercised regardless.
        ``missing_sams`` lists sample ids for which no row-wise run is
        performed.
        """
        if missing_sams is None:
            missing_sams = []

        metrics = list_known_nonphylogenetic_metrics()
        metrics.extend(list_known_phylogenetic_metrics())

        # new metrics that don't trivially parallelize must be dealt with
        # carefully
        not_parallelized = ('dissimilarity %s is not parallelized, '
                            'calculating the whole matrix...')
        for name in ('binary_dist_chisq', 'dist_chisq', 'dist_gower',
                     'dist_hellinger'):
            warnings.filterwarnings('ignore', not_parallelized % name)
        warnings.filterwarnings('ignore',
                                'unifrac had no information for sample M*')

        def assert_rows_match_full(metric_name, sams, dmtx, full_tree):
            """Recompute each sample's row and compare it against dmtx."""
            for sample_id in sams:
                if sample_id in missing_sams:
                    continue
                r_out = single_object_beta(otu_table, metric_name,
                                           tree_string, rowids=sample_id,
                                           full_tree=full_tree)
                col_sams, row_sams, row_dmtx = parse_matrix(r_out)

                n_rows = len(sample_id.split(','))
                self.assertEqual(row_dmtx.shape, (n_rows, len(sams)))

                # make sure rows same as full
                for j in range(n_rows):
                    full_row = sams.index(row_sams[j])
                    for k in range(len(sams)):
                        self.assertFloatEqual(
                            row_dmtx[j, k],
                            dmtx[full_row, sams.index(col_sams[k])])

        # A distinct loop name avoids shadowing the (unused) ``metric``
        # parameter.
        for metric_name in metrics:
            # reference: the whole matrix in one go
            beta_out = single_object_beta(otu_table, metric_name,
                                          tree_string, rowids=None,
                                          full_tree=False)
            sams, dmtx = parse_distmat(beta_out)

            # do it by rows
            assert_rows_match_full(metric_name, sams, dmtx, full_tree=False)

            # metric names containing 'full_tree' skip the full_tree=True
            # runs below
            if 'full_tree' in str(metric_name).lower():
                continue

            # do it by rows with full tree; rowids must be the single
            # sample here, otherwise the (1, n) shape check cannot hold
            assert_rows_match_full(metric_name, sams, dmtx, full_tree=True)

            # do it with full tree
            r_out = single_object_beta(otu_table, metric_name,
                                       tree_string, rowids=None,
                                       full_tree=True)
            sams_ft, dmtx_ft = parse_distmat(r_out)
            self.assertEqual(sams_ft, sams)
            self.assertFloatEqual(dmtx_ft, dmtx)
Esempio n. 3
0
    def single_file_beta(self,
                         otu_table_string,
                         tree_string,
                         missing_sams=None,
                         use_metric_list=False):
        """Check that row-wise single_file_beta output matches the full run.

        The OTU table and tree strings are written to temporary files. For
        every metric name returned by list_known_nonphylogenetic_metrics()
        and list_known_phylogenetic_metrics(), the full distance matrix is
        written and parsed once, then recomputed one sample at a time
        through ``rowids``. Each row-wise value must agree with the
        matching entry of the full matrix. Unless the metric name contains
        'full_tree', the comparison is repeated with ``full_tree=True``
        into an 'ft' subdirectory, first row by row and then for the whole
        matrix.

        Parameters
        ----------
        otu_table_string : str
            Text written to the temporary input file.
        tree_string : str
            Text written to the temporary tree file.
        missing_sams : list, optional
            Sample ids for which no row-wise run is performed.
        use_metric_list : bool, optional
            If true, each metric is passed to single_file_beta wrapped in
            a one-element list instead of as a bare name.
        """
        if missing_sams is None:
            missing_sams = []

        # setup: register every temporary path for cleanup as soon as it
        # exists, so a failure while writing does not leave it behind
        fd, input_path = mkstemp(suffix='.txt')
        os.close(fd)
        self.files_to_remove.append(input_path)
        fd, tree_path = mkstemp(suffix='.tre')
        os.close(fd)
        self.files_to_remove.append(tree_path)
        output_dir = mkdtemp()
        self.folders_to_remove.append(output_dir)

        in_fname = os.path.split(input_path)[1]
        with open(input_path, 'w') as f:
            f.write(otu_table_string)
        with open(tree_path, 'w') as f:
            f.write(tree_string)

        metrics = list_known_nonphylogenetic_metrics()
        metrics.extend(list_known_phylogenetic_metrics())

        # new metrics that don't trivially parallelize must be dealt with
        # carefully
        not_parallelized = ('dissimilarity %s is not parallelized, '
                            'calculating the whole matrix...')
        for name in ('binary_dist_chisq', 'dist_chisq', 'dist_gower',
                     'dist_hellinger'):
            warnings.filterwarnings('ignore', not_parallelized % name)
        warnings.filterwarnings('ignore',
                                'unifrac had no information for sample M*')

        ft_dir = output_dir + '/ft/'
        os.mkdir(ft_dir)

        def run_beta(metric, out_dir, rowids, full_tree):
            """Run single_file_beta; return the path this test reads back."""
            metric_arg = [metric] if use_metric_list else metric
            # only pass full_tree when requested, so plain runs keep
            # single_file_beta's own default
            extra = {'full_tree': True} if full_tree else {}
            single_file_beta(input_path, metric_arg, tree_path, out_dir,
                             rowids=rowids, **extra)
            return os.path.join(out_dir, metric + '_' + in_fname)

        def assert_rows_match_full(metric, out_dir, sams, dmtx, full_tree):
            """Recompute each sample's row and compare it against dmtx."""
            for sample_id in sams:
                if sample_id in missing_sams:
                    continue
                row_outname = run_beta(metric, out_dir, sample_id, full_tree)
                with open(row_outname) as row_f:
                    col_sams, row_sams, row_dmtx = parse_matrix(row_f)

                n_rows = len(sample_id.split(','))
                self.assertEqual(row_dmtx.shape, (n_rows, len(sams)))

                # make sure rows same as full
                for j in range(n_rows):
                    full_row = sams.index(row_sams[j])
                    for k in range(len(sams)):
                        npt.assert_almost_equal(
                            row_dmtx[j, k],
                            dmtx[full_row, sams.index(col_sams[k])])

        for metric in metrics:
            # reference: the whole matrix in one go
            full_outname = run_beta(metric, output_dir, None, False)
            with open(full_outname) as full_f:
                sams, dmtx = parse_distmat(full_f)

            # do it by rows
            assert_rows_match_full(metric, output_dir, sams, dmtx, False)

            # metric names containing 'full_tree' skip the full_tree=True
            # runs below
            if 'full_tree' in str(metric).lower():
                continue

            # do it by rows with full tree
            assert_rows_match_full(metric, ft_dir, sams, dmtx, True)

            # do it with full tree
            ft_outname = run_beta(metric, ft_dir, None, True)
            with open(ft_outname) as ft_f:
                sams_ft, dmtx_ft = parse_distmat(ft_f)
            self.assertEqual(sams_ft, sams)
            npt.assert_almost_equal(dmtx_ft, dmtx)
Esempio n. 4
0
    def single_file_beta(
            self, otu_table_string, tree_string, missing_sams=None,
            use_metric_list=False):
        """Check that row-wise single_file_beta output matches the full run.

        The OTU table and tree strings are written to temporary files. For
        every metric name returned by list_known_nonphylogenetic_metrics()
        and list_known_phylogenetic_metrics(), the full distance matrix is
        written and parsed once, then recomputed one sample at a time
        through ``rowids``. Each row-wise value must agree with the
        matching entry of the full matrix. Unless the metric name contains
        'full_tree', the comparison is repeated with ``full_tree=True``
        into an 'ft' subdirectory, first row by row and then for the whole
        matrix.

        Parameters
        ----------
        otu_table_string : str
            Text written to the temporary input file.
        tree_string : str
            Text written to the temporary tree file.
        missing_sams : list, optional
            Sample ids for which no row-wise run is performed.
        use_metric_list : bool, optional
            If true, each metric is passed to single_file_beta wrapped in
            a one-element list instead of as a bare name.
        """
        if missing_sams is None:
            missing_sams = []

        # setup: register every temporary path for cleanup as soon as it
        # exists, so a failure while writing does not leave it behind
        fd, input_path = mkstemp(suffix='.txt')
        close(fd)
        self.files_to_remove.append(input_path)
        fd, tree_path = mkstemp(suffix='.tre')
        close(fd)
        self.files_to_remove.append(tree_path)
        output_dir = mkdtemp()
        self.folders_to_remove.append(output_dir)

        in_fname = os.path.split(input_path)[1]
        with open(input_path, 'w') as f:
            f.write(otu_table_string)
        with open(tree_path, 'w') as f:
            f.write(tree_string)

        metrics = list_known_nonphylogenetic_metrics()
        metrics.extend(list_known_phylogenetic_metrics())

        # new metrics that don't trivially parallelize must be dealt with
        # carefully
        not_parallelized = ('dissimilarity %s is not parallelized, '
                            'calculating the whole matrix...')
        for name in ('binary_dist_chisq', 'dist_chisq', 'dist_gower',
                     'dist_hellinger'):
            warnings.filterwarnings('ignore', not_parallelized % name)
        warnings.filterwarnings('ignore',
                                'unifrac had no information for sample M*')

        ft_dir = output_dir + '/ft/'
        os.mkdir(ft_dir)

        def run_beta(metric, out_dir, rowids, full_tree):
            """Run single_file_beta; return the path this test reads back."""
            metric_arg = [metric] if use_metric_list else metric
            # only pass full_tree when requested, so plain runs keep
            # single_file_beta's own default
            extra = {'full_tree': True} if full_tree else {}
            single_file_beta(input_path, metric_arg, tree_path, out_dir,
                             rowids=rowids, **extra)
            return os.path.join(out_dir, metric + '_' + in_fname)

        def assert_rows_match_full(metric, out_dir, sams, dmtx, full_tree):
            """Recompute each sample's row and compare it against dmtx."""
            for sample_id in sams:
                if sample_id in missing_sams:
                    continue
                row_outname = run_beta(metric, out_dir, sample_id, full_tree)
                with open(row_outname) as row_f:
                    col_sams, row_sams, row_dmtx = parse_matrix(row_f)

                n_rows = len(sample_id.split(','))
                self.assertEqual(row_dmtx.shape, (n_rows, len(sams)))

                # make sure rows same as full
                for j in range(n_rows):
                    full_row = sams.index(row_sams[j])
                    for k in range(len(sams)):
                        assert_almost_equal(
                            row_dmtx[j, k],
                            dmtx[full_row, sams.index(col_sams[k])])

        for metric in metrics:
            # reference: the whole matrix in one go
            full_outname = run_beta(metric, output_dir, None, False)
            with open(full_outname) as full_f:
                sams, dmtx = parse_distmat(full_f)

            # do it by rows
            assert_rows_match_full(metric, output_dir, sams, dmtx, False)

            # metric names containing 'full_tree' skip the full_tree=True
            # runs below
            if 'full_tree' in str(metric).lower():
                continue

            # do it by rows with full tree
            assert_rows_match_full(metric, ft_dir, sams, dmtx, True)

            # do it with full tree
            ft_outname = run_beta(metric, ft_dir, None, True)
            with open(ft_outname) as ft_f:
                sams_ft, dmtx_ft = parse_distmat(ft_f)
            self.assertEqual(sams_ft, sams)
            assert_almost_equal(dmtx_ft, dmtx)