Example #1
0
def main():
    """Command line entry point: compute beta diversity for a file or dir.

    Parses the command line described by ``script_info``, validates the
    options, makes sure the output directory exists and then dispatches to
    ``multiple_file_beta`` when ``opts.input_path`` is a directory, or to
    ``single_file_beta`` when it is a regular file.

    Exits with status 0 after printing the known metrics when
    ``opts.show_metrics`` is set, and with status 1 when the output location
    cannot be used or the input path is neither a file nor a directory.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    if opts.show_metrics:
        print("Known metrics are: %s\n"
              % (', '.join(list_known_metrics()),))
        exit(0)

    # Checked by hand rather than by the parser -- presumably so that
    # --show_metrics (handled above) can be used on its own; confirm against
    # script_info.
    almost_required_options = ['input_path', 'output_dir', 'metrics']
    for option in almost_required_options:
        if getattr(opts, option) is None:
            option_parser.error('Required option --%s omitted.' % option)

    if opts.output_dir.endswith('.txt'):
        stderr.write('output must be a directory, files will be named' +
                     ' automatically.  And we refuse to make .txt directories\n')
        exit(1)

    # the literal string "None" on the command line means "no tree"
    if opts.tree_path == "None":
        opts.tree_path = None

    try:
        os.makedirs(opts.output_dir)
    except OSError as err:
        # An already existing directory is fine; anything else (permission
        # problem, a regular file in the way, ...) would only resurface later
        # as a confusing write error, so report it now.
        if not os.path.isdir(opts.output_dir):
            stderr.write("io error, could not create output directory %s: %s\n"
                         % (opts.output_dir, err))
            exit(1)

    if os.path.isdir(opts.input_path):
        multiple_file_beta(opts.input_path, opts.output_dir, opts.metrics,
                           opts.tree_path, opts.rows, full_tree=opts.full_tree)
    elif os.path.isfile(opts.input_path):
        single_file_beta(opts.input_path, opts.metrics, opts.tree_path,
                         opts.output_dir, opts.rows, full_tree=opts.full_tree)
    else:
        stderr.write("io error, input path not valid.  Does it exist?\n")
        exit(1)
Example #2
0
    def single_file_beta(self, otu_table_string, tree_string, missing_sams=None,
                         use_metric_list=False):
        """ running single_file_beta should give same result using --rows

        For every known metric the full distance matrix is computed once and
        used as the reference.  The matrix is then recomputed one sample (row)
        at a time, first normally and then with full_tree=True, and every
        value written for a row must match the reference.  Finally the whole
        matrix is recomputed with full_tree=True and compared as well.

        otu_table_string: text written to a temp file used as the input path
        tree_string: text written to a temp file used as the tree path
        missing_sams: sample ids skipped in the row-wise runs (default: none)
        use_metric_list: if True, pass each metric as a one-element list
            instead of a bare string
        """
        if missing_sams is None:
            missing_sams = []

        # setup
        input_path = get_tmp_filename()
        in_fname = os.path.split(input_path)[1]
        with open(input_path, 'w') as f:
            f.write(otu_table_string)
        self.files_to_remove.append(input_path)

        tree_path = get_tmp_filename()
        with open(tree_path, 'w') as f:
            f.write(tree_string)
        self.files_to_remove.append(tree_path)

        metrics = list_known_nonphylogenetic_metrics()
        metrics.extend(list_known_phylogenetic_metrics())

        output_dir = get_tmp_filename(suffix='')
        os.mkdir(output_dir)
        self.folders_to_remove.append(output_dir)
        ft_dir = output_dir + '/ft/'
        os.mkdir(ft_dir)

        # new metrics that don't trivially parallelize must be dealt with
        # carefully
        not_parallelized = ('dissimilarity %s is not parallelized,'
                            ' calculating the whole matrix...')
        for name in ('binary_dist_chisq', 'dist_chisq', 'dist_gower',
                     'dist_hellinger'):
            warnings.filterwarnings('ignore', not_parallelized % name)
        warnings.filterwarnings('ignore',
                                'unifrac had no information for sample M*')

        def run_beta(metric, out_dir, rowids, full_tree=False):
            """Call the module-level single_file_beta for one metric."""
            metric_arg = [metric] if use_metric_list else metric
            kwargs = {'rowids': rowids}
            if full_tree:
                # only passed when requested, so ordinary runs keep relying
                # on the callee's own default
                kwargs['full_tree'] = True
            single_file_beta(input_path, metric_arg, tree_path, out_dir,
                             **kwargs)

        def result_path(metric, out_dir):
            """Path of the result file for metric inside out_dir."""
            return os.path.join(out_dir, metric + '_' + in_fname)

        def check_rows_match_full(metric, out_dir, sams, dmtx,
                                  full_tree=False):
            """Recompute one sample at a time and compare against dmtx."""
            for rows in sams:
                if rows in missing_sams:
                    continue
                run_beta(metric, out_dir, rows, full_tree)
                with open(result_path(metric, out_dir)) as fh:
                    col_sams, row_sams, row_dmtx = parse_matrix(fh)

                n_rows = len(rows.split(','))
                self.assertEqual(row_dmtx.shape, (n_rows, len(sams)))

                # make sure rows same as full
                for j in range(n_rows):
                    for k in range(len(sams)):
                        full_v1 = dmtx[sams.index(row_sams[j]),
                                       sams.index(col_sams[k])]
                        self.assertFloatEqual(row_dmtx[j, k], full_v1)

        for metric in metrics:
            # do it: the full matrix is the reference for everything below
            run_beta(metric, output_dir, None)
            with open(result_path(metric, output_dir)) as fh:
                sams, dmtx = parse_distmat(fh)

            # do it by rows
            check_rows_match_full(metric, output_dir, sams, dmtx)

            ### full tree run:
            if 'full_tree' in str(metric).lower():
                continue
            # do it by rows with full tree
            check_rows_match_full(metric, ft_dir, sams, dmtx, full_tree=True)

            # do it with full tree
            run_beta(metric, ft_dir, None, full_tree=True)
            with open(result_path(metric, ft_dir)) as fh:
                sams_ft, dmtx_ft = parse_distmat(fh)
            self.assertEqual(sams_ft, sams)
            self.assertFloatEqual(dmtx_ft, dmtx)
Example #3
0
    def single_file_beta(self,
                         otu_table_string,
                         tree_string,
                         missing_sams=None,
                         use_metric_list=False):
        """ running single_file_beta should give same result using --rows

        For every known metric the full distance matrix is computed once and
        used as the reference.  The matrix is then recomputed one sample (row)
        at a time, first normally and then with full_tree=True, and every
        value written for a row must match the reference.  Finally the whole
        matrix is recomputed with full_tree=True and compared as well.

        otu_table_string: text written to a temp file used as the input path
        tree_string: text written to a temp file used as the tree path
        missing_sams: sample ids skipped in the row-wise runs (default: none)
        use_metric_list: if True, pass each metric as a one-element list
            instead of a bare string
        """
        if missing_sams is None:
            missing_sams = []

        # setup; each temp path is registered for removal as soon as it
        # exists so that a failure further down does not leave it behind
        fd, input_path = mkstemp(suffix='.txt')
        close(fd)
        self.files_to_remove.append(input_path)
        in_fname = os.path.split(input_path)[1]
        with open(input_path, 'w') as f:
            f.write(otu_table_string)

        fd, tree_path = mkstemp(suffix='.tre')
        close(fd)
        self.files_to_remove.append(tree_path)
        with open(tree_path, 'w') as f:
            f.write(tree_string)

        metrics = list_known_nonphylogenetic_metrics()
        metrics.extend(list_known_phylogenetic_metrics())

        output_dir = mkdtemp()
        self.folders_to_remove.append(output_dir)
        ft_dir = output_dir + '/ft/'
        os.mkdir(ft_dir)

        # new metrics that don't trivially parallelize must be dealt with
        # carefully
        not_parallelized = ('dissimilarity %s is not parallelized,'
                            ' calculating the whole matrix...')
        for name in ('binary_dist_chisq', 'dist_chisq', 'dist_gower',
                     'dist_hellinger'):
            warnings.filterwarnings('ignore', not_parallelized % name)
        warnings.filterwarnings('ignore',
                                'unifrac had no information for sample M*')

        def run_beta(metric, out_dir, rowids, full_tree=False):
            """Call the module-level single_file_beta for one metric."""
            metric_arg = [metric] if use_metric_list else metric
            kwargs = {'rowids': rowids}
            if full_tree:
                # only passed when requested, so ordinary runs keep relying
                # on the callee's own default
                kwargs['full_tree'] = True
            single_file_beta(input_path,
                             metric_arg,
                             tree_path,
                             out_dir,
                             **kwargs)

        def result_path(metric, out_dir):
            """Path of the result file for metric inside out_dir."""
            return os.path.join(out_dir, metric + '_' + in_fname)

        def check_rows_match_full(metric, out_dir, sams, dmtx,
                                  full_tree=False):
            """Recompute one sample at a time and compare against dmtx."""
            for rows in sams:
                if rows in missing_sams:
                    continue
                run_beta(metric, out_dir, rows, full_tree)
                with open(result_path(metric, out_dir)) as fh:
                    col_sams, row_sams, row_dmtx = parse_matrix(fh)

                n_rows = len(rows.split(','))
                self.assertEqual(row_dmtx.shape, (n_rows, len(sams)))

                # make sure rows same as full
                for j in range(n_rows):
                    for k in range(len(sams)):
                        full_v1 = dmtx[sams.index(row_sams[j]),
                                       sams.index(col_sams[k])]
                        assert_almost_equal(row_dmtx[j, k], full_v1)

        for metric in metrics:
            # do it: the full matrix is the reference for everything below
            run_beta(metric, output_dir, None)
            with open(result_path(metric, output_dir)) as fh:
                sams, dmtx = parse_distmat(fh)

            # do it by rows
            check_rows_match_full(metric, output_dir, sams, dmtx)

            # full tree run:
            if 'full_tree' in str(metric).lower():
                continue
            # do it by rows with full tree
            check_rows_match_full(metric, ft_dir, sams, dmtx, full_tree=True)

            # do it with full tree
            run_beta(metric, ft_dir, None, full_tree=True)
            with open(result_path(metric, ft_dir)) as fh:
                sams_ft, dmtx_ft = parse_distmat(fh)
            self.assertEqual(sams_ft, sams)
            assert_almost_equal(dmtx_ft, dmtx)
    def single_file_beta(self, otu_table_string, tree_string, missing_sams=None, use_metric_list=False):
        """ running single_file_beta should give same result using --rows

        For every known metric the full distance matrix is computed once and
        used as the reference.  The matrix is then recomputed one sample (row)
        at a time, first normally and then with full_tree=True, and every
        value written for a row must match the reference.  Finally the whole
        matrix is recomputed with full_tree=True and compared as well.

        otu_table_string: text written to a temp file used as the input path
        tree_string: text written to a temp file used as the tree path
        missing_sams: sample ids skipped in the row-wise runs (default: none)
        use_metric_list: if True, pass each metric as a one-element list
            instead of a bare string
        """
        if missing_sams is None:
            missing_sams = []

        # setup; each temp path is registered for removal as soon as it
        # exists so that a failure further down does not leave it behind
        fd, input_path = mkstemp(suffix=".txt")
        os.close(fd)
        self.files_to_remove.append(input_path)
        in_fname = os.path.split(input_path)[1]
        with open(input_path, "w") as f:
            f.write(otu_table_string)

        fd, tree_path = mkstemp(suffix=".tre")
        os.close(fd)
        self.files_to_remove.append(tree_path)
        with open(tree_path, "w") as f:
            f.write(tree_string)

        metrics = list_known_nonphylogenetic_metrics()
        metrics.extend(list_known_phylogenetic_metrics())

        output_dir = mkdtemp()
        self.folders_to_remove.append(output_dir)
        ft_dir = output_dir + "/ft/"
        os.mkdir(ft_dir)

        # new metrics that don't trivially parallelize must be dealt with
        # carefully
        not_parallelized = "dissimilarity %s is not parallelized, calculating the whole matrix..."
        for name in ("binary_dist_chisq", "dist_chisq", "dist_gower", "dist_hellinger"):
            warnings.filterwarnings("ignore", not_parallelized % name)
        warnings.filterwarnings("ignore", "unifrac had no information for sample M*")

        def run_beta(metric, out_dir, rowids, full_tree=False):
            """Call the module-level single_file_beta for one metric."""
            metric_arg = [metric] if use_metric_list else metric
            kwargs = {"rowids": rowids}
            if full_tree:
                # only passed when requested, so ordinary runs keep relying
                # on the callee's own default
                kwargs["full_tree"] = True
            single_file_beta(input_path, metric_arg, tree_path, out_dir, **kwargs)

        def result_path(metric, out_dir):
            """Path of the result file for metric inside out_dir."""
            return os.path.join(out_dir, metric + "_" + in_fname)

        def check_rows_match_full(metric, out_dir, sams, dmtx, full_tree=False):
            """Recompute one sample at a time and compare against dmtx."""
            for rows in sams:
                if rows in missing_sams:
                    continue
                run_beta(metric, out_dir, rows, full_tree)
                with open(result_path(metric, out_dir)) as fh:
                    col_sams, row_sams, row_dmtx = parse_matrix(fh)

                n_rows = len(rows.split(","))
                self.assertEqual(row_dmtx.shape, (n_rows, len(sams)))

                # make sure rows same as full
                for j in range(n_rows):
                    for k in range(len(sams)):
                        full_v1 = dmtx[sams.index(row_sams[j]), sams.index(col_sams[k])]
                        npt.assert_almost_equal(row_dmtx[j, k], full_v1)

        for metric in metrics:
            # do it: the full matrix is the reference for everything below
            run_beta(metric, output_dir, None)
            with open(result_path(metric, output_dir)) as fh:
                sams, dmtx = parse_distmat(fh)

            # do it by rows
            check_rows_match_full(metric, output_dir, sams, dmtx)

            # full tree run:
            if "full_tree" in str(metric).lower():
                continue
            # do it by rows with full tree
            check_rows_match_full(metric, ft_dir, sams, dmtx, full_tree=True)

            # do it with full tree
            run_beta(metric, ft_dir, None, full_tree=True)
            with open(result_path(metric, ft_dir)) as fh:
                sams_ft, dmtx_ft = parse_distmat(fh)
            self.assertEqual(sams_ft, sams)
            npt.assert_almost_equal(dmtx_ft, dmtx)