Example #1
    def test_run_core_diversity_analyses_error_on_invalid_category(self):
        """run_core_diversity_analyses: error raised on invalid categories
        """
        # too few values in 'month' category
        self.assertRaises(ValueError,
                          run_core_diversity_analyses,
                          self.test_data['biom'][0],
                          self.test_data['map'][0],
                          20,
                          output_dir=self.test_out,
                          params=parse_qiime_parameters({}),
                          qiime_config=self.qiime_config,
                          categories=['SampleType', 'month'],
                          tree_fp=self.test_data['tree'][0],
                          parallel=False,
                          status_update_callback=no_status_updates)

        # invalid category name
        self.assertRaises(ValueError,
                          run_core_diversity_analyses,
                          self.test_data['biom'][0],
                          self.test_data['map'][0],
                          20,
                          output_dir=self.test_out,
                          params=parse_qiime_parameters({}),
                          qiime_config=self.qiime_config,
                          categories=['not-a-real-category'],
                          tree_fp=self.test_data['tree'][0],
                          parallel=False,
                          status_update_callback=no_status_updates)
Example #3
def main():
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    otu_table_fp = opts.otu_table_fp
    output_dir = opts.output_dir
    mapping_fp = opts.mapping_fp
    verbose = opts.verbose
    print_only = opts.print_only
    mapping_cat = opts.mapping_category
    sort = opts.sort

    if mapping_cat is not None and mapping_fp is None:
        option_parser.error("If passing -c, you must also pass -m.")

    if opts.parameter_fp:
        try:
            parameter_f = open(opts.parameter_fp, 'U')
        except IOError:
            raise IOError(
                "Can't open parameters file (%s). Does it exist? Do you have read access?"
                % opts.parameter_fp)
        params = parse_qiime_parameters(parameter_f)
        parameter_f.close()
    else:
        params = parse_qiime_parameters([])
        # empty list returns empty defaultdict for now

    try:
        makedirs(output_dir)
    except OSError:
        if opts.force:
            pass
        else:
            # Since the analysis can take quite a while, I put this check
            # in to help users avoid overwriting previous output.
            option_parser.error(
                "Output directory already exists. Please choose"
                " a different directory, or force overwrite with -f.")

    if print_only:
        command_handler = print_commands
    else:
        command_handler = call_commands_serially

    if verbose:
        status_update_callback = print_to_stdout
    else:
        status_update_callback = no_status_updates

    run_summarize_taxa_through_plots(
        otu_table_fp=otu_table_fp,
        mapping_fp=mapping_fp,
        output_dir=output_dir,
        mapping_cat=mapping_cat,
        sort=sort,
        command_handler=command_handler,
        params=params,
        qiime_config=qiime_config,
        status_update_callback=status_update_callback)
    def run_command(self, options, arguments):

        input_fp = options["input_fp"]
        output_dir = options["output_dir"]
        verbose = options["verbose"]
        print_only = options["print_only"]

        parallel = options["parallel"]
        # No longer checking that jobs_to_start > 2, but
        # commenting as we may change our minds about this.
        # if parallel: raise_error_on_parallel_unavailable()

        if options["parameter_fp"]:
            try:
                parameter_f = open(options["parameter_fp"])
            except IOError:
                raise QiimeCommandError("Can't open parameters file (%s). "
                                        "Does it exist? Do you have read "
                                        "access?" % options["parameter_fp"])
            params = parse_qiime_parameters(parameter_f)
        else:
            params = parse_qiime_parameters([])
            # empty list returns empty defaultdict for now

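        # record the validated number of parallel jobs in the parameters dict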
        params["parallel"]["jobs_to_start"] = self._validate_jobs_to_start(
            options["jobs_to_start"], qiime_config["jobs_to_start"], parallel
        )

        try:
            makedirs(output_dir)
        except OSError:
            if options["force"]:
                pass
            else:
                # Since the analysis can take quite a while, I put this check
                # in to help users avoid overwriting previous output.
                print ("Output directory already exists. Please choose "
                       "a different directory, or force overwrite with -f.")
                exit(1)

        if print_only:
            command_handler = print_commands
        else:
            command_handler = call_commands_serially

        if verbose:
            status_update_callback = print_to_stdout
        else:
            status_update_callback = no_status_updates

        run_qiime_data_preparation(
            input_fp,
            output_dir,
            command_handler=command_handler,
            params=params,
            qiime_config=qiime_config,
            parallel=parallel,
            status_update_callback=status_update_callback,
        )
def main():
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    input_fp = opts.input_fp
    output_dir = opts.output_dir
    verbose = opts.verbose
    print_only = opts.print_only

    parallel = opts.parallel
    # No longer checking that jobs_to_start > 2, but
    # commenting as we may change our minds about this.
    # if parallel: raise_error_on_parallel_unavailable()

    if opts.parameter_fp:
        try:
            parameter_f = open(opts.parameter_fp)
        except IOError:
            raise IOError("Can't open parameters file (%s). Does it exist? "
                          "Do you have read access?" % opts.parameter_fp)
        params = parse_qiime_parameters(parameter_f)
    else:
        params = parse_qiime_parameters([])
        # empty list returns empty defaultdict for now

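    # validate the requested number of parallel jobs and record it in params,
    # falling back to the qiime_config default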
    jobs_to_start = opts.jobs_to_start
    default_jobs_to_start = qiime_config["jobs_to_start"]
    validate_and_set_jobs_to_start(params, jobs_to_start, default_jobs_to_start, parallel, option_parser)

    try:
        makedirs(output_dir)
    except OSError:
        if opts.force:
            pass
        else:
            # Since the analysis can take quite a while, I put this check
            # in to help users avoid overwriting previous output.
            print ("Output directory already exists. Please choose "
                   "a different directory, or force overwrite with -f.")
            exit(1)

    if print_only:
        command_handler = print_commands
    else:
        command_handler = call_commands_serially

    if verbose:
        status_update_callback = print_to_stdout
    else:
        status_update_callback = no_status_updates

    run_qiime_data_preparation(
        input_fp,
        output_dir,
        command_handler=command_handler,
        params=params,
        qiime_config=qiime_config,
        parallel=parallel,
        status_update_callback=status_update_callback,
    )
def main():
    option_parser, opts, args = parse_command_line_parameters(**script_info)
    
    otu_table_fp = opts.otu_table_fp
    output_dir = opts.output_dir
    mapping_fp = opts.mapping_fp
    verbose = opts.verbose
    print_only = opts.print_only
    mapping_cat = opts.mapping_category
    sort = opts.sort

    if mapping_cat is not None and mapping_fp is None:
        option_parser.error("If passing -c, you must also pass -m.")
    
    if opts.parameter_fp:
        try:
            parameter_f = open(opts.parameter_fp, 'U')
        except IOError:
            raise IOError("Can't open parameters file (%s). Does it exist? "
                          "Do you have read access?" % opts.parameter_fp)
        params = parse_qiime_parameters(parameter_f)
        parameter_f.close()
    else:
        params = parse_qiime_parameters([]) 
        # empty list returns empty defaultdict for now
                                   
    try:
        makedirs(output_dir)
    except OSError:
        if opts.force:
            pass
        else:
            # Since the analysis can take quite a while, I put this check
            # in to help users avoid overwriting previous output.
            option_parser.error("Output directory already exists. Please choose"
                " a different directory, or force overwrite with -f.")

    if print_only:
        command_handler = print_commands
    else:
        command_handler = call_commands_serially
    
    if verbose:
        status_update_callback = print_to_stdout
    else:
        status_update_callback = no_status_updates
     
    run_summarize_taxa_through_plots(
        otu_table_fp=otu_table_fp,
        mapping_fp=mapping_fp,
        output_dir=output_dir,
        mapping_cat=mapping_cat,
        sort=sort,
        command_handler=command_handler,
        params=params,
        qiime_config=qiime_config,
        status_update_callback=status_update_callback)
    def test_run_core_diversity_analyses_parallel(self):
        """run_core_diversity_analyses functions with categories in parallel
        """
        run_core_diversity_analyses(
            self.test_data['biom'][0],
            self.test_data['map'][0],
            20,
            output_dir=self.test_out,
            params=parse_qiime_parameters({}),
            arare_num_steps=3,
            qiime_config=self.qiime_config,
            categories=['SampleType', 'days_since_epoch'],
            tree_fp=self.test_data['tree'][0],
            parallel=True,
            status_update_callback=no_status_updates)

        # Basic sanity test that output directories and files exist
        fps = [
            '%s/bdiv_even20' % self.test_out,
            '%s/arare_max20' % self.test_out,
            '%s/taxa_plots' % self.test_out,
            '%s/bdiv_even20/unweighted_unifrac_dm.txt' % self.test_out,
            '%s/bdiv_even20/weighted_unifrac_pc.txt' % self.test_out,
            '%s/arare_max20/compare_chao1/days_since_epoch_stats.txt' % self.test_out,
            '%s/arare_max20/compare_PD_whole_tree/SampleType_boxplots.pdf' % self.test_out,
            '%s/index.html' % self.test_out,
            '%s/table_mc%d.biom.gz' % (self.test_out, 20)
        ]
        for fp in fps:
            self.assertTrue(exists(fp))
    def test_run_core_diversity_analyses_no_categories(self):
        """run_core_diversity_analyses functions without categories
        """
        # this takes a long time, so use a longer sigalrm
        run_core_diversity_analyses(
            self.test_data['biom'][0],
            self.test_data['map'][0],
            20,
            output_dir=self.test_out,
            params=parse_qiime_parameters({}),
            qiime_config=self.qiime_config,
            categories=None,
            tree_fp=self.test_data['tree'][0],
            parallel=False,
            status_update_callback=no_status_updates)

        # Basic sanity test that output directories and files exist
        fps = [
            '%s/bdiv_even20' % self.test_out,
            '%s/arare_max20' % self.test_out,
            '%s/taxa_plots' % self.test_out,
            '%s/bdiv_even20/unweighted_unifrac_dm.txt' % self.test_out,
            '%s/bdiv_even20/weighted_unifrac_pc.txt' % self.test_out,
            '%s/index.html' % self.test_out,
            '%s/table_mc%d.biom.gz' % (self.test_out, 20)
        ]
        for fp in fps:
            self.assertTrue(exists(fp))

        # categorical output files don't exist
        self.assertFalse(exists(
            '%s/arare_max20/compare_chao1/days_since_epoch_stats.txt' % self.test_out))
        self.assertFalse(exists(
            '%s/arare_max20/compare_PD_whole_tree/SampleType_boxplots.pdf' % self.test_out))
    def test_run_core_diversity_analyses_no_tree(self):
        """run_core_diversity_analyses functions without tree
        """
        # this takes a long time, so use a longer sigalrm
        run_core_diversity_analyses(
            self.test_data['biom'][0],
            self.test_data['map'][0],
            20,
            output_dir=self.test_out,
            params=parse_qiime_parameters(
                ['beta_diversity:metrics bray_curtis',
                 'alpha_diversity:metrics observed_species,chao1']),
            qiime_config=self.qiime_config,
            categories=['SampleType'],
            tree_fp=None,
            parallel=False,
            status_update_callback=no_status_updates)

        # Basic sanity test that output directories and files exist
        fps = [
            '%s/bdiv_even20' % self.test_out,
            '%s/arare_max20' % self.test_out,
            '%s/taxa_plots' % self.test_out,
            '%s/bdiv_even20/bray_curtis_dm.txt' % self.test_out,
            '%s/arare_max20/compare_observed_species/SampleType_boxplots.pdf' % self.test_out,
            '%s/index.html' % self.test_out,
            '%s/table_mc%d.biom.gz' % (self.test_out, 20)
        ]
        for fp in fps:
            self.assertTrue(exists(fp))

        # phylogenetic diversity output files do not exist
        self.assertFalse(exists(
            '%s/bdiv_even20/unweighted_unifrac_dm.txt' % self.test_out))
Example #10
    def test_run_core_diversity_analyses_no_categories(self):
        """run_core_diversity_analyses functions without categories
        """
        # this takes a long time, so use a longer sigalrm
        run_core_diversity_analyses(
            self.test_data['biom'][0],
            self.test_data['map'][0],
            20,
            output_dir=self.test_out,
            params=parse_qiime_parameters({}),
            qiime_config=self.qiime_config,
            categories=None,
            tree_fp=self.test_data['tree'][0],
            parallel=False,
            status_update_callback=no_status_updates)

        # Basic sanity test that output directories and files exist
        fps = [
            '%s/bdiv_even20' % self.test_out,
            '%s/arare_max20' % self.test_out,
            '%s/taxa_plots' % self.test_out,
            '%s/bdiv_even20/unweighted_unifrac_dm.txt' % self.test_out,
            '%s/bdiv_even20/weighted_unifrac_pc.txt' % self.test_out,
            '%s/index.html' % self.test_out,
            '%s/table_mc%d.biom.gz' % (self.test_out, 20)
        ]
        for fp in fps:
            self.assertTrue(exists(fp))

        # categorical output files don't exist
        self.assertFalse(exists(
            '%s/arare_max20/days_since_epoch_chao1.txt' % self.test_out))
        self.assertFalse(exists(
            '%s/arare_max20/SampleType_PD_whole_tree.txt' % self.test_out))
Example #12
    def setUp(self):
        """ """
        self.test_data = get_test_data_fps()
        self.files_to_remove = []
        self.dirs_to_remove = []

        # Create example output directory
        tmp_dir = get_qiime_temp_dir()
        self.test_out = mkdtemp(dir=tmp_dir,
                                prefix='core_qiime_analyses_test_',
                                suffix='')
        self.dirs_to_remove.append(self.test_out)

        self.qiime_config = load_qiime_config()
        self.params = parse_qiime_parameters(params_f1)

        # suppress stderr during tests (one of the system calls in the
        # workflow prints a warning, and we can't suppress that warning with
        # warnings.filterwarnings here because it comes from within the code
        # executed through the system call). Found this trick here:
        # http://stackoverflow.com/questions/9949633/suppressing-print-as-stdout-python
        self.saved_stderr = sys.stderr
        sys.stderr = StringIO()

        initiate_timeout(180)
def main():
    option_parser, opts, args = \
        parse_command_line_parameters(suppress_verbose=True, **script_info)
        
    input_dir = opts.input_dir
    paired_data = opts.paired_data
    parameter_fp = opts.parameter_fp
    read1_indicator = opts.read1_indicator
    read2_indicator = opts.read2_indicator
    leading_text = opts.leading_text
    trailing_text = opts.trailing_text
    include_input_dir_path = opts.include_input_dir_path
    output_dir = abspath(opts.output_dir)
    remove_filepath_in_name = opts.remove_filepath_in_name
    print_only = opts.print_only
    
    if remove_filepath_in_name and not include_input_dir_path:
        option_parser.error("If --remove_filepath_in_name is enabled, "
            "--include_input_dir_path must also be enabled.")
            
    if opts.parameter_fp:
        with open(opts.parameter_fp, 'U') as parameter_f:
            params_dict = parse_qiime_parameters(parameter_f)
        params_str = get_params_str(params_dict['extract_barcodes'])
    else:
        params_dict = {}
        params_str = ""
    
    create_dir(output_dir)
                
    all_files = []
    extensions = ['.fastq.gz', '.fastq', '.fq.gz', '.fq']
    
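    # recursively collect every FASTQ file (plain or gzipped) under input_dir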
    for root, dirs, fps in walk(input_dir):
        for fp in fps:
            for extension in extensions:
                if fp.endswith(extension):
                    all_files += [abspath(join(root, fp))]

    if paired_data:
        all_files, bc_pairs = get_pairs(all_files, read1_indicator,
                                        read2_indicator)

    commands = create_commands_eb(all_files, paired_data, output_dir,
        params_str, leading_text, trailing_text, include_input_dir_path,
        remove_filepath_in_name)
        
    qiime_config = load_qiime_config()
    if print_only:
        command_handler = print_commands
    else:
        command_handler = call_commands_serially
    logger = WorkflowLogger(generate_log_fp(output_dir),
                            params=params_dict,
                            qiime_config=qiime_config)
    # Call the command handler on the list of commands
    command_handler(commands,
                    status_update_callback=no_status_updates,
                    logger=logger,
                    close_logger_on_success=True)
Example #14
    def test_run_core_diversity_analyses_parallel(self):
        """run_core_diversity_analyses functions with categories in parallel
        """
        run_core_diversity_analyses(
            self.test_data['biom'][0],
            self.test_data['map'][0],
            20,
            output_dir=self.test_out,
            params=parse_qiime_parameters({}),
            qiime_config=self.qiime_config,
            categories=['SampleType', 'days_since_epoch'],
            tree_fp=self.test_data['tree'][0],
            parallel=True,
            status_update_callback=no_status_updates)

        # Basic sanity test that output directories and files exist
        fps = [
            '%s/bdiv_even20' % self.test_out,
            '%s/arare_max20' % self.test_out,
            '%s/taxa_plots' % self.test_out,
            '%s/bdiv_even20/unweighted_unifrac_dm.txt' % self.test_out,
            '%s/bdiv_even20/weighted_unifrac_pc.txt' % self.test_out,
            '%s/arare_max20/compare_chao1/days_since_epoch_stats.txt' %
            self.test_out,
            '%s/arare_max20/compare_PD_whole_tree/SampleType_boxplots.pdf' %
            self.test_out,
            '%s/index.html' % self.test_out,
            '%s/table_mc%d.biom.gz' % (self.test_out, 20)
        ]
        for fp in fps:
            self.assertTrue(exists(fp))
def main():
    option_parser, opts, args = \
        parse_command_line_parameters(suppress_verbose=True, **script_info)
        
    input_dir = opts.input_dir
    parameter_fp = opts.parameter_fp
    read1_indicator = opts.read1_indicator
    read2_indicator = opts.read2_indicator
    match_barcodes = opts.match_barcodes
    barcode_indicator = opts.barcode_indicator
    leading_text = opts.leading_text
    trailing_text = opts.trailing_text
    include_input_dir_path = opts.include_input_dir_path
    output_dir = abspath(opts.output_dir)
    remove_filepath_in_name = opts.remove_filepath_in_name
    print_only = opts.print_only
    
    if remove_filepath_in_name and not include_input_dir_path:
        option_parser.error("If --remove_filepath_in_name is enabled, "
            "--include_input_dir_path must also be enabled.")

    if opts.parameter_fp:
        with open(opts.parameter_fp, 'U') as parameter_f:
            params_dict = parse_qiime_parameters(parameter_f)
        params_str = get_params_str(params_dict['join_paired_ends'])
    else:
        params_dict = {}
        params_str = ""

    create_dir(output_dir)
    
    all_files = []
    extensions = ['.fastq.gz', '.fastq', '.fq.gz', '.fq']
    
    for root, dirs, fps in walk(input_dir):
        for fp in fps:
            for extension in extensions:
                if fp.endswith(extension):
                    all_files += [abspath(join(root, fp))]
        
    pairs, bc_pairs = get_pairs(all_files, read1_indicator, read2_indicator,
        match_barcodes, barcode_indicator)

    commands = create_commands_jpe(pairs, output_dir,
        params_str, leading_text, trailing_text, include_input_dir_path,
        remove_filepath_in_name, match_barcodes, bc_pairs)
        
    qiime_config = load_qiime_config()
    if print_only:
        command_handler = print_commands
    else:
        command_handler = call_commands_serially
    logger = WorkflowLogger(generate_log_fp(output_dir),
                            params=params_dict,
                            qiime_config=qiime_config)
    # Call the command handler on the list of commands
    command_handler(commands,
                    status_update_callback=no_status_updates,
                    logger=logger,
                    close_logger_on_success=True)
    def test_run_core_diversity_analyses(self):
        """run_core_diversity_analyses functions with categories
        """
        run_core_diversity_analyses(
            self.test_data["biom"][0],
            self.test_data["map"][0],
            20,
            output_dir=self.test_out,
            params=parse_qiime_parameters({}),
            qiime_config=self.qiime_config,
            categories=["SampleType", "days_since_epoch"],
            tree_fp=self.test_data["tree"][0],
            parallel=False,
            status_update_callback=no_status_updates,
        )

        # Basic sanity test that output directories and files exist
        fps = [
            "%s/bdiv_even20" % self.test_out,
            "%s/arare_max20" % self.test_out,
            "%s/taxa_plots" % self.test_out,
            "%s/bdiv_even20/unweighted_unifrac_dm.txt" % self.test_out,
            "%s/bdiv_even20/weighted_unifrac_pc.txt" % self.test_out,
            "%s/arare_max20/compare_chao1/days_since_epoch_stats.txt" % self.test_out,
            "%s/arare_max20/compare_PD_whole_tree/SampleType_boxplots.pdf" % self.test_out,
            "%s/index.html" % self.test_out,
            "%s/table_mc%d.biom.gz" % (self.test_out, 20),
        ]
        for fp in fps:
            self.assertTrue(exists(fp))
    def setUp(self):
        """setup the test values"""
        
        self.qiime_config = load_qiime_config()
        self.dirs_to_remove = []
        self.files_to_remove = []
        
        # this is specific to the web-apps only
        test_dir = abspath(dirname(__file__))
        self.fna_original_fp = os.path.join(test_dir, 'support_files',
                                            'test.fna')

        tmp_dir = self.qiime_config['temp_dir'] or '/tmp/'
        if not exists(tmp_dir):
            makedirs(tmp_dir)
            
            # if test creates the temp dir, also remove it
            self.dirs_to_remove.append(tmp_dir)
        
        self.wf_out = get_tmp_filename(tmp_dir=tmp_dir,
                                       prefix='qiime_wf_out', suffix='',
                                       result_constructor=str)
        if not exists(self.wf_out):
            makedirs(self.wf_out)         
            self.dirs_to_remove.append(self.wf_out)
        #print self.wf_out
        working_dir = self.qiime_config['working_dir'] or './'
        jobs_dir = join(working_dir, 'jobs')
        if not exists(jobs_dir):
            # only clean up the jobs dir if it doesn't already exist
            self.dirs_to_remove.append(jobs_dir)
        self.params = parse_qiime_parameters(qiime_parameters_f.split('\n'))

        signal.signal(signal.SIGALRM, timeout)
        # set the 'alarm' to go off in allowed_seconds seconds
        signal.alarm(allowed_seconds_per_test)
    def setUp(self):
        """ """
        
        self.qiime_config = load_qiime_config()
        self.dirs_to_remove = []
        self.files_to_remove = []
        
        # this is specific to the web-apps only
        test_dir = abspath(dirname(__file__))
        sff_original_fp = os.path.join(test_dir, 'support_files',
                                       'Fasting_subset.sff')
        
        self.sff_fp = os.path.join('/%s/' % environ['HOME'], 
                                   'Fasting_subset.sff')
        self.files_to_remove.append(self.sff_fp)
        copy(sff_original_fp, self.sff_fp)
        
        self.illumina_fps = [os.path.join(test_dir, 'support_files',
                                          's_8_1_sequence_100_records.txt'),
                             os.path.join(test_dir, 'support_files',
                                          's_8_2_sequence_100_records.txt')]
        self.illumina_map_fp = os.path.join(test_dir, 'support_files',
                                            's8_map_incomplete.txt')

        self.fasta_fps = [os.path.join(test_dir, 'support_files',
                                       'test_split_lib_seqs.fasta')]
        self.fasta_map_fp = os.path.join(test_dir, 'support_files',
                                         'fasta_mapping_file.txt')

        tmp_dir = "/%s/test_wf" % environ['HOME']
        self.dirs_to_remove.append(tmp_dir)
        
        #self.qiime_config['temp_dir'] or '/tmp/'
        if not exists(tmp_dir):
            makedirs(tmp_dir)
            # if test creates the temp dir, also remove it
            #self.dirs_to_remove.append(tmp_dir)
            
        self.wf_out = "/%s/test_processed_data" % environ['HOME']
        #print self.wf_out
        self.dirs_to_remove.append(self.wf_out)
        self.gg_out = os.path.join(self.wf_out, 'gg_97_otus')
        if not exists(self.gg_out):
            makedirs(self.gg_out)
            #self.dirs_to_remove.append(self.gg_out)
            
        self.fasting_mapping_fp = get_tmp_filename(tmp_dir=tmp_dir,
                                                   prefix='qiime_wf_mapping',
                                                   suffix='.txt')
        fasting_mapping_f = open(self.fasting_mapping_fp, 'w')
        fasting_mapping_f.write(fasting_map)
        fasting_mapping_f.close()
        self.files_to_remove.append(self.fasting_mapping_fp)
        
        self.params = parse_qiime_parameters(qiime_parameters_f)

        signal.signal(signal.SIGALRM, timeout)
        # set the 'alarm' to go off in allowed_seconds seconds
        signal.alarm(allowed_seconds_per_test)
Example #19
    def setUp(self):
        """ """
        self.test_data = get_test_data_fps()
        self.files_to_remove = []
        self.dirs_to_remove = []

        # Create example output directory
        tmp_dir = get_qiime_temp_dir()
        self.test_out = mkdtemp(dir=tmp_dir,
                                prefix='core_qiime_analyses_test_',
                                suffix='')
        self.dirs_to_remove.append(self.test_out)

        self.qiime_config = load_qiime_config()
        self.params = parse_qiime_parameters([])
        self.params_sortmerna = parse_qiime_parameters(
            ['pick_otus:otu_picking_method\tsortmerna'])
        self.params_sumaclust = parse_qiime_parameters(
            ['pick_otus:otu_picking_method\tsumaclust'])

        initiate_timeout(60)
    def setUp(self):
        """ """
        
        self.qiime_config = load_qiime_config()
        self.dirs_to_remove = []
        self.files_to_remove = []
        
        # this is specific to the web-apps only
        test_dir = abspath(dirname(__file__))
        sff_original_fp = os.path.join(test_dir, 'support_files',
                                       'Fasting_subset.sff')

        # copy sff file to working directory
        self.sff_dir = tempfile.mkdtemp()
        self.dirs_to_remove.append(self.sff_dir)
        
        self.sff_fp = os.path.join(self.sff_dir, 'Fasting_subset.sff')
        copy(sff_original_fp, self.sff_fp)
        self.files_to_remove.append(self.sff_fp)
        
        tmp_dir = self.qiime_config['temp_dir'] or '/tmp/'
        if not exists(tmp_dir):
            makedirs(tmp_dir)
            # if test creates the temp dir, also remove it
            self.dirs_to_remove.append(tmp_dir)
        
        self.wf_out = get_tmp_filename(tmp_dir=tmp_dir,
                                       prefix='qiime_wf_out', suffix='',
                                       result_constructor=str)
        self.dirs_to_remove.append(self.wf_out)
        
        self.fasting_mapping_fp = get_tmp_filename(tmp_dir=tmp_dir,
                                                   prefix='qiime_wf_mapping',
                                                   suffix='.txt')
        fasting_mapping_f = open(self.fasting_mapping_fp, 'w')
        fasting_mapping_f.write(fasting_map)
        
        fasting_mapping_f.close()
        self.files_to_remove.append(self.fasting_mapping_fp)
        
        working_dir = self.qiime_config['working_dir'] or './'
        jobs_dir = join(working_dir, 'jobs')
        if not exists(jobs_dir):
            # only clean up the jobs dir if it doesn't already exist
            self.dirs_to_remove.append(jobs_dir)
        self.params = parse_qiime_parameters(qiime_parameters_f.split('\n'))

        signal.signal(signal.SIGALRM, timeout)
        # set the 'alarm' to go off in allowed_seconds seconds
        signal.alarm(allowed_seconds_per_test)
Example #21
    def setUp(self):
        """ """
        self.test_data = get_test_data_fps()
        self.files_to_remove = []
        self.dirs_to_remove = []

        # Create example output directory
        tmp_dir = get_qiime_temp_dir()
        self.test_out = mkdtemp(dir=tmp_dir,
                                prefix='core_qiime_analyses_test_',
                                suffix='')
        self.dirs_to_remove.append(self.test_out)

        self.qiime_config = load_qiime_config()
        self.params = parse_qiime_parameters([])

        initiate_timeout(60)
Example #22
    def setUp(self):
        """ """
        self.test_data = get_test_data_fps()
        self.files_to_remove = []
        self.dirs_to_remove = []

        # Create example output directory
        tmp_dir = get_qiime_temp_dir()
        self.test_out = mkdtemp(dir=tmp_dir,
                                prefix='core_qiime_analyses_test_',
                                suffix='')
        self.dirs_to_remove.append(self.test_out)

        self.qiime_config = load_qiime_config()
        self.params = parse_qiime_parameters({})

        initiate_timeout(60)
Example #23
    def test_parse_qiime_parameters(self):
        """parse_qiime_parameters: functions with valid input """
        lines = ["#Don't edit this file!",
                 "pick_otus:similarity 0.94",
                 "pick_otus:otu_picking_method\tcdhit",
                 "align_seqs:verbose",
                 "assign_taxonomy:use_rdp\ttRuE",
                 "assign_taxonomy:something\tNone",
                 "",
                 "#some_script:fake_parameter\t99.0"]
        actual = parse_qiime_parameters(lines)
        expected = {'pick_otus':
                    {'similarity': '0.94', 'otu_picking_method': 'cdhit'},
                    'assign_taxonomy':
                    {'use_rdp': None}}
        self.assertEqual(actual, expected)

        # default dict functions as expected -- looking up a nonexistent key
        # returns an empty dict
        self.assertEqual(actual['some_other_script'], {})
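For context, here is a minimal usage sketch distilled from the test above. It is an illustration rather than code taken from QIIME itself: it assumes a QIIME 1.x / Python 2 environment where parse_qiime_parameters is importable from qiime.parse, as in these examples.

# Minimal sketch (assumed environment: QIIME 1.x, Python 2).
from qiime.parse import parse_qiime_parameters

# each line is "script:parameter<TAB>value"
lines = ["pick_otus:otu_picking_method\tuclust",
         "beta_diversity:metrics\tbray_curtis"]
params = parse_qiime_parameters(lines)
print params['pick_otus']['otu_picking_method']  # 'uclust'
# missing keys come back as an empty dict (the defaultdict behavior
# asserted in the test above)
print params['some_other_script']                # {}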
Example #25
def main():
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    otu_table_fp = opts.otu_table_fp
    output_dir = opts.output_dir
    mapping_fp = opts.mapping_fp
    tree_fp = opts.tree_fp
    verbose = opts.verbose
    print_only = opts.print_only
    seqs_per_sample = opts.seqs_per_sample

    parallel = opts.parallel
    # No longer checking that jobs_to_start > 2, but
    # commenting as we may change our minds about this.
    #if parallel: raise_error_on_parallel_unavailable()

    if opts.parameter_fp:
        try:
            parameter_f = open(opts.parameter_fp, 'U')
        except IOError:
            raise IOError(
                "Can't open parameters file (%s). Does it exist? Do you have read access?"
                % opts.parameter_fp)
        params = parse_qiime_parameters(parameter_f)
        parameter_f.close()
    else:
        params = parse_qiime_parameters([])
        # empty list returns empty defaultdict for now

    jobs_to_start = opts.jobs_to_start
    default_jobs_to_start = qiime_config['jobs_to_start']
    validate_and_set_jobs_to_start(params, jobs_to_start,
                                   default_jobs_to_start, parallel,
                                   option_parser)

    create_dir(output_dir, fail_on_exist=not opts.force)

    if print_only:
        command_handler = print_commands
    else:
        command_handler = call_commands_serially

    if verbose:
        status_update_callback = print_to_stdout
    else:
        status_update_callback = no_status_updates

    run_beta_diversity_through_plots(
        otu_table_fp=otu_table_fp,
        mapping_fp=mapping_fp,
        output_dir=output_dir,
        command_handler=command_handler,
        params=params,
        qiime_config=qiime_config,
        color_by_interesting_fields_only=not opts.color_by_all_fields,
        sampling_depth=seqs_per_sample,
        tree_fp=tree_fp,
        parallel=parallel,
        suppress_emperor_plots=opts.suppress_emperor_plots,
        status_update_callback=status_update_callback)
def run_core_diversity_analyses(
    biom_fp,
    mapping_fp,
    sampling_depth,
    output_dir,
    qiime_config,
    command_handler=call_commands_serially,
    tree_fp=None,
    params=None,
    categories=None,
    arare_min_rare_depth=10,
    arare_num_steps=10,
    parallel=False,
    suppress_taxa_summary=False,
    suppress_beta_diversity=False,
    suppress_alpha_diversity=False,
    suppress_otu_category_significance=False,
    status_update_callback=print_to_stdout):
    """Run QIIME's core diversity analyses on a BIOM table: taxa
    summaries plus alpha and beta diversity, tied together by a
    generated index.html.
    """
    if categories is not None:
        # Validate categories provided by the user
        mapping_data, mapping_comments = \
            parse_mapping_file_to_dict(open(mapping_fp, 'U'))
        metadata_map = MetadataMap(mapping_data, mapping_comments)
        for c in categories:
            if c not in metadata_map.CategoryNames:
                raise ValueError("Category '%s' is not a column header "
                                 "in your mapping file. "
                                 "Categories are case and white space "
                                 "sensitive. Valid choices are: (%s)"
                                 % (c, ', '.join(metadata_map.CategoryNames)))
            if metadata_map.hasSingleCategoryValue(c):
                raise ValueError("Category '%s' contains only one value. "
                                 "Categories analyzed here require at least "
                                 "two values." % c)
    else:
        categories = []
    
    # prep some variables
    if params is None:
        params = parse_qiime_parameters([])
        
    create_dir(output_dir)
    index_fp = '%s/index.html' % output_dir
    index_links = []
    commands = []
    
    # begin logging
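    # link any log files from previous runs so they appear on the index page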
    old_log_fps = glob(join(output_dir,'log_20*txt'))
    log_fp = generate_log_fp(output_dir)
    index_links.append(('Master run log',log_fp,_index_headers['run_summary']))
    for old_log_fp in old_log_fps:
        index_links.append(('Previous run log',old_log_fp,_index_headers['run_summary']))
    logger = WorkflowLogger(log_fp,
                            params=params,
                            qiime_config=qiime_config)
    input_fps = [biom_fp, mapping_fp]
    if tree_fp is not None:
        input_fps.append(tree_fp)
    log_input_md5s(logger, input_fps)

    # run 'biom summarize-table' on input BIOM table
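    # pull user-supplied parameters for this step, if any (absent -> no
    # extra options)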
    try:
        params_str = get_params_str(params['biom-summarize-table'])
    except KeyError:
        params_str = ''
    biom_table_stats_output_fp = '%s/biom_table_summary.txt' % output_dir
    if not exists(biom_table_stats_output_fp):
        biom_table_summary_cmd = \
         "biom summarize-table -i %s -o %s --suppress-md5 %s" % \
         (biom_fp, biom_table_stats_output_fp,params_str)
        commands.append([('Generate BIOM table summary',
                          biom_table_summary_cmd)])
    else:
        logger.write("Skipping 'biom summarize-table' as %s exists.\n\n" \
                     % biom_table_stats_output_fp)
    index_links.append(('BIOM table statistics',
                        biom_table_stats_output_fp,
                        _index_headers['run_summary']))
    
    # filter samples with fewer observations than the requested sampling_depth. 
    # since these get filtered for some analyses (eg beta diversity after
    # even sampling) it's useful to filter them here so they're filtered 
    # from all analyses.
    filtered_biom_fp = "%s/table_mc%d.biom" % (output_dir, sampling_depth)
    if not exists(filtered_biom_fp):
        filter_samples_cmd = "filter_samples_from_otu_table.py -i %s -o %s -n %d" %\
         (biom_fp,filtered_biom_fp,sampling_depth)
        commands.append([('Filter low sequence count samples from table (minimum sequence count: %d)' % sampling_depth,
                          filter_samples_cmd)])
    else:
        logger.write("Skipping filter_samples_from_otu_table.py as %s exists.\n\n" \
                     % filtered_biom_fp)
    biom_fp = filtered_biom_fp
    
    # run initial commands and reset the command list
    if len(commands) > 0:
        command_handler(commands, 
                        status_update_callback, 
                        logger,
                        close_logger_on_success=False)
        commands = []
    
    if not suppress_beta_diversity:
        bdiv_even_output_dir = '%s/bdiv_even%d/' % (output_dir,sampling_depth)
        # Need to check for the existence of any distance matrices, since the user 
        # can select which will be generated.
        existing_dm_fps = glob('%s/*_dm.txt' % bdiv_even_output_dir)
        if len(existing_dm_fps) == 0:
            even_dm_fps = run_beta_diversity_through_plots(
             otu_table_fp=biom_fp, 
             mapping_fp=mapping_fp,
             output_dir=bdiv_even_output_dir,
             command_handler=command_handler,
             params=params,
             qiime_config=qiime_config,
             sampling_depth=sampling_depth,
             tree_fp=tree_fp,
             parallel=parallel,
             logger=logger,
             suppress_md5=True,
             status_update_callback=status_update_callback)
        else:
            logger.write("Skipping beta_diversity_through_plots.py as %s exist(s).\n\n" \
                         % ', '.join(existing_dm_fps))
            # recover the metric name by removing the '_dm.txt' suffix
            # (str.strip('_dm.txt') would strip characters, not the suffix)
            even_dm_fps = [(split(fp)[1][:-len('_dm.txt')], fp)
                           for fp in existing_dm_fps]
        
        # Get make_distance_boxplots parameters
        try:
            params_str = get_params_str(params['make_distance_boxplots'])
        except KeyError:
            params_str = ''
        
        for bdiv_metric, dm_fp in even_dm_fps:
            for category in categories:
                boxplots_output_dir = '%s/%s_boxplots/' % (bdiv_even_output_dir,bdiv_metric)
                plot_output_fp = '%s/%s_Distances.pdf' % (boxplots_output_dir,category)
                stats_output_fp = '%s/%s_Stats.txt' % (boxplots_output_dir,category)
                if not exists(plot_output_fp):
                    boxplots_cmd = \
                     'make_distance_boxplots.py -d %s -f %s -o %s -m %s -n 999 %s' %\
                     (dm_fp, category, boxplots_output_dir, mapping_fp, params_str)
                    commands.append([('Boxplots (%s)' % category,
                                      boxplots_cmd)])
                else:
                    logger.write("Skipping make_distance_boxplots.py for %s as %s exists.\n\n" \
                                 % (category, plot_output_fp))
                index_links.append(('Distance boxplots (%s)' % bdiv_metric,
                                    plot_output_fp,
                                    _index_headers['beta_diversity_even'] % sampling_depth))
                index_links.append(('Distance boxplots statistics (%s)' % bdiv_metric,
                                    stats_output_fp,
                                    _index_headers['beta_diversity_even'] % sampling_depth))
            
            index_links.append(('PCoA plot (%s)' % bdiv_metric,
                                '%s/%s_emperor_pcoa_plot/index.html' % \
                                 (bdiv_even_output_dir,bdiv_metric),
                                _index_headers['beta_diversity_even'] % sampling_depth))
            index_links.append(('Distance matrix (%s)' % bdiv_metric,
                                '%s/%s_dm.txt' % \
                                 (bdiv_even_output_dir,bdiv_metric),
                                _index_headers['beta_diversity_even'] % sampling_depth))
            index_links.append(('Principal coordinate matrix (%s)' % bdiv_metric,
                                '%s/%s_pc.txt' % \
                                 (bdiv_even_output_dir,bdiv_metric),
                                _index_headers['beta_diversity_even'] % sampling_depth))
    
    if not suppress_alpha_diversity:
        ## Alpha rarefaction workflow
        arare_full_output_dir = '%s/arare_max%d/' % (output_dir,sampling_depth)
        rarefaction_plots_output_fp = \
         '%s/alpha_rarefaction_plots/rarefaction_plots.html' % arare_full_output_dir
        if not exists(rarefaction_plots_output_fp):
            run_alpha_rarefaction(
             otu_table_fp=biom_fp,
             mapping_fp=mapping_fp,
             output_dir=arare_full_output_dir,
             command_handler=command_handler,
             params=params,
             qiime_config=qiime_config,
             tree_fp=tree_fp,
             num_steps=arare_num_steps,
             parallel=parallel,
             logger=logger,
             min_rare_depth=arare_min_rare_depth,
             max_rare_depth=sampling_depth,
             suppress_md5=True,
             status_update_callback=status_update_callback)
        else:
            logger.write("Skipping alpha_rarefaction.py as %s exists.\n\n" \
                         % rarefaction_plots_output_fp)
    
        index_links.append(('Alpha rarefaction plots',
                            rarefaction_plots_output_fp,
                            _index_headers['alpha_diversity']))
                        
        collated_alpha_diversity_fps = \
         glob('%s/alpha_div_collated/*txt' % arare_full_output_dir)
        try:
            params_str = get_params_str(params['compare_alpha_diversity'])
        except KeyError:
            params_str = ''
            
        for category in categories:
            for collated_alpha_diversity_fp in collated_alpha_diversity_fps:
                alpha_metric = splitext(split(collated_alpha_diversity_fp)[1])[0]
                alpha_comparison_output_fp = '%s/%s_%s.txt' % \
                 (arare_full_output_dir,category,alpha_metric)
                if not exists(alpha_comparison_output_fp):
                    compare_alpha_cmd = \
                     'compare_alpha_diversity.py -i %s -m %s -c %s -o %s -n 999 %s' %\
                     (collated_alpha_diversity_fp, mapping_fp, category, 
                      alpha_comparison_output_fp, params_str)
                    commands.append([('Compare alpha diversity (%s, %s)' %\
                                       (category,alpha_metric),
                                      compare_alpha_cmd)])
                else:
                    logger.write("Skipping compare_alpha_diversity.py for %s as %s exists.\n\n" \
                                 % (category, alpha_comparison_output_fp))
                index_links.append(
                 ('Alpha diversity statistics (%s, %s)' % (category,alpha_metric),
                  alpha_comparison_output_fp,
                  _index_headers['alpha_diversity']))
    
    if not suppress_taxa_summary:
        taxa_plots_output_dir = '%s/taxa_plots/' % output_dir
        # need to check for existence of any html files, since the user can 
        # select only certain ones to be generated
        existing_taxa_plot_html_fps = glob(join(output_dir,'taxa_summary_plots','*.html'))
        if len(existing_taxa_plot_html_fps) == 0:
            run_summarize_taxa_through_plots(
             otu_table_fp=biom_fp,
             mapping_fp=mapping_fp,
             output_dir=taxa_plots_output_dir,
             mapping_cat=None, 
             sort=True,
             command_handler=command_handler,
             params=params,
             qiime_config=qiime_config,
             logger=logger,
             suppress_md5=True,
             status_update_callback=status_update_callback)
        else:
            logger.write("Skipping summarize_taxa_through_plots.py as %s "
                         "exist(s).\n\n"
                         % ', '.join(existing_taxa_plot_html_fps))

        index_links.append(('Taxa summary bar plots',
                            '%s/taxa_summary_plots/bar_charts.html'\
                              % taxa_plots_output_dir,
                            _index_headers['taxa_summary']))
        index_links.append(('Taxa summary area plots',
                            '%s/taxa_summary_plots/area_charts.html'\
                              % taxa_plots_output_dir,
                            _index_headers['taxa_summary']))
        for category in categories:
            taxa_plots_output_dir = '%s/taxa_plots_%s/' % (output_dir,category)
            # need to check for existence of any html files, since the user can 
            # select only certain ones to be generated
            existing_taxa_plot_html_fps = glob('%s/taxa_summary_plots/*.html' % taxa_plots_output_dir)
            if len(existing_taxa_plot_html_fps) == 0:
                run_summarize_taxa_through_plots(
                 otu_table_fp=biom_fp,
                 mapping_fp=mapping_fp,
                 output_dir=taxa_plots_output_dir,
                 mapping_cat=category, 
                 sort=True,
                 command_handler=command_handler,
                 params=params,
                 qiime_config=qiime_config,
                 logger=logger,
                 suppress_md5=True,
                 status_update_callback=status_update_callback)
            else:
                logger.write("Skipping summarize_taxa_through_plots.py for %s as %s exist(s).\n\n" \
                             % (category, ', '.join(existing_taxa_plot_html_fps)))

            index_links.append(('Taxa summary bar plots',
                                '%s/taxa_summary_plots/bar_charts.html'\
                                  % taxa_plots_output_dir,
                                _index_headers['taxa_summary_categorical'] % category))
            index_links.append(('Taxa summary area plots',
                                '%s/taxa_summary_plots/area_charts.html'\
                                  % taxa_plots_output_dir,
                                _index_headers['taxa_summary_categorical'] % category))
    
    if not suppress_otu_category_significance:
        try:
            params_str = get_params_str(params['otu_category_significance'])
        except KeyError:
            params_str = ''
        # OTU category significance
        for category in categories:
            category_significance_fp = \
                '%s/category_significance_%s.txt' % (output_dir, category)
            if not exists(category_significance_fp):
                # Build the OTU category significance command
                category_significance_cmd = \
                    'otu_category_significance.py -i %s -m %s -c %s -o %s %s' %\
                    (biom_fp, mapping_fp, category,
                     category_significance_fp, params_str)
                commands.append([('OTU category significance (%s)' % category,
                                  category_significance_cmd)])
            else:
                logger.write("Skipping otu_category_significance.py for %s "
                             "as %s exists.\n\n"
                             % (category, category_significance_fp))

            index_links.append(('Category significance (%s)' % category,
                                category_significance_fp,
                                _index_headers['otu_category_sig']))
    filtered_biom_gzip_fp = '%s.gz' % filtered_biom_fp
    if not exists(filtered_biom_gzip_fp):
        commands.append([('Compress the filtered BIOM table','gzip %s' % filtered_biom_fp)])
        index_links.append(('Filtered BIOM table (minimum sequence count: %d)' % sampling_depth,
                            filtered_biom_gzip_fp,
                            _index_headers['run_summary']))
    else:
        logger.write("Skipping compression of the filtered BIOM table as "
                     "%s exists.\n\n" % filtered_biom_gzip_fp)
    if len(commands) > 0:
        command_handler(commands, status_update_callback, logger)
    else:
        logger.close()
    
    generate_index_page(index_links, index_fp)
def run_core_diversity_analyses(
    biom_fp,
    mapping_fp,
    sampling_depth,
    output_dir,
    qiime_config,
    command_handler=call_commands_serially,
    tree_fp=None,
    params=None,
    categories=None,
    arare_min_rare_depth=10,
    arare_num_steps=10,
    parallel=False,
    suppress_taxa_summary=False,
    suppress_beta_diversity=False,
    suppress_alpha_diversity=False,
    suppress_otu_category_significance=False,
    status_update_callback=print_to_stdout):
    """Run QIIME's core diversity analyses on a BIOM table: taxa
    summaries plus alpha and beta diversity, tied together by a
    generated index.html.
    """
    if categories is not None:
        # Validate categories provided by the user
        mapping_data, mapping_comments = \
            parse_mapping_file_to_dict(open(mapping_fp, 'U'))
        metadata_map = MetadataMap(mapping_data, mapping_comments)
        for c in categories:
            if c not in metadata_map.CategoryNames:
                raise ValueError("Category '%s' is not a column header "
                                 "in your mapping file. "
                                 "Categories are case and white space "
                                 "sensitive. Valid choices are: (%s)"
                                 % (c, ', '.join(metadata_map.CategoryNames)))
            if metadata_map.hasSingleCategoryValue(c):
                raise ValueError("Category '%s' contains only one value. "
                                 "Categories analyzed here require at least "
                                 "two values." % c)
    else:
        categories = []
    
    # prep some variables
    if params is None:
        params = parse_qiime_parameters([])
        
    create_dir(output_dir)
    index_fp = '%s/index.html' % output_dir
    index_links = []
    commands = []
    
    # begin logging
    log_fp = generate_log_fp(output_dir)
    index_links.append(('Master run log',log_fp,_index_headers['run_summary']))
    logger = WorkflowLogger(log_fp,
                            params=params,
                            qiime_config=qiime_config)
    input_fps = [biom_fp, mapping_fp]
    if tree_fp is not None:
        input_fps.append(tree_fp)
    log_input_md5s(logger, input_fps)

    # run print_biom_table_summary.py on input BIOM table
    try:
        params_str = get_params_str(params['print_biom_table_summary'])
    except KeyError:
        params_str = ''
    biom_table_stats_output_fp = '%s/biom_table_summary.txt' % output_dir
    print_biom_table_summary_cmd = \
     "print_biom_table_summary.py -i %s -o %s --suppress_md5 %s" % \
     (biom_fp, biom_table_stats_output_fp,params_str)
    index_links.append(('BIOM table statistics',
                        biom_table_stats_output_fp,
                        _index_headers['run_summary']))
    commands.append([('Generate BIOM table summary',
                      print_biom_table_summary_cmd)])
    
    # filter samples with fewer observations than the requested sampling_depth. 
    # since these get filtered for some analyses (eg beta diversity after
    # even sampling) it's useful to filter them here so they're filtered 
    # from all analyses.
    filtered_biom_fp = "%s/table_mc%d.biom" % (output_dir, sampling_depth)
    filter_samples_cmd = "filter_samples_from_otu_table.py -i %s -o %s -n %d" %\
     (biom_fp,filtered_biom_fp,sampling_depth)
    commands.append([('Filter low sequence count samples from table (minimum sequence count: %d)' % sampling_depth,
                      filter_samples_cmd)])
    biom_fp = filtered_biom_fp
    
    # run initial commands and reset the command list
    command_handler(commands, 
                    status_update_callback, 
                    logger,
                    close_logger_on_success=False)
    commands = []
    
    if not suppress_beta_diversity:
        bdiv_even_output_dir = '%s/bdiv_even%d/' % (output_dir,sampling_depth)
        even_dm_fps = run_beta_diversity_through_plots(
         otu_table_fp=biom_fp, 
         mapping_fp=mapping_fp,
         output_dir=bdiv_even_output_dir,
         command_handler=command_handler,
         params=params,
         qiime_config=qiime_config,
         sampling_depth=sampling_depth,
         # force suppression of distance histograms - boxplots work better
         # in this context, and are created below.
         histogram_categories=[],
         tree_fp=tree_fp,
         parallel=parallel,
         logger=logger,
         suppress_md5=True,
         status_update_callback=status_update_callback)
    
        for bdiv_metric, dm_fp in even_dm_fps:
            for category in categories:
                boxplots_output_dir = '%s/%s_boxplots/' % (bdiv_even_output_dir,bdiv_metric)
                try:
                    params_str = get_params_str(params['make_distance_boxplots'])
                except KeyError:
                    params_str = ''
                boxplots_cmd = \
                 'make_distance_boxplots.py -d %s -f %s -o %s -m %s -n 999 %s' %\
                 (dm_fp, category, boxplots_output_dir, mapping_fp, params_str)
                commands.append([('Boxplots (%s)' % category,
                                  boxplots_cmd)])
                index_links.append(('Distance boxplots (%s)' % bdiv_metric,
                                    '%s/%s_Distances.pdf' % \
                                     (boxplots_output_dir,category),
                                    _index_headers['beta_diversity_even'] % sampling_depth))
                index_links.append(('Distance boxplots statistics (%s)' % bdiv_metric,
                                    '%s/%s_Stats.txt' % \
                                     (boxplots_output_dir,category),
                                    _index_headers['beta_diversity_even'] % sampling_depth))
            
            index_links.append(('3D plot (%s, continuous coloring)' % bdiv_metric,
                                '%s/%s_3d_continuous/%s_pc_3D_PCoA_plots.html' % \
                                 (bdiv_even_output_dir,bdiv_metric,bdiv_metric),
                                _index_headers['beta_diversity_even'] % sampling_depth))
            index_links.append(('3D plot (%s, discrete coloring)' % bdiv_metric,
                                '%s/%s_3d_discrete/%s_pc_3D_PCoA_plots.html' % \
                                 (bdiv_even_output_dir,bdiv_metric,bdiv_metric),
                                _index_headers['beta_diversity_even'] % sampling_depth))
            index_links.append(('2D plot (%s, continuous coloring)' % bdiv_metric,
                                '%s/%s_2d_continuous/%s_pc_2D_PCoA_plots.html' % \
                                 (bdiv_even_output_dir,bdiv_metric,bdiv_metric),
                                _index_headers['beta_diversity_even'] % sampling_depth))
            index_links.append(('2D plot (%s, discrete coloring)' % bdiv_metric,
                                '%s/%s_2d_discrete/%s_pc_2D_PCoA_plots.html' % \
                                 (bdiv_even_output_dir,bdiv_metric,bdiv_metric),
                                _index_headers['beta_diversity_even'] % sampling_depth))
            index_links.append(('Distance matrix (%s)' % bdiv_metric,
                                '%s/%s_dm.txt' % \
                                 (bdiv_even_output_dir,bdiv_metric),
                                _index_headers['beta_diversity_even'] % sampling_depth))
            index_links.append(('Principal coordinate matrix (%s)' % bdiv_metric,
                                '%s/%s_pc.txt' % \
                                 (bdiv_even_output_dir,bdiv_metric),
                                _index_headers['beta_diversity_even'] % sampling_depth))
    
    if not suppress_alpha_diversity:
        ## Alpha rarefaction workflow
        arare_full_output_dir = '%s/arare_max%d/' % (output_dir,sampling_depth)
        run_alpha_rarefaction(
         otu_table_fp=biom_fp,
         mapping_fp=mapping_fp,
         output_dir=arare_full_output_dir,
         command_handler=command_handler,
         params=params,
         qiime_config=qiime_config,
         tree_fp=tree_fp,
         num_steps=arare_num_steps,
         parallel=parallel,
         logger=logger,
         min_rare_depth=arare_min_rare_depth,
         max_rare_depth=sampling_depth,
         suppress_md5=True,
         status_update_callback=status_update_callback)
    
        index_links.append(('Alpha rarefaction plots',
                            '%s/alpha_rarefaction_plots/rarefaction_plots.html'\
                              % arare_full_output_dir,
                            _index_headers['alpha_diversity']))
                        
        collated_alpha_diversity_fps = \
         glob('%s/alpha_div_collated/*txt' % arare_full_output_dir)
        try:
            params_str = get_params_str(params['compare_alpha_diversity'])
        except KeyError:
            params_str = ''
        for category in categories:
            for collated_alpha_diversity_fp in collated_alpha_diversity_fps:
                alpha_metric = splitext(split(collated_alpha_diversity_fp)[1])[0]
                alpha_comparison_output_fp = '%s/%s_%s.txt' % \
                 (arare_full_output_dir,category,alpha_metric)
                compare_alpha_cmd = \
                 'compare_alpha_diversity.py -i %s -m %s -c %s -o %s -n 999 %s' %\
                 (collated_alpha_diversity_fp, mapping_fp, category, 
                  alpha_comparison_output_fp, params_str)
                commands.append([('Compare alpha diversity (%s, %s)' %\
                                   (category,alpha_metric),
                                  compare_alpha_cmd)])
                index_links.append(
                 ('Alpha diversity statistics (%s, %s)' % (category,alpha_metric),
                  alpha_comparison_output_fp,
                  _index_headers['alpha_diversity']))
    
    if not suppress_taxa_summary:
        taxa_plots_output_dir = '%s/taxa_plots/' % output_dir
        run_summarize_taxa_through_plots(
         otu_table_fp=biom_fp,
         mapping_fp=mapping_fp,
         output_dir=taxa_plots_output_dir,
         mapping_cat=None, 
         sort=True,
         command_handler=command_handler,
         params=params,
         qiime_config=qiime_config,
         logger=logger,
         suppress_md5=True,
         status_update_callback=status_update_callback)
    

        index_links.append(('Taxa summary bar plots',
                            '%s/taxa_summary_plots/bar_charts.html'\
                              % taxa_plots_output_dir,
                            _index_headers['taxa_summary']))
        index_links.append(('Taxa summary area plots',
                            '%s/taxa_summary_plots/area_charts.html'\
                              % taxa_plots_output_dir,
                            _index_headers['taxa_summary']))
        for category in categories:
            taxa_plots_output_dir = '%s/taxa_plots_%s/' % (output_dir,category)
            run_summarize_taxa_through_plots(
             otu_table_fp=biom_fp,
             mapping_fp=mapping_fp,
             output_dir=taxa_plots_output_dir,
             mapping_cat=category, 
             sort=True,
             command_handler=command_handler,
             params=params,
             qiime_config=qiime_config,
             logger=logger,
             suppress_md5=True,
             status_update_callback=status_update_callback)

            index_links.append(('Taxa summary bar plots',
                                '%s/taxa_summary_plots/bar_charts.html'\
                                  % taxa_plots_output_dir,
                                _index_headers['taxa_summary_categorical'] % category))
            index_links.append(('Taxa summary area plots',
                                '%s/taxa_summary_plots/area_charts.html'\
                                  % taxa_plots_output_dir,
                                _index_headers['taxa_summary_categorical'] % category))
    
    if not suppress_otu_category_significance:
        # OTU category significance
        for category in categories:
            category_significance_fp = \
             '%s/category_significance_%s.txt' % (output_dir, category)
            try:
                params_str = get_params_str(params['otu_category_significance'])
            except KeyError:
                params_str = ''
            # Build the OTU category significance command
            category_significance_cmd = \
             'otu_category_significance.py -i %s -m %s -c %s -o %s %s' %\
             (biom_fp, mapping_fp, category, 
              category_significance_fp, params_str)
            commands.append([('OTU category significance (%s)' % category, 
                              category_significance_cmd)])
                          
            index_links.append(('Category significance (%s)' % category,
                        category_significance_fp,
                        _index_headers['otu_category_sig']))
    
    commands.append([('Compress the filtered BIOM table','gzip %s' % filtered_biom_fp)])
    index_links.append(('Filtered BIOM table (minimum sequence count: %d)' % sampling_depth,
                        '%s.gz' % filtered_biom_fp,
                        _index_headers['run_summary']))
    
    command_handler(commands, status_update_callback, logger)
    generate_index_page(index_links,index_fp)
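For reference, a minimal sketch of calling this workflow function directly. The file paths are placeholders and the import locations are assumptions (QIIME 1.x moved these names between qiime.workflow and qiime.workflow.util across releases), so treat this as illustrative rather than canonical:

# Hypothetical driver -- paths and import locations are assumptions.
# run_core_diversity_analyses is the function defined above (or imported
# from the same module).
from qiime.util import load_qiime_config
from qiime.parse import parse_qiime_parameters
from qiime.workflow.util import call_commands_serially, no_status_updates

run_core_diversity_analyses(
    biom_fp='otu_table.biom',                 # placeholder input table
    mapping_fp='map.txt',                     # placeholder mapping file
    sampling_depth=100,
    output_dir='core_diversity_out',
    qiime_config=load_qiime_config(),
    command_handler=call_commands_serially,
    tree_fp='rep_set.tre',                    # needed for phylogenetic metrics
    params=parse_qiime_parameters([]),        # empty parameter set
    categories=['SampleType'],                # must be mapping file columns
    status_update_callback=no_status_updates)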
Example #28
def main():
    option_parser, opts, args =\
       parse_command_line_parameters(**script_info)

    parameters = {}

    # get the tree insertion method to use
    module = opts.insertion_method

    # create output directory
    output_dir = opts.output_dir
    create_dir(output_dir)

    # list of tree insertion methods
    tree_insertion_module_names = \
                {'raxml_v730':cogent.app.raxml_v730,
                 'parsinsert':cogent.app.parsinsert,
                 'pplacer':cogent.app.pplacer}

    # load input sequences and convert to phylip since the tools require
    # the query sequences to have phylip-compliant names
    load_aln = MinimalFastaParser(open(opts.input_fasta_fp, 'U'))
    aln = DenseAlignment(load_aln)
    seqs, align_map = aln.toPhylip()

    if opts.method_params_fp:
        param_dict = parse_qiime_parameters(open(opts.method_params_fp, 'U'))
    else:
        param_dict = {}

    if module == 'raxml_v730':
        # load the reference sequences
        load_ref_aln = \
            DenseAlignment(MinimalFastaParser(open(opts.refseq_fp,'U')))

        # combine and load the reference plus query
        combined_aln = MinimalFastaParser(StringIO(load_ref_aln.toFasta() + \
                                                   '\n' + aln.toFasta()))
        # overwrite the alignment map
        aln = DenseAlignment(combined_aln)
        seqs, align_map = aln.toPhylip()

        try:
            parameters = param_dict['raxml']
        except KeyError:
            parameters = {}

        tree = convert_tree_tips(align_map, opts.starting_tree_fp)

        # write out the tree with phylip labels
        updated_tree_fp = join(output_dir, \
                                '%s_phylip_named_tree.tre' % (module))
        write_updated_tree_file(updated_tree_fp, tree)

        # set the primary parameters for raxml
        parameters['-w'] = abspath(output_dir) + '/'
        parameters["-n"] = split(splitext(get_tmp_filename())[0])[-1]
        parameters["-t"] = updated_tree_fp

        if "-f" not in parameters:
            parameters["-f"] = 'v'
        if "-m" not in parameters:
            parameters["-m"] = 'GTRGAMMA'

    elif module == 'pplacer':
        try:
            parameters = param_dict['pplacer']
        except KeyError:
            parameters = {}

        # make sure stats file is passed
        if not opts.stats_fp:
            raise IOError, \
                'When using pplacer, the RAxML produced info file is required.'

        # set the primary parameters for pplacer - allow for user-defined
        parameters['--out-dir'] = abspath(output_dir) + '/'
        parameters["-t"] = opts.starting_tree_fp
        parameters['-r'] = opts.refseq_fp
        parameters['-s'] = opts.stats_fp

    elif module == 'parsinsert':
        try:
            parameters = param_dict['parsinsert']
        except KeyError:
            parameters = {}

        # define log fp
        log_fp = join(output_dir, 'parsinsert.log')

        # define tax assignment values fp
        tax_assign_fp = join(output_dir, 'parsinsert_assignments.log')
        parameters["-l"] = log_fp
        parameters["-o"] = tax_assign_fp
        parameters["-s"] = opts.refseq_fp
        parameters["-t"] = opts.starting_tree_fp

    # call the module and return a tree object
    result = \
        tree_insertion_module_names[module].insert_sequences_into_tree(seqs,
                                                moltype=DNA, params=parameters)

    result_tree = strip_and_rename_unwanted_labels_from_tree(align_map, result)

    # write out the resulting tree
    final_tree = join(output_dir, '%s_final_placement.tre' % (module))
    write_updated_tree_file(final_tree, result_tree)
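The method_params_fp consumed above uses the standard QIIME parameters format (section:option value, one per line). A small illustrative file for this script might look like the following; the option keys are examples consistent with the checks above, not an exhaustive or authoritative list:

# hypothetical insertion_params.txt -- contents are examples only:
#   raxml:-m GTRGAMMA
#   raxml:-f v
param_dict = parse_qiime_parameters(open('insertion_params.txt', 'U'))
parameters = param_dict.get('raxml', {})  # same effect as the try/except above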
Example #29
def main():
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    verbose = opts.verbose

    input_biom_fp = opts.input_biom_fp
    output_dir = opts.output_dir
    categories = opts.categories
    if categories is not None:
        categories = categories.split(',')
    tree_fp = opts.tree_fp
    mapping_fp = opts.mapping_fp
    verbose = opts.verbose
    parallel = opts.parallel
    sampling_depth = opts.sampling_depth
    nonphylogenetic_diversity = opts.nonphylogenetic_diversity
    print_only = opts.print_only
    suppress_taxa_summary = opts.suppress_taxa_summary
    suppress_beta_diversity = opts.suppress_beta_diversity
    suppress_alpha_diversity = opts.suppress_alpha_diversity
    suppress_group_significance = opts.suppress_group_significance

    if opts.parameter_fp is not None:
        params = parse_qiime_parameters(open(opts.parameter_fp, 'U'))
    else:
        params = parse_qiime_parameters([])

    if nonphylogenetic_diversity:
        # if the user specified --nonphylogenetic_diversity and they
        # didn't define metrics in a parameters file, define them here
        if 'metrics' not in params['beta_diversity']:
            params['beta_diversity']['metrics'] = 'bray_curtis'
        if 'metrics' not in params['alpha_diversity']:
            params['alpha_diversity']['metrics'] = 'observed_otus,chao1'

    jobs_to_start = opts.jobs_to_start
    default_jobs_to_start = qiime_config['jobs_to_start']
    validate_and_set_jobs_to_start(params, jobs_to_start,
                                   default_jobs_to_start, parallel,
                                   option_parser)

    # Create the output directory. If it already exists and the user
    # isn't trying to recover from a failed run, raise an error.
    create_dir(output_dir, fail_on_exist=not opts.recover_from_failure)

    if print_only:
        command_handler = print_commands
    else:
        command_handler = call_commands_serially

    if verbose:
        status_update_callback = print_to_stdout
    else:
        status_update_callback = no_status_updates

    run_core_diversity_analyses(
        biom_fp=input_biom_fp,
        mapping_fp=mapping_fp,
        sampling_depth=sampling_depth,
        output_dir=output_dir,
        qiime_config=load_qiime_config(),
        command_handler=command_handler,
        tree_fp=tree_fp,
        params=params,
        categories=categories,
        arare_min_rare_depth=10,
        arare_num_steps=10,
        parallel=parallel,
        suppress_taxa_summary=suppress_taxa_summary,
        suppress_beta_diversity=suppress_beta_diversity,
        suppress_alpha_diversity=suppress_alpha_diversity,
        suppress_group_significance=suppress_group_significance,
        status_update_callback=status_update_callback)
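Every example above threads a command_handler through the workflow, which decouples building the command list from executing it: print_commands just echoes the commands, while call_commands_serially shells them out. A minimal sketch of a drop-in handler honoring the same call signature used throughout these examples (this is not QIIME's implementation):

def log_only_command_handler(commands, status_update_callback, logger,
                             close_logger_on_success=True):
    # commands is a list of lists of (description, command_string) pairs,
    # matching how commands.append([('...', cmd)]) is used above.
    for command_group in commands:
        for description, cmd in command_group:
            status_update_callback('%s: %s' % (description, cmd))
            logger.write('# %s\n%s\n\n' % (description, cmd))
    if close_logger_on_success:
        logger.close()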
def main():
    option_parser, opts, args =\
        parse_command_line_parameters(suppress_verbose=True, **script_info)

    input_dir = opts.input_dir
    demultiplexing_method = opts.demultiplexing_method
    parameter_fp = opts.parameter_fp
    read_indicator = opts.read_indicator
    barcode_indicator = opts.barcode_indicator
    mapping_indicator = opts.mapping_indicator
    mapping_extensions = opts.mapping_extensions.split(',')
    sampleid_indicator = opts.sampleid_indicator
    leading_text = opts.leading_text
    trailing_text = opts.trailing_text
    include_input_dir_path = opts.include_input_dir_path
    output_dir = abspath(opts.output_dir)
    remove_filepath_in_name = opts.remove_filepath_in_name
    print_only = opts.print_only

    if remove_filepath_in_name and not include_input_dir_path:
        option_parser.error("If --remove_filepath_in_name enabled, "
                            "--include_input_dir_path must be enabled.")

    if opts.parameter_fp:
        with open(opts.parameter_fp, 'U') as parameter_f:
            params_dict = parse_qiime_parameters(parameter_f)
        params_str = get_params_str(params_dict['split_libraries_fastq'])
    else:
        params_dict = {}
        params_str = ""

    create_dir(output_dir)

    all_fastq = []
    all_mapping = []

    extensions = ['.fastq.gz', '.fastq', '.fq.gz', '.fq']

    for root, dirs, fps in walk(input_dir):
        for fp in fps:
            for extension in extensions:
                if fp.endswith(extension):
                    all_fastq += [abspath(join(root, fp))]

    if demultiplexing_method == 'mapping_barcode_files':
        for root, dirs, fps in walk(input_dir):
            for fp in fps:
                for mapping_extension in mapping_extensions:
                    if fp.endswith(mapping_extension):
                        all_mapping += [abspath(join(root, fp))]

        all_files = get_matching_files(all_fastq, all_mapping, read_indicator,
                                       barcode_indicator, mapping_indicator)
    else:
        all_files = all_fastq

    commands = create_commands_slf(all_files, demultiplexing_method,
                                   output_dir, params_str, leading_text,
                                   trailing_text, include_input_dir_path,
                                   remove_filepath_in_name, sampleid_indicator)

    qiime_config = load_qiime_config()
    if print_only:
        command_handler = print_commands
    else:
        command_handler = call_commands_serially
    logger = WorkflowLogger(generate_log_fp(output_dir),
                            params=params_dict,
                            qiime_config=qiime_config)
    # Call the command handler on the list of commands
    command_handler(commands,
                    status_update_callback=no_status_updates,
                    logger=logger,
                    close_logger_on_success=True)
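The two directory walks above implement the same pattern: collect absolute paths of files whose names end in one of several extensions. A self-contained helper capturing that pattern (the function name is ours, not part of QIIME's API):

from os import walk
from os.path import abspath, join

def collect_files_by_extension(input_dir, extensions):
    """Return absolute paths under input_dir ending in any given extension."""
    matches = []
    for root, dirs, fps in walk(input_dir):
        for fp in fps:
            if fp.endswith(tuple(extensions)):
                matches.append(abspath(join(root, fp)))
    return matches

# e.g. collect_files_by_extension('reads/', ['.fastq.gz', '.fastq', '.fq.gz', '.fq'])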
def write_mapping_and_otu_table(data_access, table_col_value, fs_fp, web_fp, 
                                file_name_prefix,user_id,meta_id,params_path,
                                rarefied_at,otutable_rarefied_at,
                                jobs_to_start,tax_name,tree_fp):
    """ Write out the mapping file and OTU-table """
    
    tmp_prefix=get_tmp_filename('',suffix='').strip()

    total1 = time()
    unique_cols=[]
    
    # get the directory location for the files to write
    otu_table_file_dir=path.join(fs_fp,'otu_table_files')
    mapping_file_dir=path.join(fs_fp,'mapping_files')
    zip_file_dir=path.join(fs_fp,'zip_files')
    #pcoa_file_dir_loc=path.join(fs_fp,'pcoa_files')
    
    # get the url location for the files to write
    otu_table_file_dir_db=path.join(web_fp,'otu_table_files')
    mapping_file_dir_db=path.join(web_fp,'mapping_files')
    zip_file_dir_db=path.join(web_fp,'zip_files')
    pcoa_file_dir_loc_db=path.join(web_fp,'pcoa_files')
    
    # generate random directory name
    alphabet = "ABCDEFGHIJKLMNOPQRSTUZWXYZ"
    alphabet += alphabet.lower()
    alphabet += "01234567890"
    random_dir_name=''.join([choice(alphabet) for i in range(10)])
    unique_name=strftime("%Y_%m_%d_%H_%M_%S")+random_dir_name
    #plot_unique_name=beta_metric+'_plots_'+unique_name
    #pcoa_file_dir=os.path.join(pcoa_file_dir_loc,plot_unique_name)
    #pcoa_file_dir_db=os.path.join(pcoa_file_dir_loc_db,plot_unique_name)
    #create_dir(pcoa_file_dir)
    map_files=[]
    
    t1 = time()
    
    # Get the user details
    user_details = data_access.getUserDetails(user_id)
    if not user_details:
        raise ValueError('No details found for this user')
    is_admin = user_details['is_admin']

    # get mapping results
    results,cur_description=get_mapping_data(data_access, is_admin, 
                                             table_col_value, user_id)

    # need to reconnect to data_access, since it gets closed due to con.close()
    try:
        from data_access_connections import data_access_factory
        from enums import ServerConfig
        import cx_Oracle
        data_access = data_access_factory(ServerConfig.data_access_type)
    except ImportError:
        print "NOT IMPORTING QIIMEDATAACCESS"
        pass
        
    # get filepaths for mapping files using the run-prefixes
    tmp_mapping_file = open(os.path.join(mapping_file_dir, file_name_prefix + \
                                                        '_map_tmp.txt'), 'w')
    map_filepath=os.path.join(mapping_file_dir, file_name_prefix + '_' + \
                                                        tmp_prefix + '_map.txt')
    map_filepath_db=os.path.join(mapping_file_dir_db, file_name_prefix + '_' + \
                                                        tmp_prefix + '_map.txt')

    # All mapping files start with an opening hash
    tmp_mapping_file.write('#')

    # determine if a column is a controlled vocabulary column
    controlled_vocab_columns={}
    for i,column in enumerate(cur_description):
        if column in ['SAMPLE_NAME', 'BARCODE', 'LINKER', 'PRIMER', \
                      'EXPERIMENT_TITLE']:
            pass
        else:
            valid_controlled_vocab=\
                        data_access.checkIfColumnControlledVocab(column[0])
            if valid_controlled_vocab:
                controlled_vocab_columns[str(column[0])]=i

    # create a dictionary storing the controlled columns and their values
    controlled_vocab_lookup={}
    for column in controlled_vocab_columns:
        vocab_id_to_valid_term=data_access.getValidControlledVocabTerms(column)
        controlled_vocab_lookup[controlled_vocab_columns[column]] = \
                                                    dict(vocab_id_to_valid_term)
    
    # Write out the key field headers
    to_write = ''
    for column in cur_description:
        if column[0]=='SAMPLEID':
            to_write+='SampleID\t'
        elif column[0]=='BARCODE':
            to_write+='BarcodeSequence\t'
        elif column[0]=='DESCRIPTION':
            to_write+='Description\t'
        elif column[0]=='LINKERPRIMERSEQUENCE':
            to_write+='LinkerPrimerSequence\t'
        else:
            to_write += column[0] + '\t'

    tmp_mapping_file.write(to_write[0:len(to_write)-1] + '\n')

    sample_to_run_prefix=[]
    study_id_and_run_prefix=[]
    samples_list=[]
    map_file_write=[]
    duplicate_samples=[]

    for row in results:
        # Can't use something like '\t'.join(row) because not all items in list
        # are string values, hence the explicit loop structure here.
        to_write = ''
        sample_to_run_prefix.append(list((str(row[0]),str(row[4]),str(row[3]))))
        
        if list((str(row[3]),str(row[4]))) not in study_id_and_run_prefix:
            study_id_and_run_prefix.append(list((str(row[3]),str(row[4]))))
        
        if str(row[0]) in samples_list:
            # Order of row goes as follows: SampleID, BarcodeSequence,
            # LinkerPrimerSequence,Run_Prefix, then Description is at the end
            row=list(row)
            row[0]=row[0]+'_'+str(row[4])
            row=tuple(row)
            duplicate_samples.append(str(row[0]))
        else:    
            samples_list.append(str(row[0]))
        
        # write out controlled vocabulary values
        for i,column in enumerate(row):
            if i in controlled_vocab_lookup:
                val = str(column)
                if val == 'None':
                    new_val = ''
                else:
                    new_val=controlled_vocab_lookup[i][int(val)]
                to_write += new_val + '\t'
            else:
                val = str(column)
                if val == 'None':
                    val = ''
                to_write += val + '\t'
                
        # Write the row minus the last tab
        tmp_mapping_file.write(to_write[0:len(to_write)-1] + '\n')

    tmp_mapping_file.close()
    open_tmp_mapping_file=open(os.path.join(mapping_file_dir, 
                                file_name_prefix + '_map_tmp.txt')).readlines()
    mapping_file = open(os.path.join(mapping_file_dir, file_name_prefix + \
                                        '_' + tmp_prefix + '_map.txt'), 'w')
    mapping_lines = []
    all_headers = {}
    result = []
    
    # iterate over mapping files, parsing each
    data, current_headers, current_comments = \
       parse_mapping_file(open_tmp_mapping_file,strip_quotes=False)
    all_headers.update(dict.fromkeys(current_headers))
    for d in data:
        current_values = {}
        for i,v in enumerate(d):
            if v !='':
                current_values[current_headers[i]] = v
        mapping_lines.append(current_values)
    
    # remove and place the fields whose order is important
    del all_headers['SampleID']
    del all_headers['BarcodeSequence']
    del all_headers['LinkerPrimerSequence']
    del all_headers['Description']
    all_headers = ['SampleID','BarcodeSequence','LinkerPrimerSequence'] \
     + list(all_headers) + ['Description']
    
    
    # generate the mapping file lines containing all fields
    result.append(all_headers)
    for mapping_line in mapping_lines:
        result.append([mapping_line.get(h,'NA') for h in all_headers if h!=''])

    #Create an array using multiple columns from mapping file
    try:
        parameter_f = open(params_path)
    except IOError:
        raise IOError,\
         "Can't open parameters file (%s). Does it exist? Do you have read access?"\
         % params_path
    
    # determine if columns should be combined
    qiime_params=parse_qiime_parameters(parameter_f)
    try:
        combined_mapping_categories = \
                        qiime_params['combine_metadata']['columns'].split(',')
    except KeyError:
        combined_mapping_categories=''
    
    # combine metadata columns
    if combined_mapping_categories:
        for mapping_category in combined_mapping_categories:
            combinecolorby=mapping_category.strip('\'').strip('"').split('_and_')
            result=combine_map_header_cols(combinecolorby,result)
        
    # write final mapping file
    final_mapping=[]
    for i,mdata in enumerate(result):
        if i==0:
            final_mapping.append('#'+'\t'.join(mdata))
        else:
            final_mapping.append('\t'.join(mdata))
        
    #test=merge_mapping_files([merged_file])
    mapping_file.write('\n'.join(final_mapping))
    mapping_file.close()
    
    #flush result
    final_mapping=[]
    result=[]
    
    t2 = time()
    print 'Making map file: %s' % (t2 - t1)
    
    t1 = time()
    
    # write the OTU-table
    otu_table_filepath, otu_table_filepath_db, otu_table_fname = \
                    write_otu_table(data_access, 
                                    samples_list, tax_name, file_name_prefix,
                                    tmp_prefix, otu_table_file_dir,
                                    otu_table_file_dir_db)
    
    # zip up the files and add the paths to DB
    zip_fpath, zip_fpath_db = zip_and_add_filepaths(data_access, 
                          file_name_prefix, 
                          tmp_prefix, meta_id, otu_table_file_dir, 
                          mapping_file_dir, zip_file_dir, zip_file_dir_db, 
                          map_filepath, map_filepath_db, otu_table_filepath, 
                          otu_table_filepath_db, params_path)
    
    # rarefy OTU table if necessary
    if otutable_rarefied_at:
        rarefy_otu_table(data_access, otu_table_fname, otu_table_file_dir, 
                         otu_table_file_dir_db, otutable_rarefied_at, meta_id, 
                         otu_table_filepath, otu_table_filepath_db, zip_fpath)
                         
    # run any additional analysis, such as heatmap, bdiv, alpha-rare
    run_other_qiime_analysis(data_access, fs_fp, web_fp, otu_table_filepath, 
                              map_filepath, file_name_prefix, user_id, meta_id, 
                              params_path, rarefied_at, jobs_to_start, tree_fp, 
                              zip_fpath, zip_fpath_db)
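The controlled-vocabulary handling in write_mapping_and_otu_table reduces to a per-column lookup: for columns flagged as controlled, the numeric code stored in each row is replaced by its vocabulary term, and 'None' becomes an empty string. A self-contained sketch with invented data:

# Invented example data: column index 2 is a controlled-vocabulary column.
controlled_vocab_lookup = {2: {1: 'feces', 2: 'soil'}}

def translate_row(row, controlled_vocab_lookup):
    out = []
    for i, value in enumerate(row):
        val = str(value)
        if val == 'None':
            out.append('')
        elif i in controlled_vocab_lookup:
            out.append(controlled_vocab_lookup[i][int(val)])
        else:
            out.append(val)
    return '\t'.join(out)

# translate_row(('Sample1', 'ACGT', 2), controlled_vocab_lookup)
# -> 'Sample1\tACGT\tsoil'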
Example #33
def run_core_diversity_analyses(
    biom_fp,
    mapping_fp,
    sampling_depth,
    output_dir,
    qiime_config,
    command_handler=call_commands_serially,
    tree_fp=None,
    params=None,
    categories=None,
    arare_min_rare_depth=10,
    arare_num_steps=10,
    parallel=False,
    status_update_callback=print_to_stdout):
    """
    """

    if categories is not None:
        # Validate categories provided by the user
        mapping_data, mapping_comments = \
         parse_mapping_file_to_dict(open(mapping_fp,'U'))
        metadata_map = MetadataMap(mapping_data, mapping_comments)
        for c in categories:
            if c not in metadata_map.CategoryNames:
                raise ValueError, ("Category '%s' is not a column header "
                 "in your mapping file. "
                 "Categories are case and white space sensitive. Valid "
                 "choices are: (%s)" % (c,', '.join(metadata_map.CategoryNames)))
            if metadata_map.hasSingleCategoryValue(c):
                raise ValueError, ("Category '%s' contains only one value. "
                 "Categories analyzed here require at least two values." % c)
            
    else:
        categories = []
    
    # prep some variables
    if params is None:
        params = parse_qiime_parameters([])
        
    create_dir(output_dir)
    index_fp = '%s/index.html' % output_dir
    index_links = []
    commands = []
    python_exe_fp = qiime_config['python_exe_fp']
    script_dir = get_qiime_scripts_dir()
    
    # begin logging
    log_fp = generate_log_fp(output_dir)
    index_links.append(('Master run log',log_fp,'Log files'))
    logger = WorkflowLogger(log_fp,
                            params=params,
                            qiime_config=qiime_config)
    input_fps = [biom_fp,mapping_fp]
    if tree_fp is not None:
        input_fps.append(tree_fp)
    log_input_md5s(logger,input_fps)
    
    
    bdiv_even_output_dir = '%s/bdiv_even%d/' % (output_dir,sampling_depth)
    even_dm_fps = run_beta_diversity_through_plots(
     otu_table_fp=biom_fp, 
     mapping_fp=mapping_fp,
     output_dir=bdiv_even_output_dir,
     command_handler=command_handler,
     params=params,
     qiime_config=qiime_config,
     sampling_depth=sampling_depth,
     # force suppression of distance histograms - boxplots work better
     # in this context, and are created below.
     histogram_categories=[],
     tree_fp=tree_fp,
     parallel=parallel,
     logger=logger,
     status_update_callback=status_update_callback)
    
    for bdiv_metric, dm_fp in even_dm_fps:
        for category in categories:
            boxplots_output_dir = '%s/%s_boxplots/' % (bdiv_even_output_dir,bdiv_metric)
            try:
                params_str = get_params_str(params['make_distance_boxplots'])
            except KeyError:
                params_str = ''
            boxplots_cmd = \
             'make_distance_boxplots.py -d %s -f %s -o %s -m %s -n 999 %s' %\
             (dm_fp, category, boxplots_output_dir, mapping_fp, params_str)
            commands.append([('Boxplots (%s)' % category,
                              boxplots_cmd)])
            index_links.append(('Distance boxplots (%s)' % bdiv_metric,
                                '%s/%s_Distances.pdf' % \
                                 (boxplots_output_dir,category),
                                'Beta diversity results (even sampling: %d)' % sampling_depth))
            index_links.append(('Distance boxplots statistics (%s)' % bdiv_metric,
                                '%s/%s_Stats.txt' % \
                                 (boxplots_output_dir,category),
                                'Beta diversity results (even sampling: %d)' % sampling_depth))
            
        index_links.append(('3D plot (%s, continuous coloring)' % bdiv_metric,
                            '%s/%s_3d_continuous/%s_pc_3D_PCoA_plots.html' % \
                             (bdiv_even_output_dir,bdiv_metric,bdiv_metric),
                            'Beta diversity results (even sampling: %d)' % sampling_depth))
        index_links.append(('3D plot (%s, discrete coloring)' % bdiv_metric,
                            '%s/%s_3d_discrete/%s_pc_3D_PCoA_plots.html' % \
                             (bdiv_even_output_dir,bdiv_metric,bdiv_metric),
                            'Beta diversity results (even sampling: %d)' % sampling_depth))
        index_links.append(('2D plot (%s, continuous coloring)' % bdiv_metric,
                            '%s/%s_2d_continuous/%s_pc_2D_PCoA_plots.html' % \
                             (bdiv_even_output_dir,bdiv_metric,bdiv_metric),
                            'Beta diversity results (even sampling: %d)' % sampling_depth))
        index_links.append(('2D plot (%s, discrete coloring)' % bdiv_metric,
                            '%s/%s_2d_discrete/%s_pc_2D_PCoA_plots.html' % \
                             (bdiv_even_output_dir,bdiv_metric,bdiv_metric),
                            'Beta diversity results (even sampling: %d)' % sampling_depth))
        index_links.append(('Distance matrix (%s)' % bdiv_metric,
                            '%s/%s_dm.txt' % \
                             (bdiv_even_output_dir,bdiv_metric),
                            'Beta diversity results (even sampling: %d)' % sampling_depth))
        index_links.append(('Principal coordinate matrix (%s)' % bdiv_metric,
                            '%s/%s_pc.txt' % \
                             (bdiv_even_output_dir,bdiv_metric),
                            'Beta diversity results (even sampling: %d)' % sampling_depth))
        
    ## Alpha rarefaction workflow
    arare_full_output_dir = '%s/arare_max%d/' % (output_dir,sampling_depth)
    run_qiime_alpha_rarefaction(
     otu_table_fp=biom_fp,
     mapping_fp=mapping_fp,
     output_dir=arare_full_output_dir,
     command_handler=command_handler,
     params=params,
     qiime_config=qiime_config,
     tree_fp=tree_fp,
     num_steps=arare_num_steps,
     parallel=parallel,
     logger=logger,
     min_rare_depth=arare_min_rare_depth,
     max_rare_depth=sampling_depth,
     status_update_callback=status_update_callback)
    
    index_links.append(('Alpha rarefaction plots',
                        '%s/alpha_rarefaction_plots/rarefaction_plots.html'\
                          % arare_full_output_dir,
                        "Alpha rarefaction results"))
                        
    collated_alpha_diversity_fps = \
     glob('%s/alpha_div_collated/*txt' % arare_full_output_dir)
    try:
        params_str = get_params_str(params['compare_alpha_diversity'])
    except KeyError:
        params_str = ''
    for c in categories:
        for collated_alpha_diversity_fp in collated_alpha_diversity_fps:
            alpha_metric = splitext(split(collated_alpha_diversity_fp)[1])[0]
            alpha_comparison_output_fp = '%s/%s_%s.txt' % \
             (arare_full_output_dir,c,alpha_metric)
            compare_alpha_cmd = \
             'compare_alpha_diversity.py -i %s -m %s -c %s -d %s -o %s -n 999 %s' %\
             (collated_alpha_diversity_fp, mapping_fp, c, 
              sampling_depth, alpha_comparison_output_fp, params_str)
            commands.append([('Compare alpha diversity (%s, %s)' %\
                               (c,alpha_metric),
                              compare_alpha_cmd)])
            index_links.append(
             ('Alpha diversity statistics (%s, %s)' % (c,alpha_metric),
              alpha_comparison_output_fp,
              "Alpha rarefaction results"))
    
    taxa_plots_output_dir = '%s/taxa_plots/' % output_dir
    run_summarize_taxa_through_plots(
     otu_table_fp=biom_fp,
     mapping_fp=mapping_fp,
     output_dir=taxa_plots_output_dir,
     mapping_cat=None, 
     sort=True,
     command_handler=command_handler,
     params=params,
     qiime_config=qiime_config,
     logger=logger, 
     status_update_callback=status_update_callback)
    

    index_links.append(('Taxa summary bar plots',
                        '%s/taxa_summary_plots/bar_charts.html'\
                          % taxa_plots_output_dir,
                        "Taxonomic summary results"))
    index_links.append(('Taxa summary area plots',
                        '%s/taxa_summary_plots/area_charts.html'\
                          % taxa_plots_output_dir,
                        "Taxonomic summary results"))
    for c in categories:
        taxa_plots_output_dir = '%s/taxa_plots_%s/' % (output_dir,c)
        run_summarize_taxa_through_plots(
         otu_table_fp=biom_fp,
         mapping_fp=mapping_fp,
         output_dir=taxa_plots_output_dir,
         mapping_cat=c, 
         sort=True,
         command_handler=command_handler,
         params=params,
         qiime_config=qiime_config,
         logger=logger, 
         status_update_callback=status_update_callback)

        index_links.append(('Taxa summary bar plots',
                            '%s/taxa_summary_plots/bar_charts.html'\
                              % taxa_plots_output_dir,
                            "Taxonomic summary results (by %s)" % c))
        index_links.append(('Taxa summary area plots',
                            '%s/taxa_summary_plots/area_charts.html'\
                              % taxa_plots_output_dir,
                            "Taxonomic summary results (by %s)" % c))
    
    # OTU category significance
    for category in categories:
        category_significance_fp = \
         '%s/category_significance_%s.txt' % (output_dir, category)
        try:
            params_str = get_params_str(params['otu_category_significance'])
        except KeyError:
            params_str = ''
        # Build the OTU category significance command
        category_significance_cmd = \
         'otu_category_significance.py -i %s -m %s -c %s -o %s %s' %\
         (biom_fp, mapping_fp, category, 
          category_significance_fp, params_str)
        commands.append([('OTU category significance (%s)' % category, 
                          category_significance_cmd)])
                          
        index_links.append(('Category significance (%s)' % category,
                    category_significance_fp,
                    "Category results"))
    
    command_handler(commands, status_update_callback, logger)
    generate_index_page(index_links,index_fp)
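Across all of these workflows, get_params_str(params[section]) turns one section of a parsed QIIME parameters file into extra command-line text appended to the generated command. An illustrative round trip (the option name and the exact output formatting of get_params_str are assumptions):

from StringIO import StringIO  # Python 2, matching these examples

params_f = StringIO("compare_alpha_diversity:test_type parametric\n")
params = parse_qiime_parameters(params_f)
params_str = get_params_str(params['compare_alpha_diversity'])
# expected to yield something like '--test_type parametric', which the
# workflow pastes onto the end of the compare_alpha_diversity.py command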
def main():
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    verbose = opts.verbose

    input_biom_fp = opts.input_biom_fp
    output_dir = opts.output_dir
    categories = opts.categories
    if categories is not None:
        categories = categories.split(',')
    tree_fp = opts.tree_fp
    mapping_fp = opts.mapping_fp
    verbose = opts.verbose
    parallel = opts.parallel
    sampling_depth = opts.sampling_depth
    nonphylogenetic_diversity = opts.nonphylogenetic_diversity
    print_only = opts.print_only
    suppress_taxa_summary = opts.suppress_taxa_summary
    suppress_beta_diversity = opts.suppress_beta_diversity
    suppress_alpha_diversity = opts.suppress_alpha_diversity
    suppress_group_significance = opts.suppress_group_significance

    if opts.parameter_fp is not None:
        params = parse_qiime_parameters(open(opts.parameter_fp, 'U'))
    else:
        params = parse_qiime_parameters([])

    if nonphylogenetic_diversity:
        # if the user specified --nonphylogenetic_diversity and they
        # didn't define metrics in a parameters file, define them here
        if 'metrics' not in params['beta_diversity']:
            params['beta_diversity']['metrics'] = 'bray_curtis'
        if 'metrics' not in params['alpha_diversity']:
            params['alpha_diversity']['metrics'] = 'observed_otus,chao1'
    else:
        if tree_fp is None:
            option_parser.error(
                "--tree_fp is required unless --nonphylogenetic_diversity "
                "is passed.")

    jobs_to_start = opts.jobs_to_start
    default_jobs_to_start = qiime_config['jobs_to_start']
    validate_and_set_jobs_to_start(params,
                                   jobs_to_start,
                                   default_jobs_to_start,
                                   parallel,
                                   option_parser)

    # Create the output directory. If it already exists and the user
    # isn't trying to recover from a failed run, raise an error.
    create_dir(output_dir, fail_on_exist=not opts.recover_from_failure)

    if print_only:
        command_handler = print_commands
    else:
        command_handler = call_commands_serially

    if verbose:
        status_update_callback = print_to_stdout
    else:
        status_update_callback = no_status_updates

    run_core_diversity_analyses(
        biom_fp=input_biom_fp,
        mapping_fp=mapping_fp,
        sampling_depth=sampling_depth,
        output_dir=output_dir,
        qiime_config=load_qiime_config(),
        command_handler=command_handler,
        tree_fp=tree_fp,
        params=params,
        categories=categories,
        arare_min_rare_depth=10,
        arare_num_steps=10,
        parallel=parallel,
        suppress_taxa_summary=suppress_taxa_summary,
        suppress_beta_diversity=suppress_beta_diversity,
        suppress_alpha_diversity=suppress_alpha_diversity,
        suppress_group_significance=suppress_group_significance,
        status_update_callback=status_update_callback)
def main():
    option_parser, opts, args =\
       parse_command_line_parameters(**script_info)

    # get all the options
    cd_dir = path.join(opts.fs_fp, 'arare')
    tmp_prefix = get_tmp_filename('', suffix='').strip()
    output_dir = path.join(opts.fs_fp, 'arare', 'arare_' + tmp_prefix)
    web_fp = path.join(opts.web_fp, 'arare', 'arare_' + tmp_prefix)
    otu_table_fp = opts.otu_table_fp
    mapping_file_fp = opts.mapping_file_fp
    file_name_prefix = opts.fname_prefix
    user_id = int(opts.user_id)
    meta_id = int(opts.meta_id)
    bdiv_rarefied_at = int(opts.bdiv_rarefied_at)
    jobs_to_start = opts.jobs_to_start
    tree_fp = opts.tree_fp
    command_handler = call_commands_serially
    status_update_callback = no_status_updates
    zip_fpath = opts.zip_fpath
    zip_fpath_db = opts.zip_fpath_db
    run_date = opts.run_date
    force = True
    
    try:
        from data_access_connections import data_access_factory
        from enums import ServerConfig
        import cx_Oracle
        data_access = data_access_factory(ServerConfig.data_access_type)
    except ImportError:
        print "NOT IMPORTING QIIMEDATAACCESS"
        pass
        
    try:
        parameter_f = open(opts.params_path)
    except IOError:
        raise IOError("Can't open parameters file (%s). Does it exist? "
                      "Do you have read access?" % opts.params_path)
    
    params = parse_qiime_parameters(parameter_f)
    
    try:
        makedirs(output_dir)
    except OSError:
        if force:
            pass
        else:
            # Since the analysis can take quite a while, I put this check
            # in to help users avoid overwriting previous output.
            print "Output directory already exists. Please choose "+\
             "a different directory, or force overwrite with -f."
            exit(1)
    
    commands = []
    python_exe_fp = qiime_config['python_exe_fp']
    script_dir = get_qiime_scripts_dir()
    logger = WorkflowLogger(generate_log_fp(output_dir),
                            params=params,
                            qiime_config=qiime_config)
    
    # determine whether to run alpha-diversity in serial or parallel
    serial_or_parallel = params['serial_or_parallel']['method']
    if serial_or_parallel == 'Serial':
        arare_cmd = '%s %s/alpha_rarefaction.py -i %s -m %s -o %s -t %s -p %s -f' %\
            (python_exe_fp, script_dir, otu_table_fp, mapping_file_fp,
             output_dir, tree_fp, opts.params_path)
    else:
        arare_cmd = '%s %s/alpha_rarefaction.py -i %s -m %s -o %s -t %s -a -O 50 -p %s -f' %\
            (python_exe_fp, script_dir, otu_table_fp, mapping_file_fp,
             output_dir, tree_fp, opts.params_path)

    commands.append([('Alpha-Rarefaction', arare_cmd)])
    
    command_handler(commands, status_update_callback, logger)

    # zip the distance matrices
    cmd_call = 'cd %s; zip -r %s %s' % (cd_dir, zip_fpath, 'arare_' + tmp_prefix)
    system(cmd_call)

    # convert the output path into a web link
    web_link=path.join(web_fp, 'alpha_rarefaction_plots',
                       'rarefaction_plots.html')
    
    # record the results link in the database
    valid = data_access.addMetaAnalysisFiles(True, int(meta_id), web_link,
                                             'ARARE', run_date, 'ARARE')
    if not valid:
        raise ValueError('There was an issue uploading the filepaths to the DB!')
def main():
    option_parser, opts, args =\
       parse_command_line_parameters(**script_info)
    verbose = opts.verbose

    input_fps = opts.input_fps
    refseqs_fp = opts.reference_fp
    output_dir = opts.output_dir
    verbose = opts.verbose
    print_only = False
    percent_subsample = opts.percent_subsample
    new_ref_set_id = opts.new_ref_set_id
    prefilter_refseqs_fp = opts.prefilter_refseqs_fp
    prefilter_percent_id = opts.prefilter_percent_id
    if prefilter_percent_id == 0.0:
        prefilter_percent_id = None

    parallel = opts.parallel
    # No longer checking that jobs_to_start > 2, but
    # commenting as we may change our minds about this.
    #if parallel: raise_error_on_parallel_unavailable()

    if opts.parameter_fp:
        try:
            parameter_f = open(opts.parameter_fp)
        except IOError:
            raise IOError("Can't open parameters file (%s). Does it exist? "
                          "Do you have read access?" % opts.parameter_fp)
        params = parse_qiime_parameters(parameter_f)
    else:
        params = parse_qiime_parameters([])
        # empty list returns empty defaultdict for now

    jobs_to_start = opts.jobs_to_start
    default_jobs_to_start = qiime_config['jobs_to_start']
    validate_and_set_jobs_to_start(params, jobs_to_start,
                                   default_jobs_to_start, parallel,
                                   option_parser)

    try:
        makedirs(output_dir)
    except OSError:
        if opts.force:
            pass
        else:
            print "Output directory already exists. Please choose "+\
             "a different directory, or force overwrite with -f."
            exit(1)

    if print_only:
        command_handler = print_commands
    else:
        command_handler = call_commands_serially

    if verbose:
        status_update_callback = print_to_stdout
    else:
        status_update_callback = no_status_updates

    if len(input_fps) == 1:
        pick_subsampled_open_reference_otus(
            input_fp=input_fps[0],
            refseqs_fp=refseqs_fp,
            output_dir=output_dir,
            percent_subsample=percent_subsample,
            new_ref_set_id=new_ref_set_id,
            command_handler=command_handler,
            params=params,
            min_otu_size=opts.min_otu_size,
            qiime_config=qiime_config,
            prefilter_refseqs_fp=prefilter_refseqs_fp,
            prefilter_percent_id=prefilter_percent_id,
            step1_otu_map_fp=opts.step1_otu_map_fp,
            step1_failures_fasta_fp=opts.step1_failures_fasta_fp,
            parallel=parallel,
            suppress_step4=opts.suppress_step4,
            logger=None,
            status_update_callback=status_update_callback)
    else:
        iterative_pick_subsampled_open_reference_otus(
            input_fps=input_fps,
            refseqs_fp=refseqs_fp,
            output_dir=output_dir,
            percent_subsample=percent_subsample,
            new_ref_set_id=new_ref_set_id,
            command_handler=command_handler,
            params=params,
            min_otu_size=opts.min_otu_size,
            qiime_config=qiime_config,
            prefilter_refseqs_fp=prefilter_refseqs_fp,
            prefilter_percent_id=prefilter_percent_id,
            step1_otu_map_fp=opts.step1_otu_map_fp,
            step1_failures_fasta_fp=opts.step1_failures_fasta_fp,
            parallel=parallel,
            suppress_step4=opts.suppress_step4,
            logger=None,
            status_update_callback=status_update_callback)
def main():
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    verbose = opts.verbose

    input_fp = opts.input_fp
    output_dir = opts.output_dir
    verbose = opts.verbose
    print_only = opts.print_only

    parallel = opts.parallel
    # No longer checking that jobs_to_start > 2, but
    # commenting as we may change our minds about this.
    #if parallel: raise_error_on_parallel_unavailable()

    if opts.parameter_fp:
        try:
            parameter_f = open(opts.parameter_fp)
        except IOError:
            raise IOError("Can't open parameters file (%s). Does it exist? "
                          "Do you have read access?" % opts.parameter_fp)
        params = parse_qiime_parameters(parameter_f)
    else:
        params = parse_qiime_parameters([])
        # empty list returns empty defaultdict for now

    jobs_to_start = opts.jobs_to_start
    default_jobs_to_start = qiime_config['jobs_to_start']
    validate_and_set_jobs_to_start(params, jobs_to_start,
                                   default_jobs_to_start, parallel,
                                   option_parser)

    try:
        makedirs(output_dir)
    except OSError:
        if opts.force:
            pass
        else:
            # Since the analysis can take quite a while, I put this check
            # in to help users avoid overwriting previous output.
            print "Output directory already exists. Please choose "+\
             "a different directory, or force overwrite with -f."
            exit(1)

    if print_only:
        command_handler = print_commands
    else:
        command_handler = call_commands_serially

    if verbose:
        status_update_callback = print_to_stdout
    else:
        status_update_callback = no_status_updates

    run_qiime_data_preparation(
        input_fp,
        output_dir,
        command_handler=command_handler,
        params=params,
        qiime_config=qiime_config,
        parallel=parallel,
        status_update_callback=status_update_callback)
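
All of these workflows build their shell invocations the same way: commands is a list of command groups, each group a list of (human-readable description, shell command string) pairs, and the chosen command_handler either echoes them (print_commands) or executes them (call_commands_serially) while reporting through the status callback and logger, as in the command_handler(commands, status_update_callback, logger) calls above. A minimal sketch of building such a list; the file names are illustrative only:

# Hedged sketch of the command-group structure consumed by the command
# handlers. Nothing here is executed; with print_commands as the
# handler, the pairs would simply be echoed.
sketch_commands = []
sketch_commands.append([('Generate BIOM table summary',
                         'biom summarize-table -i table.biom -o summary.txt')])
sketch_commands.append([('Alpha-Rarefaction',
                         'alpha_rarefaction.py -i table.biom -m map.txt -o arare/')])
# sketch_commands would then be passed as:
# command_handler(sketch_commands, status_update_callback, logger)
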
Example #38
def main():
    option_parser, opts, args = parse_command_line_parameters(**script_info)
    if opts.output_dir is None:
        opts.output_dir = opts.output_filepath + '_dir'

    if opts.parameter_fp:
        try:
            parameter_f = open(opts.parameter_fp, 'U')
        except IOError:
            raise IOError("Can't open parameters file (%s). Does it exist? Do you have read access?"
                          % opts.parameter_fp)
        params = parse_qiime_parameters(parameter_f)
        parameter_f.close()
    else:
        params = parse_qiime_parameters([])
        # empty list returns empty defaultdict for now

    try:
        makedirs(opts.output_dir)
    except OSError:
        if opts.force:
            pass
        else:
            # Since the analysis can take quite a while, I put this check
            # in to help users avoid overwriting previous output.
            option_parser.error("Output directory already exists. Please choose"
                                " a different directory, or force overwrite with -f.")

    if opts.print_only:
        command_handler = print_commands
    else:
        command_handler = call_commands_serially

    if opts.verbose:
        status_update_callback = print_to_stdout
    else:
        status_update_callback = no_status_updates

    # set env variable
    if opts.platform == 'flx':
        existing_pyro_fp = os.environ['PYRO_LOOKUP_FILE']
        new_pyro_fp = os.path.join(
            os.path.split(existing_pyro_fp)[0],
            'LookUp_E123.dat')
        os.environ['PYRO_LOOKUP_FILE'] = new_pyro_fp
    elif opts.platform == 'titanium':
        existing_pyro_fp = os.environ['PYRO_LOOKUP_FILE']
        new_pyro_fp = os.path.join(
            os.path.split(existing_pyro_fp)[0],
            'LookUp_Titanium.dat')
        os.environ['PYRO_LOOKUP_FILE'] = new_pyro_fp
    else:
        raise RuntimeError(
            'could not find PYRO_LOOKUP_FILE for platform ' +
            opts.platform)

    if opts.truncate_len:
        try:
            truncate_len_int_check = int(opts.truncate_len)
            truncate_len = str(truncate_len_int_check)
        except ValueError:
            raise ValueError(("If specified, truncate_len must be int type."))
    else:
        truncate_len = None

    run_ampliconnoise(
        mapping_fp=opts.mapping_fp,
        output_dir=os.path.abspath(opts.output_dir),
        command_handler=command_handler,
        params=params,
        qiime_config=qiime_config,
        status_update_callback=status_update_callback,
        chimera_alpha=opts.chimera_alpha,
        chimera_beta=opts.chimera_beta,
        sff_txt_fp=opts.sff_filepath,
        numnodes=opts.np,
        suppress_perseus=opts.suppress_perseus,
        output_filepath=os.path.abspath(opts.output_filepath),
        platform=opts.platform,
        seqnoise_resolution=opts.seqnoise_resolution,
        truncate_len=truncate_len
    )
def main():
    option_parser, opts, args =\
       parse_command_line_parameters(**script_info)
    verbose = opts.verbose
    
    input_fps = opts.input_fps
    refseqs_fp = opts.reference_fp
    output_dir = opts.output_dir
    verbose = opts.verbose
    print_only = False
    percent_subsample = opts.percent_subsample
    new_ref_set_id = opts.new_ref_set_id
    prefilter_refseqs_fp = opts.prefilter_refseqs_fp
    prefilter_percent_id = opts.prefilter_percent_id
    if prefilter_percent_id == 0.0:
        prefilter_percent_id = None
    
    parallel = opts.parallel
    # No longer checking that jobs_to_start > 2, but
    # commenting as we may change our minds about this.
    #if parallel: raise_error_on_parallel_unavailable()
    
    if opts.parameter_fp:
        try:
            parameter_f = open(opts.parameter_fp, 'U')
        except IOError:
            raise IOError("Can't open parameters file (%s). Does it exist? "
                          "Do you have read access?" % opts.parameter_fp)
        params = parse_qiime_parameters(parameter_f)
        parameter_f.close()
    else:
        params = parse_qiime_parameters([]) 
        # empty list returns empty defaultdict for now
    
    jobs_to_start = opts.jobs_to_start
    default_jobs_to_start = qiime_config['jobs_to_start']
    validate_and_set_jobs_to_start(params,
                                   jobs_to_start,
                                   default_jobs_to_start,
                                   parallel,
                                   option_parser)
    
    try:
        makedirs(output_dir)
    except OSError:
        if opts.force:
            pass
        else:
            option_parser.error("Output directory already exists. Please choose"
                " a different directory, or force overwrite with -f.")

    if print_only:
        command_handler = print_commands
    else:
        command_handler = call_commands_serially
    
    if verbose:
        status_update_callback = print_to_stdout
    else:
        status_update_callback = no_status_updates

    if len(input_fps) == 1:
        pick_subsampled_open_reference_otus(input_fp=input_fps[0], 
                                  refseqs_fp=refseqs_fp,
                                  output_dir=output_dir,
                                  percent_subsample=percent_subsample,
                                  new_ref_set_id=new_ref_set_id,
                                  command_handler=command_handler,
                                  params=params,
                                  min_otu_size=opts.min_otu_size,
                                  run_assign_tax=not opts.suppress_taxonomy_assignment,
                                  run_align_and_tree=not opts.suppress_align_and_tree,
                                  qiime_config=qiime_config,
                                  prefilter_refseqs_fp=prefilter_refseqs_fp,
                                  prefilter_percent_id=prefilter_percent_id,
                                  step1_otu_map_fp=opts.step1_otu_map_fp,
                                  step1_failures_fasta_fp=opts.step1_failures_fasta_fp,
                                  parallel=parallel,
                                  suppress_step4=opts.suppress_step4,
                                  logger=None,
                                  status_update_callback=status_update_callback)
    else:    
        iterative_pick_subsampled_open_reference_otus(input_fps=input_fps,
                              refseqs_fp=refseqs_fp,
                              output_dir=output_dir,
                              percent_subsample=percent_subsample,
                              new_ref_set_id=new_ref_set_id,
                              command_handler=command_handler,
                              params=params,
                              min_otu_size=opts.min_otu_size,
                              run_assign_tax=not opts.suppress_taxonomy_assignment,
                              run_align_and_tree=not opts.suppress_align_and_tree,
                              qiime_config=qiime_config,
                              prefilter_refseqs_fp=prefilter_refseqs_fp,
                              prefilter_percent_id=prefilter_percent_id,
                              step1_otu_map_fp=opts.step1_otu_map_fp,
                              step1_failures_fasta_fp=opts.step1_failures_fasta_fp,
                              parallel=parallel,
                              suppress_step4=opts.suppress_step4,
                              logger=None,
                              status_update_callback=status_update_callback)
def main():
    option_parser, opts, args =\
       parse_command_line_parameters(**script_info)
    
    study_id = opts.study_id

    for i in range(1, 25):
        output_dir = ('/home/wwwdevuser/user_data/studies/'
                      'study_%s/processed_data_%s/' % (study_id, i))
        if not exists(output_dir):
            break

    print(output_dir)
    sff_fname = opts.sff_fname
    map_fname = opts.map_fname
    verbose = opts.verbose
    print_only = opts.print_only
    parallel = opts.parallel
    denoise = opts.denoise
    
    if parallel: 
        raise_error_on_parallel_unavailable()

    try:
        parameter_f = open(opts.parameter_fp)
    except IOError:
        raise IOError("Can't open parameters file (%s). Does it exist? "
                      "Do you have read access?" % opts.parameter_fp)

    try:
        makedirs(output_dir)
    except OSError:
        if opts.force:
            pass
        else:
            # Since the analysis can take quite a while, I put this check
            # in to help users avoid overwriting previous output.
            print "Output directory already exists. Please choose "+\
            "a different directory, or force overwrite with -f."
            exit(1)

    if print_only:
        command_handler = print_commands
    else:
        command_handler = web_app_call_commands_serially

    if verbose:
        status_update_callback = print_to_stdout
    else:
        status_update_callback = no_status_updates

    run_process_sff_through_pick_otus(sff_input_fp=sff_fname,
                                      mapping_fp=map_fname,
                                      output_dir=output_dir,
                                      denoise=denoise,
                                      command_handler=command_handler,
                                      params=parse_qiime_parameters(parameter_f),
                                      qiime_config=qiime_config,
                                      parallel=parallel,
                                      status_update_callback=status_update_callback)
def main():
    option_parser, opts, args = parse_command_line_parameters(**script_info)
    verbose = opts.verbose

    input_fps = opts.input_fps
    refseqs_fp = opts.reference_fp
    output_dir = opts.output_dir
    otu_picking_method = opts.otu_picking_method
    verbose = opts.verbose
    print_only = False
    percent_subsample = opts.percent_subsample
    new_ref_set_id = opts.new_ref_set_id
    prefilter_refseqs_fp = opts.prefilter_refseqs_fp
    prefilter_percent_id = opts.prefilter_percent_id
    if prefilter_percent_id == 0.0:
        prefilter_percent_id = None

    if otu_picking_method == 'uclust':
        denovo_otu_picking_method = 'uclust'
        reference_otu_picking_method = 'uclust_ref'
    elif otu_picking_method == 'usearch61':
        denovo_otu_picking_method = 'usearch61'
        reference_otu_picking_method = 'usearch61_ref'
    elif otu_picking_method == 'sortmerna_sumaclust':
        denovo_otu_picking_method = 'sumaclust'
        reference_otu_picking_method = 'sortmerna'
        # SortMeRNA uses the E-value to filter out erroneous
        # sequences, so the prefilter percent ID option does not
        # apply to this tool
        if prefilter_percent_id > 0.0:
            prefilter_percent_id = None
    else:
        # it shouldn't be possible to get here
        option_parser.error('Unknown OTU picking method: %s' %
                            otu_picking_method)

    parallel = opts.parallel
    # No longer checking that jobs_to_start > 2, but
    # commenting as we may change our minds about this.
    #if parallel: raise_error_on_parallel_unavailable()

    if opts.parameter_fp:
        try:
            parameter_f = open(opts.parameter_fp, 'U')
        except IOError:
            raise IOError("Can't open parameters file (%s). Does it exist? "
                          "Do you have read access?" % opts.parameter_fp)
        params = parse_qiime_parameters(parameter_f)
        parameter_f.close()
    else:
        params = parse_qiime_parameters([])
        # empty list returns empty defaultdict for now

    jobs_to_start = opts.jobs_to_start
    default_jobs_to_start = qiime_config['jobs_to_start']
    validate_and_set_jobs_to_start(params, jobs_to_start,
                                   default_jobs_to_start, parallel, option_parser)

    try:
        makedirs(output_dir)
    except OSError:
        if opts.force:
            pass
        else:
            option_parser.error("Output directory already exists. Please "
                                "choose a different directory, or force overwrite with -f.")

    if print_only:
        command_handler = print_commands
    else:
        command_handler = call_commands_serially

    if verbose:
        status_update_callback = print_to_stdout
    else:
        status_update_callback = no_status_updates

    if len(input_fps) == 1:
        pick_subsampled_open_reference_otus(input_fp=input_fps[0],
                                            refseqs_fp=refseqs_fp, output_dir=output_dir,
                                            percent_subsample=percent_subsample, new_ref_set_id=new_ref_set_id,
                                            command_handler=command_handler, params=params,
                                            min_otu_size=opts.min_otu_size,
                                            run_assign_tax=not opts.suppress_taxonomy_assignment,
                                            run_align_and_tree=not opts.suppress_align_and_tree,
                                            qiime_config=qiime_config,
                                            prefilter_refseqs_fp=prefilter_refseqs_fp,
                                            prefilter_percent_id=prefilter_percent_id,
                                            step1_otu_map_fp=opts.step1_otu_map_fp,
                                            step1_failures_fasta_fp=opts.step1_failures_fasta_fp,
                                            parallel=parallel, suppress_step4=opts.suppress_step4, logger=None,
                                            denovo_otu_picking_method=denovo_otu_picking_method,
                                            reference_otu_picking_method=reference_otu_picking_method,
                                            status_update_callback=status_update_callback)
    else:
        iterative_pick_subsampled_open_reference_otus(input_fps=input_fps,
                                                      refseqs_fp=refseqs_fp, output_dir=output_dir,
                                                      percent_subsample=percent_subsample, new_ref_set_id=new_ref_set_id,
                                                      command_handler=command_handler, params=params,
                                                      min_otu_size=opts.min_otu_size,
                                                      run_assign_tax=not opts.suppress_taxonomy_assignment,
                                                      run_align_and_tree=not opts.suppress_align_and_tree,
                                                      qiime_config=qiime_config,
                                                      prefilter_refseqs_fp=prefilter_refseqs_fp,
                                                      prefilter_percent_id=prefilter_percent_id,
                                                      step1_otu_map_fp=opts.step1_otu_map_fp,
                                                      step1_failures_fasta_fp=opts.step1_failures_fasta_fp,
                                                      parallel=parallel, suppress_step4=opts.suppress_step4, logger=None,
                                                      denovo_otu_picking_method=denovo_otu_picking_method,
                                                      reference_otu_picking_method=reference_otu_picking_method,
                                                      status_update_callback=status_update_callback)
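
The if/elif chain that pairs each de novo method with its reference counterpart is pure data, so a lookup table is an equivalent, arguably tidier way to express it. A hedged refactoring sketch, mirroring the branches above without changing their pairings:

# Hedged alternative to the otu_picking_method branches above; the
# sortmerna_sumaclust case would still need its prefilter_percent_id
# override handled separately.
OTU_METHOD_PAIRS = {
    'uclust': ('uclust', 'uclust_ref'),
    'usearch61': ('usearch61', 'usearch61_ref'),
    'sortmerna_sumaclust': ('sumaclust', 'sortmerna'),
}
# denovo_otu_picking_method, reference_otu_picking_method = \
#     OTU_METHOD_PAIRS[otu_picking_method]
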
Example #42
def main():
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    verbose = opts.verbose

    input_fp = opts.input_fp
    reference_fp = opts.reference_fp
    taxonomy_fp = opts.taxonomy_fp
    output_dir = opts.output_dir
    verbose = opts.verbose
    print_only = opts.print_only
    assign_taxonomy = opts.assign_taxonomy

    parallel = opts.parallel
    # No longer checking that jobs_to_start > 2, but
    # commenting as we may change our minds about this.
    # if parallel: raise_error_on_parallel_unavailable()

    if opts.parameter_fp:
        try:
            parameter_f = open(opts.parameter_fp, 'U')
        except IOError:
            raise IOError(
                "Can't open parameters file (%s). Does it exist? Do you have read access?"
                % opts.parameter_fp)
        params = parse_qiime_parameters(parameter_f)
        parameter_f.close()
    else:
        params = parse_qiime_parameters([])
        # empty list returns empty defaultdict for now

    jobs_to_start = opts.jobs_to_start
    default_jobs_to_start = qiime_config['jobs_to_start']
    validate_and_set_jobs_to_start(params, jobs_to_start,
                                   default_jobs_to_start, parallel,
                                   option_parser)

    if print_only:
        command_handler = print_commands
    else:
        command_handler = call_commands_serially
        try:
            makedirs(output_dir)
        except OSError:
            if opts.force:
                pass
            else:
                option_parser.error(
                    "Output directory already exists. Please choose"
                    " a different directory, or force overwrite with -f.")

    if verbose:
        status_update_callback = print_to_stdout
    else:
        status_update_callback = no_status_updates

    run_pick_closed_reference_otus(
        input_fp,
        reference_fp,
        output_dir,
        taxonomy_fp,
        assign_taxonomy=assign_taxonomy,
        command_handler=command_handler,
        params=params,
        qiime_config=qiime_config,
        parallel=parallel,
        status_update_callback=status_update_callback)
Example #43
def main():
    option_parser, opts, args = parse_command_line_parameters(**script_info)
    if opts.output_dir is None:
        opts.output_dir = opts.output_filepath + '_dir'

    if opts.parameter_fp:
        try:
            parameter_f = open(opts.parameter_fp, 'U')
        except IOError:
            raise IOError(
                "Can't open parameters file (%s). Does it exist? Do you have read access?"
                % opts.parameter_fp)
        params = parse_qiime_parameters(parameter_f)
        parameter_f.close()
    else:
        params = parse_qiime_parameters([])
        # empty list returns empty defaultdict for now

    try:
        makedirs(opts.output_dir)
    except OSError:
        if opts.force:
            pass
        else:
            # Since the analysis can take quite a while, I put this check
            # in to help users avoid overwriting previous output.
            option_parser.error(
                "Output directory already exists. Please choose"
                " a different directory, or force overwrite with -f.")

    if opts.print_only:
        command_handler = print_commands
    else:
        command_handler = call_commands_serially

    if opts.verbose:
        status_update_callback = print_to_stdout
    else:
        status_update_callback = no_status_updates

    # set env variable
    if opts.platform == 'flx':
        existing_pyro_fp = os.environ['PYRO_LOOKUP_FILE']
        new_pyro_fp = os.path.join(
            os.path.split(existing_pyro_fp)[0], 'LookUp_E123.dat')
        os.environ['PYRO_LOOKUP_FILE'] = new_pyro_fp
    elif opts.platform == 'titanium':
        existing_pyro_fp = os.environ['PYRO_LOOKUP_FILE']
        new_pyro_fp = os.path.join(
            os.path.split(existing_pyro_fp)[0], 'LookUp_Titanium.dat')
        os.environ['PYRO_LOOKUP_FILE'] = new_pyro_fp
    else:
        raise RuntimeError('could not find PYRO_LOOKUP_FILE for platform ' +
                           opts.platform)

    if opts.truncate_len:
        try:
            truncate_len_int_check = int(opts.truncate_len)
            truncate_len = str(truncate_len_int_check)
        except ValueError:
            raise ValueError(("If specified, truncate_len must be int type."))
    else:
        truncate_len = None

    run_ampliconnoise(mapping_fp=opts.mapping_fp,
                      output_dir=os.path.abspath(opts.output_dir),
                      command_handler=command_handler,
                      params=params,
                      qiime_config=qiime_config,
                      status_update_callback=status_update_callback,
                      chimera_alpha=opts.chimera_alpha,
                      chimera_beta=opts.chimera_beta,
                      sff_txt_fp=opts.sff_filepath,
                      numnodes=opts.np,
                      suppress_perseus=opts.suppress_perseus,
                      output_filepath=os.path.abspath(opts.output_filepath),
                      platform=opts.platform,
                      seqnoise_resolution=opts.seqnoise_resolution,
                      truncate_len=truncate_len)
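
Both AmpliconNoise drivers above repeat the same PYRO_LOOKUP_FILE branch, differing only in the lookup-table file name, so the platform-to-table mapping can be factored out. A hedged sketch of that refactoring; the helper name is invented here:

import os

# Hedged helper equivalent to the flx/titanium branches above: swap the
# file-name component of PYRO_LOOKUP_FILE while keeping its directory.
PYRO_LOOKUP_TABLES = {'flx': 'LookUp_E123.dat',
                      'titanium': 'LookUp_Titanium.dat'}

def set_pyro_lookup_file(platform):
    try:
        table = PYRO_LOOKUP_TABLES[platform]
    except KeyError:
        raise RuntimeError('could not find PYRO_LOOKUP_FILE for platform ' +
                           platform)
    existing_pyro_fp = os.environ['PYRO_LOOKUP_FILE']
    os.environ['PYRO_LOOKUP_FILE'] = os.path.join(
        os.path.split(existing_pyro_fp)[0], table)
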
Example #44
def main():
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    verbose = opts.verbose

    input_fnas = opts.input_fnas
    input_quals = opts.input_quals
    output_dir = opts.output_dir
    sampling_depth = opts.seqs_per_sample
    categories = opts.categories
    reference_tree_fp = opts.reference_tree_fp
    mapping_fp = opts.mapping_fp
    verbose = opts.verbose
    print_only = False  # This feature is not currently supported
    suppress_split_libraries = opts.suppress_split_libraries
    even_sampling_keeps_all_samples = opts.even_sampling_keeps_all_samples

    parallel = opts.parallel

    if suppress_split_libraries and len(input_fnas) > 1:
        option_parser.error("Only a single fasta file can be passed with "+\
                            "--suppress_split_libraries")

    if opts.parameter_fp is not None:
        try:
            parameter_f = open(opts.parameter_fp)
        except IOError:
            raise IOError("Can't open parameters file (%s). Does it exist? "
                          "Do you have read access?" % opts.parameter_fp)
        params = parse_qiime_parameters(parameter_f)
    else:
        params = parse_qiime_parameters([])

    jobs_to_start = opts.jobs_to_start
    default_jobs_to_start = qiime_config['jobs_to_start']
    validate_and_set_jobs_to_start(params, jobs_to_start,
                                   default_jobs_to_start, parallel,
                                   option_parser)

    try:
        makedirs(output_dir)
    except OSError:
        if opts.force:
            pass
        else:
            # Since the analysis can take quite a while, I put this check
            # in to help users avoid overwriting previous output.
            print "Output directory already exists. Please choose "+\
             "a different directory, or force overwrite with -f."
            exit(1)

    if print_only:
        command_handler = print_commands
    else:
        command_handler = call_commands_serially

    if verbose:
        status_update_callback = print_to_stdout
    else:
        status_update_callback = no_status_updates

    input_fnas_string = ','.join(input_fnas)

    input_quals_string = None
    if input_quals:
        input_quals_string = ','.join(input_quals)

    run_core_qiime_analyses(
        fna_fps=input_fnas_string,
        qual_fps=input_quals_string,
        mapping_fp=mapping_fp,
        output_dir=output_dir,
        command_handler=command_handler,
        params=params,
        qiime_config=qiime_config,
        categories=categories,
        sampling_depth=sampling_depth,
        suppress_split_libraries=suppress_split_libraries,
        even_sampling_keeps_all_samples=even_sampling_keeps_all_samples,
        arare_min_rare_depth=10,
        arare_num_steps=10,
        reference_tree_fp=reference_tree_fp,
        parallel=parallel,
        status_update_callback=status_update_callback)
Example #45
def main():
    option_parser, opts, args =\
       parse_command_line_parameters(**script_info)
    
    otu_table_fp = opts.otu_table_fp
    output_dir = opts.output_dir
    mapping_fp = opts.mapping_fp
    tree_fp = opts.tree_fp
    verbose = opts.verbose
    print_only = opts.print_only
    seqs_per_sample = int(opts.seqs_per_sample)
    parallel = opts.parallel
    min_seqs_sample = opts.min_seqs_sample
    subject_category = opts.subject_name

    try:
        makedirs(output_dir)
    except OSError:
        if opts.force:
            pass
        else:
            # Since the analysis can take quite a while, I put this check
            # in to help users avoid overwriting previous output.
            option_parser.error("Output directory already exists. Please choose"
                " a different directory, or force overwrite with -f.")


    ## ******************** make_evident_selectors ********************
    ## The code from make_evident_selectors.py lives here and has to run
    ## before the params validation, as we need to know the main categories
    ## before creating the params file
    map_data, headers, comments = parse_mapping_file(open(mapping_fp, 'U'))
    biom_table = parse_biom_table(open(otu_table_fp, 'U'))

    # getting valid samples from biom file
    real_map_headers, real_map_data = filter_mapping_file(
        map_data, headers, biom_table.SampleIds, include_repeat_cols=False)

    if subject_category not in real_map_headers:
        option_parser.error("Column '%s' is not in the mapping file. Valid "
                            "choices are: %s" %
                            (subject_category, ', '.join(real_map_headers)))
 
    sorted_counts_per_sample = get_sorted_counts_per_sample(biom_table)

    mapping_file_tuple = (real_map_data, real_map_headers)

    # calculate the available subjects at each rarefaction level
    results, main_map_cat = make_selectors(
        sorted_counts_per_sample, min_seqs_sample, mapping_file_tuple,
        subject_category, verbose=verbose)

    fout = open(join(output_dir,'selectors.txt'),'w')
    fout.write('#Sequences\tSubjects\tSamples\tMetadata\n')
    fout.write('\n'.join(results))
    fout.close()
    
    fout = open(join(output_dir,'mapping_file.txt'),'w')
    fout.write(format_mapping_file(real_map_headers, real_map_data))
    fout.close()
    ## ******************** make_evident_selectors ********************

    fout = open(join(output_dir,'study_preferences.txt'),'w')
    fout.write('%d\n' % seqs_per_sample)
    fout.write('%s\n' % subject_category)
    fout.close()

    ## ******************** filter_samples_from_otu_table ********************
    ## Filter the original biom file to keep only samples with at least
    ## seqs_per_sample sequences, to avoid ugly plots
    alpha_biom_file = join(output_dir,'filtered_otu_table_for_alpha.biom')
    fout = open(alpha_biom_file,'w')
    sample_ids_to_keep = biom_table.SampleIds
    filtered_otu_table = filter_samples_from_otu_table(biom_table,
                                                       sample_ids_to_keep,
                                                       min_count=seqs_per_sample,
                                                       max_count=inf)
    fout.write(format_biom_table(filtered_otu_table))
    fout.close()
    ## ******************** filter_samples_from_otu_table ********************

    if opts.parameter_fp:
        try:
            parameter_f = open(opts.parameter_fp, 'U')
        except IOError:
            option_parser.error("Can't open parameters file (%s). Does it exist? " \
            "Do you have read access?" % opts.parameter_fp)
        params = parse_qiime_parameters(parameter_f)
        parameter_f.close()
    else:
        params = parse_qiime_parameters(
            ['beta_diversity:metrics unweighted_unifrac',
             'make_rarefaction_plots:prefs_path %s' % join(output_dir, 'prefs.txt'),
             'make_rarefaction_plots:colorby %s' % ','.join(main_map_cat),
             'make_rarefaction_plots:output_type memory',
             'multiple_rarefactions:min %d' % int(seqs_per_sample / 4),
             'multiple_rarefactions:max %d' % (seqs_per_sample + 1),
             'multiple_rarefactions:step %d' % int(seqs_per_sample / 4),
             'multiple_rarefactions:num-reps 4'])
    
    jobs_to_start = opts.jobs_to_start
    default_jobs_to_start = qiime_config['jobs_to_start']
    validate_and_set_jobs_to_start(params,
                                   jobs_to_start,
                                   default_jobs_to_start,
                                   parallel,
                                   option_parser)


    if print_only:
        command_handler = print_commands
    else:
        command_handler = call_commands_serially
    
    if verbose:
        status_update_callback = print_to_stdout
    else:
        status_update_callback = no_status_updates
    
    copyfile(otu_table_fp, join(output_dir,'raw.biom'))
    
    run_beta_diversity_through_plots(otu_table_fp=otu_table_fp,
     mapping_fp=mapping_fp,
     output_dir=output_dir,
     command_handler=command_handler,
     params=params,
     qiime_config=qiime_config,
     color_by_interesting_fields_only=False,
     sampling_depth=seqs_per_sample,
     histogram_categories=None,
     tree_fp=tree_fp,
     parallel=parallel,
     suppress_3d_plots=True,
     suppress_2d_plots=True,
     status_update_callback=status_update_callback)
    
    output_dir = join(output_dir,'alpha')
    run_alpha_rarefaction(otu_table_fp=alpha_biom_file,
                          mapping_fp=mapping_fp,
                          output_dir=output_dir,
                          command_handler=command_handler,
                          params=params,
                          qiime_config=qiime_config,
                          tree_fp=tree_fp,
                          num_steps=4,
                          parallel=parallel,
                          min_rare_depth=10,
                          max_rare_depth=20,
                          status_update_callback=status_update_callback,
                          plot_stderr_and_stddev=True)
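
The default multiple_rarefactions parameters built above are all derived from seqs_per_sample: min and step are a quarter of the depth and max is one past it. A quick worked example with an illustrative depth of 1000:

# Hedged worked example of the derived rarefaction depths; with
# seqs_per_sample = 1000 the min/max/step of 250/1001/250 yield depths
# 250, 500, 750 and 1000, each rarefied 4 times (num-reps 4).
seqs_per_sample = 1000
depths = list(range(int(seqs_per_sample / 4), seqs_per_sample + 1,
                    int(seqs_per_sample / 4)))
assert depths == [250, 500, 750, 1000]
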
Example #46
def main():
    option_parser, opts, args = parse_command_line_parameters(**script_info)
    verbose = opts.verbose
    
    otu_table_fp = opts.otu_table_fp
    output_dir = opts.output_dir
    mapping_fp = opts.mapping_fp
    tree_fp = opts.tree_fp
    num_steps = opts.num_steps
    verbose = opts.verbose
    print_only = opts.print_only
    parallel = opts.parallel
    min_rare_depth = opts.min_rare_depth
    max_rare_depth = opts.max_rare_depth
    
    
    if opts.parameter_fp:
        try:
            parameter_f = open(opts.parameter_fp)
        except IOError:
            raise IOError("Can't open parameters file (%s). Does it exist? "
                          "Do you have read access?" % opts.parameter_fp)
        params = parse_qiime_parameters(parameter_f)
    else:
        params = parse_qiime_parameters([]) 
        # empty list returns empty defaultdict for now
    
    jobs_to_start = opts.jobs_to_start
    default_jobs_to_start = qiime_config['jobs_to_start']
    validate_and_set_jobs_to_start(params,
                                   jobs_to_start,
                                   default_jobs_to_start,
                                   parallel,
                                   option_parser)
                                   
    try:
        makedirs(output_dir)
    except OSError:
        if opts.force:
            pass
        else:
            # Since the analysis can take quite a while, I put this check
            # in to help users avoid overwriting previous output.
            option_parser.error("Output directory already exists. Please choose"
                " a different directory, or force overwrite with -f.")

    if print_only:
        command_handler = print_commands
    else:
        command_handler = call_commands_serially
    
    if verbose:
        status_update_callback = print_to_stdout
    else:
        status_update_callback = no_status_updates
     
    run_qiime_alpha_rarefaction(otu_table_fp=otu_table_fp,
                                mapping_fp=mapping_fp,
                                output_dir=output_dir,
                                command_handler=command_handler,
                                params=params,
                                qiime_config=qiime_config,
                                tree_fp=tree_fp,
                                num_steps=num_steps,
                                parallel=parallel,
                                min_rare_depth=min_rare_depth,
                                max_rare_depth=max_rare_depth,
                                status_update_callback=status_update_callback)
Example #47
def main():
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    verbose = opts.verbose
    
    otu_table_fp = opts.otu_table_fp
    output_dir = opts.output_dir
    mapping_fp = opts.mapping_fp
    tree_fp = opts.tree_fp
    verbose = opts.verbose
    print_only = opts.print_only
    seqs_per_sample = opts.seqs_per_sample
    
    parallel = opts.parallel
    # No longer checking that jobs_to_start > 2, but
    # commenting as we may change our minds about this.
    #if parallel: raise_error_on_parallel_unavailable()

    if opts.parameter_fp:
        try:
            parameter_f = open(opts.parameter_fp, 'U')
        except IOError:
            raise IOError("Can't open parameters file (%s). Does it exist? "
                          "Do you have read access?" % opts.parameter_fp)
        params = parse_qiime_parameters(parameter_f)
        parameter_f.close()
    else:
        params = parse_qiime_parameters([]) 
        # empty list returns empty defaultdict for now

    jobs_to_start = opts.jobs_to_start
    default_jobs_to_start = qiime_config['jobs_to_start']
    validate_and_set_jobs_to_start(params,
                                   jobs_to_start,
                                   default_jobs_to_start,
                                   parallel,
                                   option_parser)

    try:
        makedirs(output_dir)
    except OSError:
        if opts.force:
            pass
        else:
            # Since the analysis can take quite a while, I put this check
            # in to help users avoid overwriting previous output.
            option_parser.error("Output directory already exists. Please choose"
                " a different directory, or force overwrite with -f.")

    if print_only:
        command_handler = print_commands
    else:
        command_handler = call_commands_serially
    
    if verbose:
        status_update_callback = print_to_stdout
    else:
        status_update_callback = no_status_updates
     
    run_beta_diversity_through_plots(otu_table_fp=otu_table_fp,
     mapping_fp=mapping_fp,
     output_dir=output_dir,
     command_handler=command_handler,
     params=params,
     qiime_config=qiime_config,
     color_by_interesting_fields_only=not opts.color_by_all_fields,
     sampling_depth=seqs_per_sample,
     tree_fp=tree_fp,
     parallel=parallel,
     suppress_emperor_plots=opts.suppress_emperor_plots,
     status_update_callback=status_update_callback)
def main():
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    verbose = opts.verbose
    
    input_fp = opts.input_fp
    reference_fp = opts.reference_fp
    taxonomy_fp = opts.taxonomy_fp
    output_dir = opts.output_dir
    verbose = opts.verbose
    print_only = opts.print_only
    
    parallel = opts.parallel
    # No longer checking that jobs_to_start > 2, but
    # commenting as we may change our minds about this.
    #if parallel: raise_error_on_parallel_unavailable()
    
    if opts.parameter_fp:
        try:
            parameter_f = open(opts.parameter_fp, 'U')
        except IOError:
            raise IOError("Can't open parameters file (%s). Does it exist? "
                          "Do you have read access?" % opts.parameter_fp)
        params = parse_qiime_parameters(parameter_f)
        parameter_f.close()
    else:
        params = parse_qiime_parameters([]) 
        # empty list returns empty defaultdict for now
    
    jobs_to_start = opts.jobs_to_start
    default_jobs_to_start = qiime_config['jobs_to_start']
    validate_and_set_jobs_to_start(params,
                                   jobs_to_start,
                                   default_jobs_to_start,
                                   parallel,
                                   option_parser)
    
    try:
        makedirs(output_dir)
    except OSError:
        if opts.force:
            pass
        else:
            option_parser.error("Output directory already exists. Please choose"
                " a different directory, or force overwrite with -f.")

    if print_only:
        command_handler = print_commands
    else:
        command_handler = call_commands_serially
    
    if verbose:
        status_update_callback = print_to_stdout
    else:
        status_update_callback = no_status_updates

    run_pick_closed_reference_otus(
     input_fp, 
     reference_fp,
     output_dir,
     taxonomy_fp,
     command_handler=command_handler,
     params=params,
     qiime_config=qiime_config,
     parallel=parallel,
     status_update_callback=status_update_callback)
def main():
    option_parser, opts, args =\
        parse_command_line_parameters(**script_info)

    parameters = {}

    # get the tree insertion method to use
    module = opts.insertion_method

    # create output directory
    output_dir = opts.output_dir
    create_dir(output_dir)

    # list of tree insertion methods
    tree_insertion_module_names = \
        {'raxml_v730': brokit.raxml_v730,
         'parsinsert': brokit.parsinsert,
         'pplacer': brokit.pplacer}

    # load input sequences and convert to phylip since the tools require
    # the query sequences to phylip-compliant names
    load_aln = parse_fasta(open(opts.input_fasta_fp, 'U'))
    aln = DenseAlignment(load_aln)
    seqs, align_map = aln.toPhylip()

    if opts.method_params_fp:
        param_dict = parse_qiime_parameters(open(opts.method_params_fp, 'U'))
    else:
        # ensure param_dict is always defined so the per-method lookups
        # below can fall back cleanly when no parameters file is given
        param_dict = {}

    if module == 'raxml_v730':
        # load the reference sequences
        load_ref_aln = \
            DenseAlignment(parse_fasta(open(opts.refseq_fp, 'U')))

        # combine and load the reference plus query
        combined_aln = parse_fasta(StringIO(load_ref_aln.toFasta() +
                                            '\n' + aln.toFasta()))
        # overwrite the alignment map
        aln = DenseAlignment(combined_aln)
        seqs, align_map = aln.toPhylip()

        try:
            parameters = param_dict['raxml']
        except KeyError:
            parameters = {}

        tree = convert_tree_tips(align_map, opts.starting_tree_fp)

        # write out the tree with phylip labels
        updated_tree_fp = join(output_dir,
                               '%s_phylip_named_tree.tre' % (module))
        write_updated_tree_file(updated_tree_fp, tree)

        # set the primary parameters for raxml
        parameters['-w'] = abspath(output_dir) + '/'
        parameters["-n"] = split(splitext(get_tmp_filename())[0])[-1]
        parameters["-t"] = updated_tree_fp

        if "-f" not in parameters:
            parameters["-f"] = 'v'
        if "-m" not in parameters:
            parameters["-m"] = 'GTRGAMMA'

    elif module == 'pplacer':
        try:
            parameters = param_dict['pplacer']
        except KeyError:
            parameters = {}

        # make sure stats file is passed
        if not opts.stats_fp:
            raise IOError(
                'When using pplacer, the RAxML produced info file is required.')

        # set the primary parameters for pplacer - allow for user-defined
        parameters['--out-dir'] = abspath(output_dir) + '/'
        parameters["-t"] = opts.starting_tree_fp
        parameters['-r'] = opts.refseq_fp
        parameters['-s'] = opts.stats_fp

    elif module == 'parsinsert':
        try:
            parameters = param_dict['parsinsert']
        except KeyError:
            parameters = {}

        # define log fp
        log_fp = join(output_dir, 'parsinsert.log')

        # define tax assignment values fp
        tax_assign_fp = join(output_dir, 'parsinsert_assignments.log')
        parameters["-l"] = log_fp
        parameters["-o"] = tax_assign_fp
        parameters["-s"] = opts.refseq_fp
        parameters["-t"] = opts.starting_tree_fp

    # call the module and return a tree object
    result = tree_insertion_module_names[module].insert_sequences_into_tree(
        seqs, moltype=DNA, params=parameters)

    result_tree = strip_and_rename_unwanted_labels_from_tree(align_map, result)

    # write out the resulting tree, using the relabeled copy rather than
    # the raw phylip-named result
    final_tree = join(output_dir, '%s_final_placement.tre' % module)
    write_updated_tree_file(final_tree, result_tree)
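
The method-parameters file read at the top of this driver uses the same "section:flag value" format as the other parameters files, with one section per insertion tool, which is why param_dict['raxml'], param_dict['pplacer'] and param_dict['parsinsert'] each yield a flag-to-value dict that is merged into parameters. A hedged sketch with invented flag values:

# Hedged sketch of a method-parameters file for the tree-insertion
# driver above; the flags shown are illustrative, not recommendations.
example_method_params = parse_qiime_parameters([
    'raxml:-m GTRCAT',
    'pplacer:--max-strikes 10',
])
assert example_method_params['raxml'] == {'-m': 'GTRCAT'}
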
Example #50
def main():
    option_parser, opts, args = parse_command_line_parameters(**script_info)
    verbose = opts.verbose
    
    otu_table_fp = opts.otu_table_fp
    output_dir = opts.output_dir
    mapping_fp = opts.mapping_fp
    tree_fp = opts.tree_fp
    num_steps = opts.num_steps
    verbose = opts.verbose
    print_only = opts.print_only
    parallel = opts.parallel
    min_rare_depth = opts.min_rare_depth
    max_rare_depth = opts.max_rare_depth
    retain_intermediate_files = opts.retain_intermediate_files
    
    if opts.parameter_fp:
        try:
            parameter_f = open(opts.parameter_fp, 'U')
        except IOError:
            raise IOError("Can't open parameters file (%s). Does it exist? "
                          "Do you have read access?" % opts.parameter_fp)
        params = parse_qiime_parameters(parameter_f)
        parameter_f.close()
    else:
        params = parse_qiime_parameters([]) 
        # empty list returns empty defaultdict for now
    
    jobs_to_start = opts.jobs_to_start
    default_jobs_to_start = qiime_config['jobs_to_start']
    validate_and_set_jobs_to_start(params,
                                   jobs_to_start,
                                   default_jobs_to_start,
                                   parallel,
                                   option_parser)
                                   
    try:
        makedirs(output_dir)
    except OSError:
        if opts.force:
            pass
        else:
            # Since the analysis can take quite a while, I put this check
            # in to help users avoid overwriting previous output.
            option_parser.error("Output directory already exists. Please choose"
                " a different directory, or force overwrite with -f.")

    if print_only:
        command_handler = print_commands
    else:
        command_handler = call_commands_serially
    
    if verbose:
        status_update_callback = print_to_stdout
    else:
        status_update_callback = no_status_updates
     
    run_alpha_rarefaction(otu_table_fp=otu_table_fp,
                          mapping_fp=mapping_fp,
                          output_dir=output_dir,
                          command_handler=command_handler,
                          params=params,
                          qiime_config=qiime_config,
                          tree_fp=tree_fp,
                          num_steps=num_steps,
                          parallel=parallel,
                          min_rare_depth=min_rare_depth,
                          max_rare_depth=max_rare_depth,
                          status_update_callback=status_update_callback,
                          retain_intermediate_files=retain_intermediate_files)
def main():
    option_parser, opts, args = \
        parse_command_line_parameters(**script_info)

    verbose = opts.verbose

    otu_table_fp = opts.otu_table_fp
    output_dir = opts.output_dir
    tree_fp = opts.tree_fp
    seqs_per_sample = opts.seqs_per_sample
    verbose = opts.verbose
    print_only = opts.print_only
    master_tree = opts.master_tree

    parallel = opts.parallel
    # No longer checking that jobs_to_start > 2, but
    # commenting as we may change our minds about this.
    #if parallel: raise_error_on_parallel_unavailable()

    if opts.parameter_fp:
        try:
            parameter_f = open(opts.parameter_fp, 'U')
        except IOError:
            raise IOError("Can't open parameters file (%s). Does it exist? Do you have read access?"
                          % opts.parameter_fp)
        params = parse_qiime_parameters(parameter_f)
        parameter_f.close()
    else:
        params = parse_qiime_parameters([])
        # empty list returns empty defaultdict for now

    jobs_to_start = opts.jobs_to_start
    default_jobs_to_start = qiime_config['jobs_to_start']
    validate_and_set_jobs_to_start(params,
                                   jobs_to_start,
                                   default_jobs_to_start,
                                   parallel,
                                   option_parser)

    try:
        makedirs(output_dir)
    except OSError:
        if opts.force:
            pass
        else:
            # Since the analysis can take quite a while, I put this check
            # in to help users avoid overwriting previous output.
            option_parser.error("Output directory already exists. Please choose"
                                " a different directory, or force overwrite with -f.")

    if print_only:
        command_handler = print_commands
    else:
        command_handler = call_commands_serially

    if verbose:
        status_update_callback = print_to_stdout
    else:
        status_update_callback = no_status_updates

    run_jackknifed_beta_diversity(otu_table_fp=otu_table_fp,
                                  tree_fp=tree_fp,
                                  seqs_per_sample=seqs_per_sample,
                                  output_dir=output_dir,
                                  command_handler=command_handler,
                                  params=params,
                                  qiime_config=qiime_config,
                                  mapping_fp=opts.mapping_fp,
                                  parallel=parallel,
                                  status_update_callback=status_update_callback,
                                  master_tree=master_tree)
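
One more pattern repeated in nearly every driver above is the validate_and_set_jobs_to_start(params, jobs_to_start, default_jobs_to_start, parallel, option_parser) call, which reconciles the -O/--jobs_to_start option with the qiime_config default before any parallel work starts. A rough stand-in with the same shape, purely to illustrate the contract; the real helper's internals may differ:

# Hedged stand-in: reject a custom job count when parallel execution
# was not requested, and otherwise record the count in params so the
# parallel wrappers can pick it up. Illustrative only.
def sketch_validate_and_set_jobs_to_start(params, jobs_to_start,
                                          default_jobs_to_start,
                                          parallel, option_parser):
    if jobs_to_start != int(default_jobs_to_start) and not parallel:
        option_parser.error("Passing -O requires that -a is also passed.")
    params['parallel']['jobs_to_start'] = str(jobs_to_start)
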
def run_core_diversity_analyses(biom_fp,
                                mapping_fp,
                                sampling_depth,
                                output_dir,
                                qiime_config,
                                command_handler=call_commands_serially,
                                tree_fp=None,
                                params=None,
                                categories=None,
                                arare_min_rare_depth=10,
                                arare_num_steps=10,
                                parallel=False,
                                suppress_taxa_summary=False,
                                suppress_beta_diversity=False,
                                suppress_alpha_diversity=False,
                                suppress_group_significance=False,
                                status_update_callback=print_to_stdout):
    """
    """
    if categories is not None:
        # Validate categories provided by the users
        mapping_data, mapping_comments = \
            parse_mapping_file_to_dict(open(mapping_fp, 'U'))
        metadata_map = MetadataMap(mapping_data, mapping_comments)
        for c in categories:
            if c not in metadata_map.CategoryNames:
                raise ValueError(
                    "Category '%s' is not a column header "
                    "in your mapping file. "
                    "Categories are case and white space sensitive. Valid "
                    "choices are: (%s)" %
                    (c, ', '.join(metadata_map.CategoryNames)))
            if metadata_map.hasSingleCategoryValue(c):
                raise ValueError(
                    "Category '%s' contains only one value. "
                    "Categories analyzed here require at least two values." %
                    c)

    else:
        categories = []
    comma_separated_categories = ','.join(categories)
    # prep some variables
    if params is None:
        params = parse_qiime_parameters([])

    create_dir(output_dir)
    index_fp = '%s/index.html' % output_dir
    index_links = []
    commands = []

    # begin logging
    old_log_fps = glob(join(output_dir, 'log_20*txt'))
    log_fp = generate_log_fp(output_dir)
    index_links.append(
        ('Master run log', log_fp, _index_headers['run_summary']))
    for old_log_fp in old_log_fps:
        index_links.append(
            ('Previous run log', old_log_fp, _index_headers['run_summary']))
    logger = WorkflowLogger(log_fp, params=params, qiime_config=qiime_config)
    input_fps = [biom_fp, mapping_fp]
    if tree_fp is not None:
        input_fps.append(tree_fp)
    log_input_md5s(logger, input_fps)

    # run 'biom summarize-table' on input BIOM table
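    # get_params_str pulls any user-supplied options for this step out of the
    # params file; a KeyError means none were provided (this try/except
    # pattern recurs for each step below)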
    try:
        params_str = get_params_str(params['biom-summarize-table'])
    except KeyError:
        params_str = ''
    biom_table_stats_output_fp = '%s/biom_table_summary.txt' % output_dir
    if not exists(biom_table_stats_output_fp):
        biom_table_summary_cmd = \
            "biom summarize-table -i %s -o %s %s" % \
            (biom_fp, biom_table_stats_output_fp, params_str)
        commands.append([('Generate BIOM table summary',
                          biom_table_summary_cmd)])
    else:
        logger.write("Skipping 'biom summarize-table' as %s exists.\n\n" %
                     biom_table_stats_output_fp)
    index_links.append(('BIOM table statistics', biom_table_stats_output_fp,
                        _index_headers['run_summary']))

    # Filter samples with fewer observations than the requested
    # sampling_depth. Since these samples would be dropped from some analyses
    # anyway (e.g., beta diversity after even sampling), filtering them here
    # ensures they are excluded from all analyses.
    filtered_biom_fp = "%s/table_mc%d.biom" % (output_dir, sampling_depth)
    if not exists(filtered_biom_fp):
        filter_samples_cmd = "filter_samples_from_otu_table.py -i %s -o %s -n %d" %\
            (biom_fp, filtered_biom_fp, sampling_depth)
        commands.append([(
            'Filter low sequence count samples from table (minimum sequence count: %d)'
            % sampling_depth, filter_samples_cmd)])
    else:
        logger.write(
            "Skipping filter_samples_from_otu_table.py as %s exists.\n\n" %
            filtered_biom_fp)
    biom_fp = filtered_biom_fp
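    # from here on, biom_fp refers to the filtered table, so the remaining
    # steps that consume it (alpha rarefaction, taxa summaries) operate on
    # filtered data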

    # rarefy the BIOM table to sampling_depth
    rarefied_biom_fp = "%s/table_even%d.biom" % (output_dir, sampling_depth)
    if not exists(rarefied_biom_fp):
        single_rarefaction_cmd = "single_rarefaction.py -i %s -o %s -d %d" %\
            (biom_fp, rarefied_biom_fp, sampling_depth)
        commands.append([
            ('Rarefy the OTU table to %d sequences/sample' % sampling_depth,
             single_rarefaction_cmd)
        ])
    else:
        logger.write("Skipping single_rarefaction.py as %s exists.\n\n" %
                     rarefied_biom_fp)

    # run initial commands and reset the command list
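    # close_logger_on_success=False keeps the log open for the remaining
    # steps; the final command_handler call at the end of this function
    # closes it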
    if len(commands) > 0:
        command_handler(commands,
                        status_update_callback,
                        logger,
                        close_logger_on_success=False)
        commands = []

    if not suppress_beta_diversity:
        bdiv_even_output_dir = '%s/bdiv_even%d/' % (output_dir, sampling_depth)
        # Need to check for the existence of any distance matrices, since the user
        # can select which will be generated.
        existing_dm_fps = glob('%s/*_dm.txt' % bdiv_even_output_dir)
        if len(existing_dm_fps) == 0:
            even_dm_fps = run_beta_diversity_through_plots(
                otu_table_fp=rarefied_biom_fp,
                mapping_fp=mapping_fp,
                output_dir=bdiv_even_output_dir,
                command_handler=command_handler,
                params=params,
                qiime_config=qiime_config,
                # Note: sampling_depth=None here because the BIOM table was
                # already rarefied above, and that rarefied table is what we
                # pass in.
                sampling_depth=None,
                tree_fp=tree_fp,
                parallel=parallel,
                logger=logger,
                suppress_md5=True,
                status_update_callback=status_update_callback)
        else:
            logger.write(
                "Skipping beta_diversity_through_plots.py as %s exist(s).\n\n"
                % ', '.join(existing_dm_fps))
            # Recover (metric, dm_fp) pairs from the existing files. Slice
            # off the '_dm.txt' suffix rather than using str.strip, which
            # removes a set of characters (not a substring) and can mangle
            # metric names.
            even_dm_fps = [(split(fp)[1][:-len('_dm.txt')], fp)
                           for fp in existing_dm_fps]

        # Get make_distance_boxplots parameters
        try:
            params_str = get_params_str(params['make_distance_boxplots'])
        except KeyError:
            params_str = ''

        for bdiv_metric, dm_fp in even_dm_fps:
            for category in categories:
                boxplots_output_dir = '%s/%s_boxplots/' % (
                    bdiv_even_output_dir, bdiv_metric)
                plot_output_fp = '%s/%s_Distances.pdf' % (boxplots_output_dir,
                                                          category)
                stats_output_fp = '%s/%s_Stats.txt' % (boxplots_output_dir,
                                                       category)
                if not exists(plot_output_fp):
                    boxplots_cmd = \
                        'make_distance_boxplots.py -d %s -f %s -o %s -m %s -n 999 %s' %\
                        (dm_fp, category, boxplots_output_dir,
                         mapping_fp, params_str)
                    commands.append([('Boxplots (%s)' % category, boxplots_cmd)
                                     ])
                else:
                    logger.write(
                        "Skipping make_distance_boxplots.py for %s as %s exists.\n\n"
                        % (category, plot_output_fp))
                index_links.append(
                    ('Distance boxplots (%s, %s)' % (bdiv_metric, category),
                     plot_output_fp,
                     _index_headers['beta_diversity_even'] % sampling_depth))
                index_links.append(
                    ('Distance boxplot statistics (%s, %s)' %
                     (bdiv_metric, category),
                     stats_output_fp,
                     _index_headers['beta_diversity_even'] % sampling_depth))

            index_links.append(
                ('PCoA plot (%s)' % bdiv_metric,
                 '%s/%s_emperor_pcoa_plot/index.html' %
                 (bdiv_even_output_dir, bdiv_metric),
                 _index_headers['beta_diversity_even'] % sampling_depth))
            index_links.append(
                ('Distance matrix (%s)' % bdiv_metric,
                 '%s/%s_dm.txt' % (bdiv_even_output_dir, bdiv_metric),
                 _index_headers['beta_diversity_even'] % sampling_depth))
            index_links.append(
                ('Principal coordinate matrix (%s)' % bdiv_metric,
                 '%s/%s_pc.txt' % (bdiv_even_output_dir, bdiv_metric),
                 _index_headers['beta_diversity_even'] % sampling_depth))

    if not suppress_alpha_diversity:
        # Alpha rarefaction workflow
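        # rarefaction depths range from arare_min_rare_depth up to
        # sampling_depth, in arare_num_steps steps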
        arare_full_output_dir = '%s/arare_max%d/' % (output_dir,
                                                     sampling_depth)
        rarefaction_plots_output_fp = \
            '%s/alpha_rarefaction_plots/rarefaction_plots.html' % arare_full_output_dir
        if not exists(rarefaction_plots_output_fp):
            run_alpha_rarefaction(
                otu_table_fp=biom_fp,
                mapping_fp=mapping_fp,
                output_dir=arare_full_output_dir,
                command_handler=command_handler,
                params=params,
                qiime_config=qiime_config,
                tree_fp=tree_fp,
                num_steps=arare_num_steps,
                parallel=parallel,
                logger=logger,
                min_rare_depth=arare_min_rare_depth,
                max_rare_depth=sampling_depth,
                suppress_md5=True,
                status_update_callback=status_update_callback,
                retain_intermediate_files=False)
        else:
            logger.write("Skipping alpha_rarefaction.py as %s exists.\n\n" %
                         rarefaction_plots_output_fp)

        index_links.append(
            ('Alpha rarefaction plots', rarefaction_plots_output_fp,
             _index_headers['alpha_diversity']))

        collated_alpha_diversity_fps = \
            glob('%s/alpha_div_collated/*txt' % arare_full_output_dir)
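        # there is one collated file per alpha diversity metric; the filename
        # stem identifies the metric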
        try:
            params_str = get_params_str(params['compare_alpha_diversity'])
        except KeyError:
            params_str = ''

        if len(categories) > 0:
            for collated_alpha_diversity_fp in collated_alpha_diversity_fps:
                alpha_metric = splitext(
                    split(collated_alpha_diversity_fp)[1])[0]
                compare_alpha_output_dir = '%s/compare_%s' % \
                    (arare_full_output_dir, alpha_metric)
                if not exists(compare_alpha_output_dir):
                    compare_alpha_cmd = \
                        'compare_alpha_diversity.py -i %s -m %s -c %s -o %s -n 999 %s' %\
                        (collated_alpha_diversity_fp,
                         mapping_fp,
                         comma_separated_categories,
                         compare_alpha_output_dir,
                         params_str)
                    commands.append([
                        ('Compare alpha diversity (%s)' % alpha_metric,
                         compare_alpha_cmd)
                    ])
                else:
                    logger.write("Skipping compare_alpha_diversity.py"
                                 " for %s as %s exists.\n\n" %
                                 (alpha_metric, compare_alpha_output_dir))

                # append the index links outside of the if/else so that
                # reruns which skip the command still link existing results
                for category in categories:
                    alpha_comparison_stat_fp = '%s/%s_stats.txt' % \
                        (compare_alpha_output_dir, category)
                    alpha_comparison_boxplot_fp = '%s/%s_boxplots.pdf' % \
                        (compare_alpha_output_dir, category)
                    index_links.append(
                        ('Alpha diversity statistics (%s, %s)' %
                         (category, alpha_metric),
                         alpha_comparison_stat_fp,
                         _index_headers['alpha_diversity']))
                    index_links.append(
                        ('Alpha diversity boxplots (%s, %s)' %
                         (category, alpha_metric),
                         alpha_comparison_boxplot_fp,
                         _index_headers['alpha_diversity']))
        else:
            logger.write("Skipping compare_alpha_diversity.py as"
                         " no categories were provided.\n\n")

    if not suppress_taxa_summary:
        taxa_plots_output_dir = '%s/taxa_plots/' % output_dir
        # need to check for existence of any html files, since the user can
        # select only certain ones to be generated
        existing_taxa_plot_html_fps = glob(
            join(taxa_plots_output_dir, 'taxa_summary_plots', '*.html'))
        if len(existing_taxa_plot_html_fps) == 0:
            run_summarize_taxa_through_plots(
                otu_table_fp=biom_fp,
                mapping_fp=mapping_fp,
                output_dir=taxa_plots_output_dir,
                mapping_cat=None,
                sort=True,
                command_handler=command_handler,
                params=params,
                qiime_config=qiime_config,
                logger=logger,
                suppress_md5=True,
                status_update_callback=status_update_callback)
        else:
            logger.write(
                "Skipping summarize_taxa_through_plots.py for as %s exist(s).\n\n"
                % ', '.join(existing_taxa_plot_html_fps))

        index_links.append(
            ('Taxa summary bar plots',
             '%s/taxa_summary_plots/bar_charts.html' % taxa_plots_output_dir,
             _index_headers['taxa_summary']))
        index_links.append(
            ('Taxa summary area plots',
             '%s/taxa_summary_plots/area_charts.html' % taxa_plots_output_dir,
             _index_headers['taxa_summary']))
        for category in categories:
            taxa_plots_output_dir = '%s/taxa_plots_%s/' % (output_dir,
                                                           category)
            # need to check for existence of any html files, since the user can
            # select only certain ones to be generated
            existing_taxa_plot_html_fps = glob('%s/taxa_summary_plots/*.html' %
                                               taxa_plots_output_dir)
            if len(existing_taxa_plot_html_fps) == 0:
                run_summarize_taxa_through_plots(
                    otu_table_fp=biom_fp,
                    mapping_fp=mapping_fp,
                    output_dir=taxa_plots_output_dir,
                    mapping_cat=category,
                    sort=True,
                    command_handler=command_handler,
                    params=params,
                    qiime_config=qiime_config,
                    logger=logger,
                    suppress_md5=True,
                    status_update_callback=status_update_callback)
            else:
                logger.write(
                    "Skipping summarize_taxa_through_plots.py for %s as %s exist(s).\n\n"
                    % (category, ', '.join(existing_taxa_plot_html_fps)))

            index_links.append(
                ('Taxa summary bar plots',
                 '%s/taxa_summary_plots/bar_charts.html' %
                 taxa_plots_output_dir,
                 _index_headers['taxa_summary_categorical'] % category))
            index_links.append(
                ('Taxa summary area plots',
                 '%s/taxa_summary_plots/area_charts.html' %
                 taxa_plots_output_dir,
                 _index_headers['taxa_summary_categorical'] % category))

    if not suppress_group_significance:
        # group significance tests, aka category significance
        try:
            params_str = get_params_str(params['group_significance'])
        except KeyError:
            params_str = ''
        for category in categories:
            group_significance_fp = \
                '%s/group_significance_%s.txt' % (output_dir, category)
            if not exists(group_significance_fp):
                # Build the group significance command
                group_significance_cmd = \
                    'group_significance.py -i %s -m %s -c %s -o %s %s' %\
                    (rarefied_biom_fp, mapping_fp, category,
                     group_significance_fp, params_str)
                commands.append([('Group significance (%s)' % category,
                                  group_significance_cmd)])
            else:
                logger.write(
                    "Skipping group_significance.py for %s as %s exists.\n\n" %
                    (category, group_significance_fp))

            index_links.append(
                ('Category significance (%s)' % category,
                 group_significance_fp,
                 _index_headers['group_significance']))

    filtered_biom_gzip_fp = '%s.gz' % filtered_biom_fp
    if not exists(filtered_biom_gzip_fp):
        commands.append([('Compress the filtered BIOM table',
                          'gzip %s' % filtered_biom_fp)])
    else:
        logger.write(
            "Skipping compression of filtered BIOM table as %s exists.\n\n" %
            filtered_biom_gzip_fp)
    index_links.append(
        ('Filtered BIOM table (minimum sequence count: %d)' % sampling_depth,
         filtered_biom_gzip_fp, _index_headers['run_summary']))

    rarefied_biom_gzip_fp = '%s.gz' % rarefied_biom_fp
    if not exists(rarefied_biom_gzip_fp):
        commands.append([('Compress the rarefied BIOM table',
                          'gzip %s' % rarefied_biom_fp)])
    else:
        logger.write(
            "Skipping compression of rarefied BIOM table as %s exists.\n\n" %
            rarefied_biom_gzip_fp)
    index_links.append(
        ('Rarefied BIOM table (sampling depth: %d)' % sampling_depth,
         rarefied_biom_gzip_fp, _index_headers['run_summary']))

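    # run any remaining commands; on success, command_handler closes the
    # logger (its default behavior), so the logger only needs to be closed
    # explicitly when there is nothing left to run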
    if len(commands) > 0:
        command_handler(commands, status_update_callback, logger)
    else:
        logger.close()

    generate_index_page(index_links, index_fp)
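

# Example invocation (an illustrative sketch only; the file paths, sampling
# depth, and category below are hypothetical -- in practice this function is
# normally driven by the core_diversity_analyses.py script):
#
#   from qiime.util import load_qiime_config
#   run_core_diversity_analyses('otu_table.biom',
#                               'map.txt',
#                               100,
#                               output_dir='core_diversity_out/',
#                               qiime_config=load_qiime_config(),
#                               categories=['SampleType'],
#                               tree_fp='rep_set.tre')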