    def setUp(self):
        """Set up the test values"""
        
        self.qiime_config = load_qiime_config()
        self.dirs_to_remove = []
        self.files_to_remove = []
        
        # this is specific to the web apps only
        test_dir = abspath(dirname(__file__))
        self.fna_original_fp = os.path.join(test_dir, 'support_files',
                                            'test.fna')

        tmp_dir = self.qiime_config['temp_dir'] or '/tmp/'
        if not exists(tmp_dir):
            makedirs(tmp_dir)
            
            # if test creates the temp dir, also remove it
            self.dirs_to_remove.append(tmp_dir)
        
        self.wf_out = get_tmp_filename(tmp_dir=tmp_dir, prefix='qiime_wf_out',
                                       suffix='', result_constructor=str)
        if not exists(self.wf_out):
            makedirs(self.wf_out)         
            self.dirs_to_remove.append(self.wf_out)
        #print self.wf_out
        working_dir = self.qiime_config['working_dir'] or './'
        jobs_dir = join(working_dir,'jobs')
        if not exists(jobs_dir):
            # only clean up the jobs dir if it doesn't already exist
            self.dirs_to_remove.append(jobs_dir)
        self.params = parse_qiime_parameters(qiime_parameters_f.split('\n'))

        signal.signal(signal.SIGALRM, timeout)
        # set the 'alarm' to go off in allowed_seconds seconds
        signal.alarm(allowed_seconds_per_test)
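# Note: this setUp relies on a module-level SIGALRM handler and timeout
# constant that are not shown in this listing. A minimal sketch of what they
# presumably look like (names taken from the calls above; the value of
# allowed_seconds_per_test is an assumption):

allowed_seconds_per_test = 240  # assumed value, not from this listing


class TimeExceededError(Exception):
    pass


def timeout(signum, frame):
    # raised inside the running test once signal.alarm() fires
    raise TimeExceededError("test could not be completed in %d seconds"
                            % allowed_seconds_per_test)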
Example #2
    def setUp(self):
        """Set up some test variables"""
        self.qiime_config = load_qiime_config()
        self.tmp_dir = self.qiime_config["temp_dir"] or "/tmp/"
        self.input_file = get_tmp_filename(tmp_dir=self.tmp_dir)
        self.support_lines = support_lines.splitlines()
        self._paths_to_clean_up = []
Example #3
def format_jnlp_file_lines(web_flag, url, tep_fp):
    """ Format the jnlp file for TopiaryExplorer """

    # write the jnlp header
    lines = [jnlp_top_block]

    # write the location of TopiaryExplorer
    if(web_flag):
        lines += ['http://topiaryexplorer.sourceforge.net/app/']
    else:
        topiaryexplorer_project_dir =\
            load_qiime_config()['topiaryexplorer_project_dir']
        if topiaryexplorer_project_dir:
            lines += ['file:' + topiaryexplorer_project_dir]
        else:
            print "WARNING: Couldn't create jnlp file - topiaryexplorer_project_dir is not defined in your qiime_config. tep file was created sucessfully."

    # write the jnlp body text
    lines += [jnlp_middle_block]
    if(url):
        lines += [url]
    else:
        lines += [abspath(tep_fp)]

    # write the jnlp footer
    lines += [jnlp_bottom_block]

    return lines
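# A minimal usage sketch for format_jnlp_file_lines (the .tep path and the
# output filename are hypothetical; the jnlp_*_block template strings are
# assumed to be defined at module level, as in the function above):

jnlp_lines = format_jnlp_file_lines(web_flag=True, url=None,
                                    tep_fp='my_analysis.tep')
jnlp_f = open('my_analysis.jnlp', 'w')
jnlp_f.writelines(jnlp_lines)
jnlp_f.close()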
def summarize_otus(processed_dir):
    """Parse (generating it first if needed) the per-library stats file.

    processed_dir: directory containing the gg_97_otus results
    """
    per_library_stats_file = join(processed_dir, 'gg_97_otus/per_library_stats.txt')

    # Generate the per_library_stats_file if it doesn't already exist
    if not exists(per_library_stats_file):
        qiime_config = load_qiime_config()
        biom_file = join(processed_dir, 'gg_97_otus/exact_uclust_ref_otu_table.biom')
        python_exe_fp = qiime_config['python_exe_fp']
        script_dir = get_qiime_scripts_dir()
        per_library_stats_script = join(script_dir, 'per_library_stats.py')
        command = '{0} {1} -i {2}'.format(python_exe_fp, per_library_stats_script, biom_file)

        # Run the script and produce per_library_stats.txt; communicate()
        # avoids the deadlock that wait() plus stdout.read() can cause when
        # the child fills its pipe buffer
        proc = Popen(command, shell=True, universal_newlines=True,
                     stdout=PIPE, stderr=STDOUT)
        stdout_data = proc.communicate()[0]
        f = open(per_library_stats_file, 'w')
        f.write(stdout_data)
        f.close()

    # File exists, parse out details
    start_lines = ['Seqs/sample detail:']
    header_lines, otu_summary_dict = parse_log_file(per_library_stats_file, start_lines)
    return header_lines, otu_summary_dict
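# Usage sketch (the directory path is hypothetical; judging from the
# parse_log_file call above, otu_summary_dict maps per-library labels to
# their sequence counts):

header_lines, otu_summary_dict = summarize_otus('/path/to/processed_data')
for label, count in otu_summary_dict.items():
    print '%s\t%s' % (label, count)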
    def setUp(self):

        self.files_to_remove = []
        self.dirs_to_remove = []

        # Create example output directory
        tmp_dir = get_qiime_temp_dir()
        self.test_out = mkdtemp(dir=tmp_dir,
                                prefix='core_qiime_analyses_test_',
                                suffix='')
        self.dirs_to_remove.append(self.test_out)

        # Get input data
        self.test_data = get_test_data_fps()

        self.qiime_config = load_qiime_config()
        self.qiime_config['jobs_to_start'] = 2
        self.qiime_config['seconds_to_sleep'] = 1

        # suppress stderr during tests (one of the system calls in the
        # workflow prints a warning, and we can't suppress that warning with
        # warnings.filterwarnings here because it comes from within the code
        # executed through the system call). Found this trick here:
        # http://stackoverflow.com/questions/9949633/suppressing-print-as-stdout-python
        self.saved_stderr = sys.stderr
        sys.stderr = StringIO()

        # Define number of seconds a test can run for before timing out
        # and failing
        initiate_timeout(600)
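    # A matching tearDown has to undo everything this setUp prepared. A
    # sketch, assuming remove_files and disable_timeout come from QIIME's
    # test utilities and rmtree from shutil:
    def tearDown(self):
        disable_timeout()                # cancel the per-test alarm
        sys.stderr = self.saved_stderr   # restore the suppressed stream
        remove_files(self.files_to_remove)
        for d in self.dirs_to_remove:
            # a test may already have removed its own output
            if exists(d):
                rmtree(d)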
    def setUp(self):
        """Set up some test variables"""
        self.newick = "((s1:0.2,s2:0.2):0.6,s3:0.8);"
        self.tree = parse_newick(self.newick, PhyloNode)

        self.newick_scaled = "((s1:25,s2:25):75,s3:100);"
        self.tree_scaled = parse_newick(self.newick_scaled, PhyloNode)
        self.tree_scaled.scaleBranchLengths(max_length=100, ultrametric=True)

        self.num_trees_considered = 10
        self.trans_values = {(None, None): ("#FFFFFF", ""),
                             (None, 0.5): ("#dddddd", "< 50%"),
                             (0.5, 0.7): ("#99CCFF", "50-70%"),
                             (0.7, 0.9): ("#82FF8B", "70-90%"),
                             (0.9, 0.999): ("#F8FE83", "90-99.9%"),
                             (0.999, None): ("#FF8582", "> 99.9%")}

        self.jack_newick = "((s1:0.2,s2:0.2)0.8:0.6,s3:0.8)1.0;"
        self.jack_tree = parse_newick(self.jack_newick, PhyloNode)

        self.jack_newick_scaled = "((s1:25,s2:25)0.8:75,s3:100)1.0;"
        self.jack_tree_scaled = parse_newick(self.jack_newick_scaled, PhyloNode)
        self.jack_tree_scaled.scaleBranchLengths(max_length=100,
            ultrametric=True)

        self.support = { 'trees_considered': 10,
            'support_dict': {"node0":1.0,
                            "node1":0.8}}

        self.qiime_config = load_qiime_config()
        self.tmp_dir = self.qiime_config['temp_dir'] or '/tmp/'
        self.output_file = get_tmp_filename(tmp_dir=self.tmp_dir)

        dict_mapping_data = {}
        dict_mapping_data["s1"] = {
            'Description':'s1 test description',
            'NumIndividuals':'100',
            'BarcodeSequence':'AAAAAAAAAACT',
            'LinkerPrimerSequence':'AAAAAAAAAAAAAAAAAAAAA',
            'ExampleHeader1':'Value1',
            'ExampleHeader2':'Val2'}
        dict_mapping_data["s2"] = {
            'Description':'s2 test description',
            'NumIndividuals':'200',
            'BarcodeSequence':'CAAAAAAAAACT',
            'LinkerPrimerSequence':'AAAAAAAAAAAAAAAAAAAAA',
            'ExampleHeader1':'Value2',
            'ExampleHeader2':'Val1'}
        dict_mapping_data["s3"] = {
            'Description':'s3 test description',
            'NumIndividuals':'300',
            'BarcodeSequence':'GAAAAAAAAACT',
            'LinkerPrimerSequence':'AAAAAAAAAAAAAAAAAAAAA',
            'ExampleHeader1':'Value2',
            'ExampleHeader2':'Val3'}

        self.mapping_data = [dict_mapping_data,
            "Example comment string for test"]

        self._paths_to_clean_up = []
    def test_adjust_workers(self):
        """adjust_workers stops clients"""

        workers, client_sockets = self._setup_server_and_clients()
        last_sock = client_sockets[-1]

        qiime_config = load_qiime_config()
        min_per_core = int(qiime_config['denoiser_min_per_core'])

        # no sockets get stopped
        self.assertEqual(
            adjust_workers(4 * min_per_core - 1, 4, client_sockets), 4)
        # if we can send something the socket is still alive
        self.assertEqual(last_sock.send("Hello"), 5)

        # now, kill one client
        self.assertEqual(
            adjust_workers(3 * min_per_core - 1, 4, client_sockets), 3)
        # socket should be closed
        self.assertRaises(error, last_sock.send, "Hello")
Example #8
def adjust_workers(num_flows, num_cpus, worker_sockets, log_fh=None):
    """Stop workers no longer needed.

    num_flows: number of flowgrams

    num_cpus: number of CPUs currently used

    worker_sockets: list of connected sockets

    log_fh: open fh to log file

    Returns new number of CPUs
    """

    qiime_config = load_qiime_config()
    min_per_core = int(qiime_config['denoiser_min_per_core'])
    if num_flows < (num_cpus - 1) * min_per_core:
        if log_fh:
            log_fh.write("Adjusting number of workers:\n")
            log_fh.write("flows: %d   cpus:%d\n" % (num_flows, num_cpus))
        # TODO: make sure this works with __future__ division
        per_core = max(min_per_core, (num_flows / num_cpus) + 1)
        for i in range(num_cpus):
            if i * per_core > num_flows:
                worker_sock = worker_sockets.pop()
                worker_sock.close()
                num_cpus = num_cpus - 1
                if log_fh:
                    log_fh.write("released worker %d\n" % i)
        if log_fh:
            log_fh.write("New number of cpus:%d\n" % num_cpus)
    if num_cpus == 0 or num_cpus != len(worker_sockets):
        raise ValueError, "adjust_workers left an inconsistent worker count!"
    return num_cpus
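# A worked example of the scaling rule above (illustrative numbers, not from
# a real run). With denoiser_min_per_core = 50 and four workers, the
# adjustment triggers once num_flows drops below (4 - 1) * 50 = 150:

min_per_core = 50
num_flows, num_cpus = 120, 4

per_core = max(min_per_core, (num_flows / num_cpus) + 1)  # max(50, 31) = 50
# (under "from __future__ import division" the /-term must become //)
for i in range(num_cpus):
    if i * per_core > num_flows:   # only i = 3: 150 > 120
        num_cpus = num_cpus - 1    # 4 -> 3, one worker released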
Example #9
    def setUp(self):
        # Get QIIME's temp dir
        self.qiime_config = load_qiime_config()
        self.tmp_dir = self.qiime_config['temp_dir'] or '/tmp/'

        self._paths_to_clean_up = []
        self._dirs_to_clean_up = []
    def setUp(self):
        """Set up some test variables"""
        self.qiime_config = load_qiime_config()
        self.tmp_dir = self.qiime_config['temp_dir'] or '/tmp/'

        self.plot_name = "example_name"

        self.headers = {}
        self.headers[HEADERS_HOR] = ["Sample1", "Sample2", "Sample3", "Sample4"]
        self.headers[HEADERS_VER] = ["Sample1", "Sample2", "Sample3", "Sample4"]

        self.matrix = matrix

        self.trans_values = {}
        self.trans_values[(None, None)] = (0, "")
        self.trans_values[(0.0, 0.25)] = (1, "(0-25%)")
        self.trans_values[(0.25, 0.5)] = (2, "(25-50%)")
        self.trans_values[(0.5, 0.75)] = (3, "(50-75%)")
        self.trans_values[(0.75, 1.0)] = (4, "(75-100%)")

        self.output_dir = path.join(self.tmp_dir, self.plot_name)

        self.plot_name_ns = "not_a_square_matrix"
        self.headers_ns = {}
        self.headers_ns[HEADERS_HOR] = ["Sample1", "Sample2",
                                            "Sample3", "Sample4"]
        self.headers_ns[HEADERS_VER] = ["Sample1", "Sample2", "Sample3"]
        self.matrix_ns = not_a_square_matrix
        self.output_dir_ns = path.join(self.tmp_dir, self.plot_name_ns)

        self._paths_to_clean_up = []
        self._dirs_to_clean_up = []
def main():
    option_parser, opts, args = \
        parse_command_line_parameters(suppress_verbose=True, **script_info)
        
    input_dir = opts.input_dir
    parameter_fp = opts.parameter_fp
    read1_indicator = opts.read1_indicator
    read2_indicator = opts.read2_indicator
    match_barcodes = opts.match_barcodes
    barcode_indicator = opts.barcode_indicator
    leading_text = opts.leading_text
    trailing_text = opts.trailing_text
    include_input_dir_path = opts.include_input_dir_path
    output_dir = abspath(opts.output_dir)
    remove_filepath_in_name = opts.remove_filepath_in_name
    print_only = opts.print_only
    
    if remove_filepath_in_name and not include_input_dir_path:
        option_parser.error("If --remove_filepath_in_name is enabled, "
            "--include_input_dir_path must also be enabled.")

    if parameter_fp:
        with open(parameter_fp, 'U') as parameter_f:
            params_dict = parse_qiime_parameters(parameter_f)
        params_str = get_params_str(params_dict['join_paired_ends'])
    else:
        params_dict = {}
        params_str = ""

    create_dir(output_dir)
    
    all_files = []
    extensions = ['.fastq.gz', '.fastq', '.fq.gz', '.fq']
    
    for root, dirs, fps in walk(input_dir):
        for fp in fps:
            for extension in extensions:
                if fp.endswith(extension):
                    all_files += [abspath(join(root, fp))]
        
    pairs, bc_pairs = get_pairs(all_files, read1_indicator, read2_indicator,
        match_barcodes, barcode_indicator)

    commands = create_commands_jpe(pairs, output_dir,
        params_str, leading_text, trailing_text, include_input_dir_path,
        remove_filepath_in_name, match_barcodes, bc_pairs)
        
    qiime_config = load_qiime_config()
    if print_only:
        command_handler = print_commands
    else:
        command_handler = call_commands_serially
    logger = WorkflowLogger(generate_log_fp(output_dir),
                            params=params_dict,
                            qiime_config=qiime_config)
    # Call the command handler on the list of commands
    command_handler(commands,
                    status_update_callback=no_status_updates,
                    logger=logger,
                    close_logger_on_success=True)
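# Since str.endswith also accepts a tuple, the nested extension loop in
# main() above can be collapsed; an equivalent sketch using the same names:

all_files = []
for root, dirs, fps in walk(input_dir):
    for fp in fps:
        if fp.endswith(tuple(extensions)):
            all_files.append(abspath(join(root, fp)))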
Example #12
    def setUp(self):
        """ """
        self.test_data = get_test_data_fps()
        self.files_to_remove = []
        self.dirs_to_remove = []

        # Create example output directory
        tmp_dir = get_qiime_temp_dir()
        self.test_out = mkdtemp(dir=tmp_dir,
                                prefix='core_qiime_analyses_test_',
                                suffix='')
        self.dirs_to_remove.append(self.test_out)

        self.qiime_config = load_qiime_config()
        self.params = parse_qiime_parameters(params_f1)

        # suppress stderr during tests (one of the system calls in the
        # workflow prints a warning, and we can't suppress that warning with
        # warnings.filterwarnings here because it comes from within the code
        # executed through the system call). Found this trick here:
        # http://stackoverflow.com/questions/9949633/suppressing-print-as-stdout-python
        self.saved_stderr = sys.stderr
        sys.stderr = StringIO()

        initiate_timeout(180)
Example #13
    def setUp(self):
        # Get QIIME's temp dir
        self.qiime_config = load_qiime_config()
        self.tmp_dir = self.qiime_config['temp_dir'] or '/tmp/'
        self.data = [0.1336206897, 0.2740524781, 0.5923268322]
        self.labels = ['o__Bacteroidales', 'o__Clostridiales', 'not_shared']

        self._paths_to_clean_up = []
Example #14
    def setUp(self):
        # Get QIIME's temp dir
        self.qiime_config = load_qiime_config()
        self.tmp_dir = self.qiime_config['temp_dir'] or '/tmp/'

        self._paths_to_clean_up = []
        self._dirs_to_clean_up = []
        raise ValueError, "Test not implemented!!!"
    def setUp(self):
        """ """
        
        self.qiime_config = load_qiime_config()
        self.dirs_to_remove = []
        self.files_to_remove = []
        
        # this is specific to the web apps only
        test_dir = abspath(dirname(__file__))
        sff_original_fp = os.path.join(test_dir, 'support_files',
                                       'Fasting_subset.sff')
        
        self.sff_fp = os.path.join('/%s/' % environ['HOME'], 
                                   'Fasting_subset.sff')
        self.files_to_remove.append(self.sff_fp)
        copy(sff_original_fp, self.sff_fp)
        
        self.illumina_fps = [os.path.join(test_dir, 'support_files',
                                          's_8_1_sequence_100_records.txt'),
                             os.path.join(test_dir, 'support_files',
                                          's_8_2_sequence_100_records.txt')]
        self.illumina_map_fp = os.path.join(test_dir, 'support_files',
                                            's8_map_incomplete.txt')

        self.fasta_fps = [os.path.join(test_dir, 'support_files',
                                       'test_split_lib_seqs.fasta')]
        self.fasta_map_fp = os.path.join(test_dir, 'support_files',
                                         'fasta_mapping_file.txt')

        tmp_dir = "/%s/test_wf" % environ['HOME']
        self.dirs_to_remove.append(tmp_dir)
        
        #self.qiime_config['temp_dir'] or '/tmp/'
        if not exists(tmp_dir):
            makedirs(tmp_dir)
            # if test creates the temp dir, also remove it
            #self.dirs_to_remove.append(tmp_dir)
            
        self.wf_out="/%s/test_processed_data" % environ['HOME']
        #print self.wf_out
        self.dirs_to_remove.append(self.wf_out)
        self.gg_out=os.path.join(self.wf_out,'gg_97_otus')
        if not exists(self.gg_out):
            makedirs(self.gg_out)
            #self.dirs_to_remove.append(self.gg_out)
            
        self.fasting_mapping_fp = get_tmp_filename(tmp_dir=tmp_dir,
                                                   prefix='qiime_wf_mapping',
                                                   suffix='.txt')
        fasting_mapping_f = open(self.fasting_mapping_fp, 'w')
        fasting_mapping_f.write(fasting_map)
        fasting_mapping_f.close()
        self.files_to_remove.append(self.fasting_mapping_fp)
        
        self.params = parse_qiime_parameters(qiime_parameters_f)

        signal.signal(signal.SIGALRM, timeout)
        # set the 'alarm' to go off in allowed_seconds seconds
        signal.alarm(allowed_seconds_per_test)
    def setUp(self):
        """Set up some test variables"""
        # Get the tmp folder
        self.qiime_config = load_qiime_config()
        self.tmp_dir = self.qiime_config['temp_dir'] or '/tmp/'
        # Initialize some variables
        self.biom_table = biom_table.splitlines()
        self.mapping = mapping_file.splitlines()
        self._paths_to_clean_up = []
    def setUp(self):
        self.qiime_config = load_qiime_config()
        self.tmp_dir = self.qiime_config['temp_dir'] or '/tmp/'

        self.l19_data = np.array([
            [7, 1, 0, 0, 0, 0, 0, 0, 0],
            [4, 2, 0, 0, 0, 1, 0, 0, 0],
            [2, 4, 0, 0, 0, 1, 0, 0, 0],
            [1, 7, 0, 0, 0, 0, 0, 0, 0],
            [0, 8, 0, 0, 0, 0, 0, 0, 0],
            [0, 7, 1, 0, 0, 0, 0, 0, 0],
            [0, 4, 2, 0, 0, 0, 2, 0, 0],
            [0, 2, 4, 0, 0, 0, 1, 0, 0],
            [0, 1, 7, 0, 0, 0, 0, 0, 0],
            [0, 0, 8, 0, 0, 0, 0, 0, 0],
            [0, 0, 7, 1, 0, 0, 0, 0, 0],
            [0, 0, 4, 2, 0, 0, 0, 3, 0],
            [0, 0, 2, 4, 0, 0, 0, 1, 0],
            [0, 0, 1, 7, 0, 0, 0, 0, 0],
            [0, 0, 0, 8, 0, 0, 0, 0, 0],
            [0, 0, 0, 7, 1, 0, 0, 0, 0],
            [0, 0, 0, 4, 2, 0, 0, 0, 4],
            [0, 0, 0, 2, 4, 0, 0, 0, 1],
            [0, 0, 0, 1, 7, 0, 0, 0, 0]
        ])
        self.l19_sample_names = [
            'sam1', 'sam2', 'sam3', 'sam4', 'sam5', 'sam6',
            'sam7', 'sam8', 'sam9', 'sam_middle', 'sam11', 'sam12', 'sam13',
            'sam14', 'sam15', 'sam16', 'sam17', 'sam18', 'sam19']
        self.l19_taxon_names = ['tax1', 'tax2', 'tax3', 'tax4', 'endbigtaxon',
                                'tax6', 'tax7', 'tax8', 'tax9']
        self.l19_taxon_names_w_underscore = ['ta_x1', 'tax2', 'tax3', 'tax4',
                                             'endbigtaxon', 'tax6', 'tax7',
                                             'tax8', 'tax9']

        l19 = Table(self.l19_data.T, self.l19_taxon_names,
                    self.l19_sample_names)
        fd, self.l19_fp = mkstemp(dir=self.tmp_dir,
                                  prefix='test_bdiv_otu_table',
                                  suffix='.blom')
        os.close(fd)
        write_biom_table(l19, self.l19_fp)

        l19_w_underscore = Table(self.l19_data.T,
                                 self.l19_taxon_names_w_underscore,
                                 self.l19_sample_names)
        fd, self.l19_w_underscore_fp = mkstemp(dir=self.tmp_dir,
                                               prefix='test_bdiv_otu_table',
                                               suffix='.blom')
        os.close(fd)
        write_biom_table(l19_w_underscore, self.l19_w_underscore_fp)

        self.l19_tree_str = '((((tax7:0.1,tax3:0.2):.98,tax8:.3, tax4:.3):.4,\
 ((tax1:0.3, tax6:.09):0.43,tax2:0.4):0.5):.2, (tax9:0.3, endbigtaxon:.08));'
        self.l19_tree = parse_newick(self.l19_tree_str, PhyloNode)

        self.files_to_remove = [self.l19_fp, self.l19_w_underscore_fp]
        self.folders_to_remove = []
    def test_denoiser_min_per_core(self):
        """denoiser_min_per_core is set to a good value"""

        qiime_config = load_qiime_config()
        min_per_core = qiime_config['denoiser_min_per_core']
        if min_per_core:
            self.assertTrue(int(min_per_core) > 0)
        else:
            self.fail('denoiser_min_per_core not defined in qiime_config.')
Example #19
    def setUp(self):
        """Define some test data."""
        self.qiime_config = load_qiime_config()
        self.dirs_to_remove = []

        self.tmp_dir = self.qiime_config['temp_dir'] or '/tmp/'
        if not exists(self.tmp_dir):
            makedirs(self.tmp_dir)
            # if test creates the temp dir, also remove it
            self.dirs_to_remove.append(self.tmp_dir)

        self.otu_table1 = table_factory(data=array([[2, 0, 0, 1],
                                                    [1, 1, 1, 1],
                                                    [0, 0, 0, 0]]).T,
                                        sample_ids=list('XYZ'),
                                        observation_ids=list('abcd'),
                                        constructor=DenseOTUTable)
        fd, self.otu_table1_fp = mkstemp(dir=self.tmp_dir,
                                         prefix='alpha_diversity_tests',
                                         suffix='.biom')
        close(fd)
        open(self.otu_table1_fp, 'w').write(
            format_biom_table(self.otu_table1))

        self.otu_table2 = table_factory(data=array([[2, 0, 0, 1],
                                                    [1, 1, 1, 1],
                                                    [0, 0, 0, 0]]).T,
                                        sample_ids=list('XYZ'),
                                        observation_ids=['a', 'b', 'c', 'd_'],
                                        constructor=DenseOTUTable)
        fd, self.otu_table2_fp = mkstemp(dir=self.tmp_dir,
                                         prefix='alpha_diversity_tests',
                                         suffix='.biom')
        close(fd)
        open(self.otu_table2_fp, 'w').write(
            format_biom_table(self.otu_table2))

        self.single_sample_otu_table = table_factory(
            data=array([[2, 0, 0, 1]]).T,
            sample_ids=list('X'),
            observation_ids=list('abcd'),
            constructor=DenseOTUTable)
        fd, self.single_sample_otu_table_fp = mkstemp(
            dir=self.tmp_dir,
            prefix='alpha_diversity_tests',
            suffix='.biom')
        close(fd)
        open(self.single_sample_otu_table_fp, 'w').write(
            format_biom_table(self.single_sample_otu_table))

        self.tree1 = parse_newick('((a:2,b:3):2,(c:1,d:2):7);')
        self.tree2 = parse_newick("((a:2,'b':3):2,(c:1,'d_':2):7);")

        self.files_to_remove = [self.otu_table1_fp, self.otu_table2_fp,
                                self.single_sample_otu_table_fp]
Example #20
def main():
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    qiime_config = load_qiime_config()
    
    rdp_jarpath = get_rdp_jarpath()
    if rdp_jarpath is None:
        rdp_version = "Not installed."
    else:
        rdp_version = split(rdp_jarpath)[1]

    java_version = get_java_version()
    if java_version is None:
        java_version = "Not installed."

    system_info = [
     ("Platform", platform),
     ("Python version",python_version.replace('\n', ' ')),
     ("Python executable",executable)]
    max_len = max([len(e[0]) for e in system_info])
    print "\nSystem information"
    print "=================="
    for v in system_info:
        print "%*s:\t%s" % (max_len, v[0], v[1])

    version_info = [
     ("PyCogent version", pycogent_lib_version),
     ("NumPy version", numpy_lib_version),
     ("matplotlib version", matplotlib_lib_version),
     ("biom-format version", biom_lib_version),
     ("qcli version", qcli_lib_version),
     ("QIIME library version", get_qiime_library_version()),
     ("QIIME script version", __version__),
     ("PyNAST version (if installed)", pynast_lib_version),
     ("RDP Classifier version (if installed)", rdp_version),
     ("Java version (if installed)", java_version),
     ("Emperor version", emperor_lib_version)]

    max_len = max([len(e[0]) for e in version_info])
    print "\nDependency versions"
    print "==================="
    for v in version_info:
        print "%*s:\t%s" % (max_len, v[0], v[1])

    print "\nQIIME config values"
    print "==================="
    max_len = max([len(key) for key in qiime_config])
    for key, value in qiime_config.items():
        print "%*s:\t%s" % (max_len, key, value)

    # run the TestCase.main function to do the tests
    # need to mess with the arg string, otherwise TestCase complains
    if opts.test:
        print "\n\nrunning checks:\n"
        test_main(argv=["", "-v"])
Example #21
def main():
    option_parser, opts, args =\
       parse_command_line_parameters(**script_info)
    
    otu_table_fp = opts.otu_table_fp
    mapping_fp = opts.mapping_fp
    tree_fp = opts.tree_fp
    output_dir = opts.out_fp
    output_basename = splitext(split(otu_table_fp)[1])[0]
    
    if not output_dir:
        output_dir = 'make_tep_output/'
    
    create_dir(output_dir)
    
    tep_fp = '%s/%s.tep' % (output_dir, output_basename)
    jnlp_fp = '%s/%s.jnlp' % (output_dir, output_basename)
    tepfile = open(tep_fp, 'w')
    otu_lines = open(otu_table_fp, 'U').readlines()
    sample_ids, otu_ids, otu_table, metadata = parse_otu_table(otu_lines)
    mapping_lines = open(mapping_fp, 'U')    
    tree_lines = open(tree_fp, 'U')
    
    lines = ['>>tre\n']
    lines += tree_lines.readlines()
    lines += ['\n']
    if metadata:
        lines += ['>>otm\n#OTU ID\tOTU Metadata\n']
        for i in range(len(otu_ids)):
            lines += [otu_ids[i] + '\t']
            for m in metadata[i]:
                lines += [m + ';']
            lines += ['\n']
    lines += ['>>osm\n']
    lines += otu_lines
    lines += ['\n>>sam\n']
    lines += mapping_lines.readlines()

    tepfile.writelines(lines)

    jnlpfile = open(jnlp_fp, 'w')
    lines = [jnlp_top_block]
    if opts.web_flag:
        lines += ['http://topiaryexplorer.sourceforge.net/app/']
    else:
        lines += ['file:' + load_qiime_config()['topiaryexplorer_project_dir']]
    lines += [jnlp_middle_block]
    if opts.url:
        lines += [opts.url]
    else:
        lines += [os.path.abspath(tep_fp)]
    lines += [jnlp_bottom_block]
    jnlpfile.writelines(lines)
Example #22
def setup_workers(num_cpus, outdir, server_socket, verbose=True,
                  error_profile=None):
    """Start workers waiting for data.

    num_cpus: number of cores

    outdir: directory where the workers will run

    server_socket: an open socket to the server

    verbose: verbose flag passed to the workers

    error_profile: filepath to the error profiles, passed to workers

"""

    qiime_config = load_qiime_config()
    DENOISE_WORKER = join(get_qiime_scripts_dir(), "denoiser_worker.py")
    CLOUD_DISPATCH = join(get_qiime_scripts_dir(), "ec2Dispatch")
    CLOUD_ENV = qiime_config['cloud_environment']
    CLOUD = CLOUD_ENV != "False"

    workers = []
    client_sockets = []
    # somewhat unique id for cluster job
    tmpname = "".join(sample(list(lowercase), 8))

    host, port = server_socket.getsockname()

    # TODO: this should be set to a defined wait time using alarm()
    for i in range(num_cpus):
        name = outdir + ("/%sworker%d" % (tmpname, i))
        workers.append(name)
        if CLOUD:
            cmd = "%s %d %s %s -f %s -s %s -p %s" % (CLOUD_DISPATCH, i + 1, qiime_config['python_exe_fp'],
                                                     DENOISE_WORKER, name, host, port)
        else:
            cmd = "%s %s -f %s -s %s -p %s" % (qiime_config['python_exe_fp'],
                                               DENOISE_WORKER, name, host, port)

        if verbose:
            cmd += " -v"
        if error_profile:
            cmd += " -e %s" % error_profile

        submit_jobs([cmd], tmpname)
        # wait until the client connects
        # This might be a race condition -> make the client robust
        client_socket, client_address = server_socket.accept()
        client_sockets.append((client_socket, client_address))

    return workers, client_sockets
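# setup_workers expects an already-listening server_socket; a sketch of how a
# caller might create one with the standard library socket module (the worker
# count and output directory are hypothetical):

import socket

server_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
server_socket.bind(("", 0))    # port 0: let the OS pick a free port
server_socket.listen(16)       # backlog for the connecting workers

workers, client_sockets = setup_workers(num_cpus=4, outdir="/tmp/denoiser",
                                        server_socket=server_socket)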
    def setUp(self):
        """Set up some test variables"""
        # Get the temp folder
        self.qiime_config = load_qiime_config()
        self.tmp_dir = self.qiime_config['temp_dir'] or '/tmp/'
        # Initialize some variables
        self.dict_links = {
            0: """<a class="table_cell" target="_blank" href="weighted_unifrac_2d_continuous/weighted_unifrac_pc_2D_PCoA_plots.html">View 2d continuous coloring plots</a>""",
            1: """<a class="table_cell" target="_blank" href="weighted_unifrac_2d_discrete/weighted_unifrac_pc_2D_PCoA_plots.html">View 2d discrete coloring plots</a>""",
            2: """<a class="table_cell" target="_blank" href="index.html">View 3d plots</a>""",
            3: """<a class="table_cell" target="_blank" href="weighted_unifrac_pc.txt">Download raw PCoA data (Right click - Save as)</a>"""
        }
        self._dirs_to_clean_up = []
    def setUp(self):
        self.qiime_config = load_qiime_config()
        self.tmp_dir = self.qiime_config['temp_dir'] or '/tmp/'

        # Temporary input file with taxonomy
        fd, self.tmp_otu_fp = mkstemp(dir=self.tmp_dir,
                                      prefix='R_test_otu_table_',
                                      suffix='.biom')
        close(fd)
        seq_file = open(self.tmp_otu_fp, 'w')
        seq_file.write(test_otu_table)
        seq_file.close()

        # Temporary input file without taxonomy
        fd, self.tmp_otu_fp_no_taxa = mkstemp(dir=self.tmp_dir,
                                              prefix='R_test_otu_table_no_taxa',
                                              suffix='.biom')
        close(fd)
        seq_file = open(self.tmp_otu_fp_no_taxa, 'w')
        seq_file.write(test_otu_table_no_taxa)
        seq_file.close()

        fd, self.tmp_map_fp = mkstemp(dir=self.tmp_dir,
                                      prefix='R_test_map_',
                                      suffix='.txt')
        close(fd)
        seq_file = open(self.tmp_map_fp, 'w')
        seq_file.write(test_map)
        seq_file.close()

        # close each raw descriptor right away so the handles don't leak
        fd, self.tmp_otu_fp_fitZIG_out = mkstemp(
            dir=self.tmp_dir, prefix='R_test_otu_table_fitZIG_out_',
            suffix='.txt')
        close(fd)
        fd, self.tmp_otu_fp_DESeq2_out = mkstemp(
            dir=self.tmp_dir, prefix='R_test_otu_table_DESeq2_out_',
            suffix='.txt')
        close(fd)
        fd, self.tmp_otu_fp_fitZIG_out_no_taxa = mkstemp(
            dir=self.tmp_dir, prefix='R_test_otu_table_fitZIG_out_no_taxa',
            suffix='.txt')
        close(fd)
        fd, self.tmp_otu_fp_DESeq2_out_no_taxa = mkstemp(
            dir=self.tmp_dir, prefix='R_test_otu_table_DESeq2_out_no_taxa',
            suffix='.txt')
        close(fd)

        self.files_to_remove = [
            self.tmp_otu_fp, self.tmp_otu_fp_no_taxa, self.tmp_map_fp,
            self.tmp_otu_fp_fitZIG_out, self.tmp_otu_fp_DESeq2_out,
            self.tmp_otu_fp_fitZIG_out_no_taxa,
            self.tmp_otu_fp_DESeq2_out_no_taxa]

        DA_fitZIG(self.tmp_otu_fp_no_taxa,
                  self.tmp_otu_fp_fitZIG_out_no_taxa,
                  self.tmp_map_fp, 'Individual', 'S1', 'S2')
        DA_DESeq2(self.tmp_otu_fp_no_taxa,
                  self.tmp_otu_fp_DESeq2_out_no_taxa,
                  self.tmp_map_fp, 'Individual', 'S1', 'S2',
                  DESeq2_diagnostic_plots=False)

        DA_fitZIG(self.tmp_otu_fp, self.tmp_otu_fp_fitZIG_out,
                  self.tmp_map_fp, 'Individual', 'S1', 'S2')
        DA_DESeq2(self.tmp_otu_fp, self.tmp_otu_fp_DESeq2_out,
                  self.tmp_map_fp, 'Individual', 'S1', 'S2',
                  DESeq2_diagnostic_plots=False)
    def test_cluster_jobs_script(self):
        """cluster_jobs_fp is set to a good value"""

        qiime_config = load_qiime_config()
        submit_script = qiime_config['cluster_jobs_fp']
        if submit_script:
            self.assertTrue(exists(submit_script),
                            "cluster_jobs_fp is not set to a valid path in qiime config: %s" % submit_script)
            # check if executable
            self.assertTrue(access(submit_script, X_OK),
                            "cluster_jobs_fp is not executable: %s" % submit_script)
        else:
            # can't run in parallel, but not a critical error
            pass
    def setUp(self):
        """Set up some test variables"""
        self.qiime_config = load_qiime_config()
        self.tmp_dir = self.qiime_config["temp_dir"] or "/tmp/"
        self.output_file = get_tmp_filename(tmp_dir=self.tmp_dir)

        self.d_data = {"s1": (0.005, 0.08), "s2": (0.0, 0.01), "s3": (0.02, 0.3), "s4": (0.82, 1.0)}

        self.title_0 = "Example title: 0"
        self.title_1 = "Example title: 1"

        self.name = "Example name"

        self._paths_to_clean_up = []
    def setUp(self):
        """ """
        
        self.qiime_config = load_qiime_config()
        self.dirs_to_remove = []
        self.files_to_remove = []
        
        # this is specific to the web apps only
        test_dir = abspath(dirname(__file__))
        sff_original_fp = os.path.join(test_dir, 'support_files',
                                       'Fasting_subset.sff')

        # copy sff file to working directory
        self.sff_dir = tempfile.mkdtemp()
        self.dirs_to_remove.append(self.sff_dir)
        
        self.sff_fp = os.path.join(self.sff_dir, 'Fasting_subset.sff')
        copy(sff_original_fp, self.sff_fp)
        self.files_to_remove.append(self.sff_fp)
        
        tmp_dir = self.qiime_config['temp_dir'] or '/tmp/'
        if not exists(tmp_dir):
            makedirs(tmp_dir)
            # if test creates the temp dir, also remove it
            self.dirs_to_remove.append(tmp_dir)
        
        self.wf_out = get_tmp_filename(tmp_dir=tmp_dir, prefix='qiime_wf_out',
                                       suffix='', result_constructor=str)
        self.dirs_to_remove.append(self.wf_out)
        
        self.fasting_mapping_fp = get_tmp_filename(tmp_dir=tmp_dir,
                                                   prefix='qiime_wf_mapping',
                                                   suffix='.txt')
        fasting_mapping_f = open(self.fasting_mapping_fp, 'w')
        fasting_mapping_f.write(fasting_map)
        
        fasting_mapping_f.close()
        self.files_to_remove.append(self.fasting_mapping_fp)
        
        working_dir = self.qiime_config['working_dir'] or './'
        jobs_dir = join(working_dir,'jobs')
        if not exists(jobs_dir):
            # only clean up the jobs dir if it doesn't already exist
            self.dirs_to_remove.append(jobs_dir)
        self.params = parse_qiime_parameters(qiime_parameters_f.split('\n'))

        signal.signal(signal.SIGALRM, timeout)
        # set the 'alarm' to go off in allowed_seconds seconds
        signal.alarm(allowed_seconds_per_test)
Example #28
    def setUp(self):
        """ """
        self.test_data = get_test_data_fps()
        self.files_to_remove = []
        self.dirs_to_remove = []

        # Create example output directory
        tmp_dir = get_qiime_temp_dir()
        self.test_out = mkdtemp(dir=tmp_dir,
                                prefix='core_qiime_analyses_test_',
                                suffix='')
        self.dirs_to_remove.append(self.test_out)

        self.qiime_config = load_qiime_config()
        self.params = parse_qiime_parameters([])

        initiate_timeout(60)
Example #29
    def setUp(self):
        """Define some test data."""
        self.qiime_config = load_qiime_config()
        self.dirs_to_remove = []

        self.tmp_dir = self.qiime_config["temp_dir"] or "/tmp/"
        if not exists(self.tmp_dir):
            makedirs(self.tmp_dir)
            # if test creates the temp dir, also remove it
            self.dirs_to_remove.append(self.tmp_dir)

        self.otu_table1 = table_factory(
            data=array([[2, 0, 0, 1], [1, 1, 1, 1], [0, 0, 0, 0]]).T,
            sample_ids=list("XYZ"),
            observation_ids=list("abcd"),
            constructor=DenseOTUTable,
        )
        self.otu_table1_fp = get_tmp_filename(
            tmp_dir=self.tmp_dir, prefix="alpha_diversity_tests", suffix=".biom", result_constructor=str
        )
        open(self.otu_table1_fp, "w").write(format_biom_table(self.otu_table1))

        self.otu_table2 = table_factory(
            data=array([[2, 0, 0, 1], [1, 1, 1, 1], [0, 0, 0, 0]]).T,
            sample_ids=list("XYZ"),
            observation_ids=["a", "b", "c", "d_"],
            constructor=DenseOTUTable,
        )
        self.otu_table2_fp = get_tmp_filename(
            tmp_dir=self.tmp_dir, prefix="alpha_diversity_tests", suffix=".biom", result_constructor=str
        )
        open(self.otu_table2_fp, "w").write(format_biom_table(self.otu_table2))

        self.single_sample_otu_table = table_factory(
            data=array([[2, 0, 0, 1]]).T, sample_ids=list("X"), observation_ids=list("abcd"), constructor=DenseOTUTable
        )
        self.single_sample_otu_table_fp = get_tmp_filename(
            tmp_dir=self.tmp_dir, prefix="alpha_diversity_tests", suffix=".biom", result_constructor=str
        )
        open(self.single_sample_otu_table_fp, "w").write(format_biom_table(self.single_sample_otu_table))

        self.tree1 = parse_newick("((a:2,b:3):2,(c:1,d:2):7);")
        self.tree2 = parse_newick("((a:2,'b':3):2,(c:1,'d_':2):7);")

        self.files_to_remove = [self.otu_table1_fp, self.otu_table2_fp, self.single_sample_otu_table_fp]
    def setUp(self):

        self.qiime_config = load_qiime_config()
        self.tmp_dir = self.qiime_config['temp_dir'] or '/tmp/'

        self.otu_table_data = np.array([[2, 1, 0],
                                        [0, 5, 0],
                                        [0, 3, 0],
                                        [1, 2, 0]])
        self.sample_names = list('YXZ')
        self.taxon_names = list('bacd')
        self.otu_metadata = [{'domain': 'Archaea'},
                             {'domain': 'Bacteria'},
                             {'domain': 'Bacteria'},
                             {'domain': 'Bacteria'}]

        self.otu_table = Table(self.otu_table_data,
                               self.taxon_names,
                               self.sample_names,
                               observation_metadata=[{}, {}, {}, {}],
                               sample_metadata=[{}, {}, {}])

        self.otu_table_meta = Table(self.otu_table_data,
                                    self.taxon_names, self.sample_names,
                                    observation_metadata=self.otu_metadata)

        fd, self.otu_table_fp = mkstemp(dir=self.tmp_dir,
                                        prefix='test_rarefaction',
                                        suffix='.biom')
        close(fd)
        fd, self.otu_table_meta_fp = mkstemp(dir=self.tmp_dir,
                                             prefix='test_rarefaction',
                                             suffix='.biom')
        close(fd)

        self.rare_dir = mkdtemp(dir=self.tmp_dir,
                                prefix='test_rarefaction_dir', suffix='')

        write_biom_table(self.otu_table, self.otu_table_fp)
        write_biom_table(self.otu_table_meta, self.otu_table_meta_fp)

        self._paths_to_clean_up = [self.otu_table_fp, self.otu_table_meta_fp]
        self._dirs_to_clean_up = [self.rare_dir]
Example #31
def get_flowgram_distances_on_cluster(id, flowgram, flowgrams, fc, ids, num_cores,
                                      num_flows, spread, client_sockets=[]):
    """Computes distance scores of flowgram to all flowgrams in parser.

    id: The flowgram identifier, also used to name intermediate files
    
    flowgram: This flowgram is used to filter all the other flowgrams

    flowgrams: iterable filehandle of flowgram file

    fc: a sink of flowgrams, which serves as source in the next round

    ids: list of flowgram ids that should be used from flowgrams

    num_cores: number of cpus

    num_flows: Number of flows in parser

    client_sockets: A list of open sockets for client-server communication
    
    spread: historical distribution of processing runtimes

    """
    epoch = time()

    check_flowgram_ali_exe()

    qiime_config = load_qiime_config()
    min_per_core = int(qiime_config['denoiser_min_per_core'])
    CLOUD = qiime_config['cloud_environment'] != "False"
    # if using "from __future__ import division" this has to be checked,
    # as we want true integer division here

    per_core = max(min_per_core, (num_flows / num_cores) + 1)
    names = []
    scores = []
    
    # Need to call this here, since we iterate over the same iterator
    # repeatedly. Otherwise the call in ifilter would reset the iterator
    # by implicitly calling __iter__.
    flowgrams_iter = flowgrams.__iter__()
    # prepare input files and commands
    # synchronous client-server communication

    workload = compute_workload(num_cores, num_flows, spread)

    debug_count = 0
    for i in range(num_cores):
        socket = client_sockets[i]
        # send the master flowgram to the worker first
        send_flowgram_to_socket(id, flowgram, socket)

        if workload[i] < 1:
            # no data left for this worker
            save_send(socket, "--END--")
            continue
        else:
            # then add all others which are still valid, i.e. in ids
            for k, f in izip(range(workload[i]),
                             ifilter(lambda f: f.Name in ids,
                                     flowgrams_iter)):
                fc.add(f)
                send_flowgram_to_socket(k, f, socket, trim=False)
                names.append(f.Name)
                debug_count += 1
            # send the termination signal
            save_send(socket, "--END--")

    # asynchronous client-server communication
    # ClientHandlers write data into results
    results = [None] * num_cores
    timing = [0.0 for x in xrange(num_cores)]
    for i in range(num_cores):
        socket = client_sockets[i]
        ClientHandler(socket, i, results, timing)
    loop()
    # end asynchronous loop
    
    spread = adjust_processing_time(num_cores, workload, timing, epoch)

    # flatten the per-worker result lists
    scores = [item for sublist in results for item in sublist]

    if debug_count != len(scores):
        raise RuntimeError, ("Something bad has happened! I received fewer "
                             "alignment scores (%d) than there are flowgrams "
                             "(%d). Most likely this means that the alignment "
                             "program is not set up correctly or is corrupted. "
                             "Please run the test scripts to figure out the "
                             "cause of the error."
                             % (len(scores), debug_count))

    return (scores, names, fc) 
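# The integer-division caveat flagged in the comments above: under
# "from __future__ import division", num_flows / num_cores yields a float,
# so the workload split must use floor division explicitly. A sketch with
# illustrative numbers:

num_flows, num_cores, min_per_core = 1000, 8, 50
per_core = max(min_per_core, num_flows // num_cores + 1)  # 126, stays an int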
Example #32
    def setUp(self):
        """Set up some test variables"""
        self.qiime_config = load_qiime_config()
        self.tmp_dir = self.qiime_config['temp_dir'] or '/tmp/'

        data = {}
        data[LD_NAME] = get_tmp_filename(tmp_dir="",
                                         suffix='').replace("\"", "")

        headers = {}
        headers[LD_HEADERS_VER] = ["Sample1", "Sample2", "Sample3", "Sample4"]
        headers[LD_HEADERS_HOR] = ["Sample1", "Sample2", "Sample3", "Sample4"]
        data[LD_HEADERS] = headers

        matrix = [[None, 0.1, 0.9, 0.5], [None, None, 0.8, 0.7],
                  [None, None, None, 0.4], [None, None, None, None]]
        data[LD_MATRIX] = matrix

        trans_values = {}
        trans_values[(None, None)] = (0, "")
        trans_values[(0.0, 0.25)] = (1, "(0-25%)")
        trans_values[(0.25, 0.5)] = (2, "(25-50%)")
        trans_values[(0.5, 0.75)] = (3, "(50-75%)")
        trans_values[(0.75, 1.0)] = (4, "(75-100%)")
        data[LD_TRANSFORM_VALUES] = trans_values
        data[LD_TABLE_TITLE] = "Example table title"

        self.html_fp = path.join(self.tmp_dir, data[LD_NAME] + '.html')
        self.output_dir = path.join(self.tmp_dir, data[LD_NAME])

        self.list_data_single_plot = [data]

        data1 = {}
        data1[LD_NAME] = get_tmp_filename(tmp_dir="",
                                          suffix='').replace("\"", "")
        data1[LD_HEADERS] = headers
        data1[LD_MATRIX] = matrix
        data1[LD_TRANSFORM_VALUES] = trans_values
        data1[LD_TABLE_TITLE] = "Example table title"

        self.list_data_multiple_plots = [data, data1]

        dict_mapping_data = {}
        dict_mapping_data["Sample1"] = {
            'Description': 'Sample1 test description',
            'NumIndividuals': '100',
            'BarcodeSequence': 'AAAAAAAAAACT',
            'LinkerPrimerSequence': 'AAAAAAAAAAAAAAAAAAAAA',
            'ExampleHeader1': 'Value1',
            'ExampleHeader2': 'Val2'
        }
        dict_mapping_data["Sample2"] = {
            'Description': 'Sample2 test description',
            'NumIndividuals': '200',
            'BarcodeSequence': 'CAAAAAAAAACT',
            'LinkerPrimerSequence': 'AAAAAAAAAAAAAAAAAAAAA',
            'ExampleHeader1': 'Value2',
            'ExampleHeader2': 'Val1'
        }
        dict_mapping_data["Sample3"] = {
            'Description': 'Sample3 test description',
            'NumIndividuals': '300',
            'BarcodeSequence': 'GAAAAAAAAACT',
            'LinkerPrimerSequence': 'AAAAAAAAAAAAAAAAAAAAA',
            'ExampleHeader1': 'Value2',
            'ExampleHeader2': 'Val3'
        }
        dict_mapping_data["Sample4"] = {
            'Description': 'Sample4 test description',
            'NumIndividuals': '400',
            'BarcodeSequence': 'TAAAAAAAAACT',
            'LinkerPrimerSequence': 'AAAAAAAAAAAAAAAAAAAAA',
            'ExampleHeader1': 'Value3',
            'ExampleHeader2': 'Val1'
        }
        self.mapping_data = [
            dict_mapping_data, "Example comment string for test"
        ]

        self._paths_to_clean_up = []
        self._dirs_to_clean_up = []
Example #33
    def setUp(self):
        self.qiime_config = load_qiime_config()
        self.tmp_dir = self.qiime_config['temp_dir'] or '/tmp/'

        self.l19_data = numpy.array([[7, 1, 0, 0, 0, 0, 0, 0, 0],
                                     [4, 2, 0, 0, 0, 1, 0, 0, 0],
                                     [2, 4, 0, 0, 0, 1, 0, 0, 0],
                                     [1, 7, 0, 0, 0, 0, 0, 0, 0],
                                     [0, 8, 0, 0, 0, 0, 0, 0, 0],
                                     [0, 7, 1, 0, 0, 0, 0, 0, 0],
                                     [0, 4, 2, 0, 0, 0, 2, 0, 0],
                                     [0, 2, 4, 0, 0, 0, 1, 0, 0],
                                     [0, 1, 7, 0, 0, 0, 0, 0, 0],
                                     [0, 0, 8, 0, 0, 0, 0, 0, 0],
                                     [0, 0, 7, 1, 0, 0, 0, 0, 0],
                                     [0, 0, 4, 2, 0, 0, 0, 3, 0],
                                     [0, 0, 2, 4, 0, 0, 0, 1, 0],
                                     [0, 0, 1, 7, 0, 0, 0, 0, 0],
                                     [0, 0, 0, 8, 0, 0, 0, 0, 0],
                                     [0, 0, 0, 7, 1, 0, 0, 0, 0],
                                     [0, 0, 0, 4, 2, 0, 0, 0, 4],
                                     [0, 0, 0, 2, 4, 0, 0, 0, 1],
                                     [0, 0, 0, 1, 7, 0, 0, 0, 0]])
        self.l19_sample_names = [
            'sam1', 'sam2', 'sam3', 'sam4', 'sam5', 'sam6', 'sam7', 'sam8',
            'sam9', 'sam_middle', 'sam11', 'sam12', 'sam13', 'sam14', 'sam15',
            'sam16', 'sam17', 'sam18', 'sam19'
        ]
        self.l19_taxon_names = [
            'tax1', 'tax2', 'tax3', 'tax4', 'endbigtaxon', 'tax6', 'tax7',
            'tax8', 'tax9'
        ]
        self.l19_taxon_names_w_underscore = [
            'ta_x1', 'tax2', 'tax3', 'tax4', 'endbigtaxon', 'tax6', 'tax7',
            'tax8', 'tax9'
        ]

        l19_str = format_biom_table(
            DenseOTUTable(self.l19_data.T, self.l19_sample_names,
                          self.l19_taxon_names))
        fd, self.l19_fp = mkstemp(dir=self.tmp_dir,
                                  prefix='test_bdiv_otu_table',
                                  suffix='.blom')
        close(fd)
        open(self.l19_fp, 'w').write(l19_str)

        l19_str_w_underscore = format_biom_table(
            DenseOTUTable(self.l19_data.T, self.l19_sample_names,
                          self.l19_taxon_names_w_underscore))
        fd, self.l19_str_w_underscore_fp = mkstemp(
            dir=self.tmp_dir, prefix='test_bdiv_otu_table', suffix='.blom')
        close(fd)
        open(self.l19_str_w_underscore_fp, 'w').write(l19_str_w_underscore)

        self.l19_tree_str = '((((tax7:0.1,tax3:0.2):.98,tax8:.3, tax4:.3):.4,\
 ((tax1:0.3, tax6:.09):0.43,tax2:0.4):0.5):.2, (tax9:0.3, endbigtaxon:.08));'

        self.l19_tree = parse_newick(self.l19_tree_str, PhyloNode)

        self.files_to_remove = [self.l19_fp, self.l19_str_w_underscore_fp]
        self.folders_to_remove = []
Example #34
    def setUp(self):
        self.qiime_config = load_qiime_config()
        self.tmp_dir = self.qiime_config['temp_dir'] or '/tmp/'

        self.map_file = """#SampleID	Day	time	Description
#This is some comment about the study
1	090809	1200	some description of sample1
2	090809	1800	some description of sample2
3	090909	1200	some description of sample3
4	090909	1800	some description of sample4
5	091009	1200	some description of sample5"""
        self.cat_by_sample = {
            "1": [("Day", "090809"), ("time", "1200")],
            "2": [("Day", "090809"), ("time", "1800")],
            "3": [("Day", "090909"), ("time", "1200")],
            "4": [("Day", "090909"), ("time", "1800")],
            "5": [("Day", "091009"), ("time", "1200")]
        }
        self.sample_by_cat = {
            ("Day", "090809"): ["1", "2"],
            ("Day", "090909"): ["3", "4"],
            ("Day", "091009"): ["5"],
            ("time", "1200"): ["1", "3", "5"],
            ("time", "1800"): ["2", "4"]
        }

        self.num_cats = 2
        self.meta_dict = {
            "1": ["090809	1200", 0],
            "2": ["090809	1800", 0],
            "3": ["090909	1200", 0],
            "4": ["090909	1800", 0],
            "5": ["091009	1200", 0]
        }
        self.labels = ["from", "to", "eweight", "consensus_lin", "Day", "time"]
        self.node_labels = ["node_name", "node_disp_name", "ntype", "degree",
                            "weighted_degree", "consensus_lin", "Day", "time"]
        self.label_list = [["090809", "090909", "091009"], ["1200", "1800"]]

        self.otu_table_vals = array([[0, 1, 0, 0, 6], [2, 0, 0, 0, 0],
                                     [0, 0, 3, 1, 0], [0, 0, 0, 0, 5],
                                     [0, 4, 2, 0, 0], [3, 6, 0, 0, 0],
                                     [0, 0, 4, 2, 0], [0, 0, 0, 0, 3],
                                     [2, 0, 0, 5, 0], [0, 2, 0, 4, 0]])

        otu_table_str = format_biom_table(
            table_factory(self.otu_table_vals, ['1', '2', '3', '4', '5'], [
                'otu_1', 'otu_2', 'otu_3', 'otu_4', 'otu_5', 'otu_6', 'otu_7',
                'otu_8', 'otu_9', 'otu_10'
            ], [None, None, None, None, None], [{
                "taxonomy": ["Bacteria", "Actinobacteria", "Coriobacteridae"]
            }, {
                "taxonomy": [
                    "Bacteria", "Bacteroidetes", "Bacteroidales",
                    "Bacteroidaceae"
                ]
            }, {
                "taxonomy":
                ["Bacteria", "Firmicutes", "Clostridia", "Clostridiales"]
            }, {
                "taxonomy": [
                    "Bacteria", "Spirochaetes", "Spirochaetales",
                    "Spirochaetaceae"
                ]
            }, {
                "taxonomy": [
                    "Bacteria", "Bacteroidetes", "Bacteroidales",
                    "Rikenellaceae"
                ]
            }, {
                "taxonomy": [
                    "Bacteria", "Bacteroidetes", "Bacteroidales",
                    "Dysgonomonaceae"
                ]
            }, {
                "taxonomy": [
                    "Bacteria", "Bacteroidetes", "Bacteroidales",
                    "Odoribacteriaceae"
                ]
            }, {
                "taxonomy": [
                    "Bacteria", "Bacteroidetes", "Bacteroidales",
                    "Dysgonomonaceae", "otu_425"
                ]
            }, {
                "taxonomy": [
                    "Bacteria", "Bacteroidetes", "Bacteroidales",
                    "Dysgonomonaceae", "otu_425"
                ]
            }, {
                "taxonomy": [
                    "Bacteria", "Firmicutes", "Mollicutes",
                    "Clostridium_aff_innocuum_CM970"
                ]
            }]))

        self.otu_table_fp = get_tmp_filename(
            tmp_dir=self.tmp_dir,
            prefix='test_make_otu_network_otu_table',
            suffix='.biom')
        open(self.otu_table_fp, 'w').write(otu_table_str)

        self.otu_sample_file = """#Full OTU Counts
#OTU ID	1	2	3	4	5	Consensus Lineage
otu_1	0	1	0	0	6	Bacteria; Actinobacteria; Coriobacteridae
otu_2	2	0	0	0	0	Bacteria; Bacteroidetes; Bacteroidales; Bacteroidaceae
otu_3	0	0	3	1	0	Bacteria; Firmicutes; Clostridia; Clostridiales
otu_4	0	0	0	0	5	Bacteria; Spirochaetes; Spirochaetales; Spirochaetaceae
otu_5	0	4	2	0	0	Bacteria; Bacteroidetes; Bacteroidales; Rikenellaceae
otu_6	3	6	0	0	0	Bacteria; Bacteroidetes; Bacteroidales; Dysgonomonaceae
otu_7	0	0	4	2	0	Bacteria; Bacteroidetes; Bacteroidales; Odoribacteriaceae
otu_8	0	0	0	0	3	Bacteria; Bacteroidetes; Bacteroidales; Dysgonomonaceae; otu_425
otu_9	2	0	0	5	0	Bacteria; Bacteroidetes; Bacteroidales; Dysgonomonaceae; otu_425
otu_10	0	2	0	4	0	Bacteria; Firmicutes; Mollicutes; Clostridium_aff_innocuum_CM970"""

        self.con_by_sample = {
            '1': set(['2', '4']),
            '2': set(['5', '3', '1', '4']),
            '3': set(['4', '2']),
            '4': set(['3', '1', '2']),
            '5': set(['2'])
        }

        self.edge_file_str = [
            "2	otu_1	1.0	Bacteria:Actinobacteria:Coriobacteridae	090809	1800",
            "5	otu_1	6.0	Bacteria:Actinobacteria:Coriobacteridae	091009	1200",
            "1	otu_2	2.0	Bacteria:Bacteroidetes:Bacteroidales:Bacteroidaceae	090809	1200",
            "3	otu_3	3.0	Bacteria:Firmicutes:Clostridia:Clostridiales	090909	1200",
            "4	otu_3	1.0	Bacteria:Firmicutes:Clostridia:Clostridiales	090909	1800",
            "5	otu_4	5.0	Bacteria:Spirochaetes:Spirochaetales:Spirochaetaceae	091009	1200",
            "2	otu_5	4.0	Bacteria:Bacteroidetes:Bacteroidales:Rikenellaceae	090809	1800",
            "3	otu_5	2.0	Bacteria:Bacteroidetes:Bacteroidales:Rikenellaceae	090909	1200",
            "1	otu_6	3.0	Bacteria:Bacteroidetes:Bacteroidales:Dysgonomonaceae	090809	1200",
            "2	otu_6	6.0	Bacteria:Bacteroidetes:Bacteroidales:Dysgonomonaceae	090809	1800",
            "3	otu_7	4.0	Bacteria:Bacteroidetes:Bacteroidales:Odoribacteriaceae	090909	1200",
            "4	otu_7	2.0	Bacteria:Bacteroidetes:Bacteroidales:Odoribacteriaceae	090909	1800",
            "5	otu_8	3.0	Bacteria:Bacteroidetes:Bacteroidales:Dysgonomonaceae:otu_425	091009	1200",
            "1	otu_9	2.0	Bacteria:Bacteroidetes:Bacteroidales:Dysgonomonaceae:otu_425	090809	1200",
            "4	otu_9	5.0	Bacteria:Bacteroidetes:Bacteroidales:Dysgonomonaceae:otu_425	090909	1800",
            "2	otu_10	2.0	Bacteria:Firmicutes:Mollicutes:Clostridium_aff_innocuum_CM970	090809	1800",
            "4	otu_10	4.0	Bacteria:Firmicutes:Mollicutes:Clostridium_aff_innocuum_CM970	090909	1800"
        ]

        self.node_file_str = [
            "1	1	user_node	3	7.0	other	090809	1200",
            "2	2	user_node	4	13.0	other	090809	1800",
            "3	3	user_node	3	9.0	other	090909	1200",
            "4	4	user_node	4	12.0	other	090909	1800",
            "5	5	user_node	3	14.0	other	091009	1200",
            "otu_1		otu_node	2	7.0	Bacteria:Actinobacteria:Coriobacteridae	otu	otu",
            "otu_2		otu_node	1	2.0	Bacteria:Bacteroidetes:Bacteroidales:Bacteroidaceae	otu	otu",
            "otu_3		otu_node	2	4.0	Bacteria:Firmicutes:Clostridia:Clostridiales	otu	otu",
            "otu_4		otu_node	1	5.0	Bacteria:Spirochaetes:Spirochaetales:Spirochaetaceae	otu	otu",
            "otu_5		otu_node	2	6.0	Bacteria:Bacteroidetes:Bacteroidales:Rikenellaceae	otu	otu",
            "otu_6		otu_node	2	9.0	Bacteria:Bacteroidetes:Bacteroidales:Dysgonomonaceae	otu	otu",
            "otu_7		otu_node	2	6.0	Bacteria:Bacteroidetes:Bacteroidales:Odoribacteriaceae	otu	otu",
            "otu_8		otu_node	1	3.0	Bacteria:Bacteroidetes:Bacteroidales:Dysgonomonaceae:otu_425	otu	otu",
            "otu_9		otu_node	2	7.0	Bacteria:Bacteroidetes:Bacteroidales:Dysgonomonaceae:otu_425	otu	otu",
            "otu_10		otu_node	2	6.0	Bacteria:Firmicutes:Mollicutes:Clostridium_aff_innocuum_CM970	otu	otu"
        ]


        self.red_edge_file_str = [
            "2	otu_1	1.0	Bacteria:Actinobacteria:Coriobacteridae	090809	1800",
            "5	otu_1	6.0	Bacteria:Actinobacteria:Coriobacteridae	091009	1200",
            "1	@1	1.0	missed	090809	1200",
            "3	otu_3	3.0	Bacteria:Firmicutes:Clostridia:Clostridiales	090909	1200",
            "4	otu_3	1.0	Bacteria:Firmicutes:Clostridia:Clostridiales	090909	1800",
            "5	@5	1.0	missed	091009	1200",
            "2	otu_5	4.0	Bacteria:Bacteroidetes:Bacteroidales:Rikenellaceae	090809	1800",
            "3	otu_5	2.0	Bacteria:Bacteroidetes:Bacteroidales:Rikenellaceae	090909	1200",
            "1	otu_6	3.0	Bacteria:Bacteroidetes:Bacteroidales:Dysgonomonaceae	090809	1200",
            "2	otu_6	6.0	Bacteria:Bacteroidetes:Bacteroidales:Dysgonomonaceae	090809	1800",
            "3	otu_7	4.0	Bacteria:Bacteroidetes:Bacteroidales:Odoribacteriaceae	090909	1200",
            "4	otu_7	2.0	Bacteria:Bacteroidetes:Bacteroidales:Odoribacteriaceae	090909	1800",
            "1	otu_9	2.0	Bacteria:Bacteroidetes:Bacteroidales:Dysgonomonaceae:otu_425	090809	1200",
            "4	otu_9	5.0	Bacteria:Bacteroidetes:Bacteroidales:Dysgonomonaceae:otu_425	090909	1800",
            "2	otu_10	2.0	Bacteria:Firmicutes:Mollicutes:Clostridium_aff_innocuum_CM970	090809	1800",
            "4	otu_10	4.0	Bacteria:Firmicutes:Mollicutes:Clostridium_aff_innocuum_CM970	090909	1800"
        ]

        self.red_node_file_str = [
            "1	1	user_node	3	7.0	other	090809	1200",
            "2	2	user_node	4	13.0	other	090809	1800",
            "3	3	user_node	3	9.0	other	090909	1200",
            "4	4	user_node	4	12.0	other	090909	1800",
            "5	5	user_node	3	14.0	other	091009	1200",
            "otu_1		otu_node	2	7.0	Bacteria:Actinobacteria:Coriobacteridae	otu	otu",
            "@1		otu_collapsed	1	1.0	other	otu	otu",
            "otu_3		otu_node	2	4.0	Bacteria:Firmicutes:Clostridia:Clostridiales	otu	otu",
            "@5		otu_collapsed	2	2.0	other	otu	otu",
            "otu_5		otu_node	2	6.0	Bacteria:Bacteroidetes:Bacteroidales:Rikenellaceae	otu	otu",
            "otu_6		otu_node	2	9.0	Bacteria:Bacteroidetes:Bacteroidales:Dysgonomonaceae	otu	otu",
            "otu_7		otu_node	2	6.0	Bacteria:Bacteroidetes:Bacteroidales:Odoribacteriaceae	otu	otu",
            "otu_9		otu_node	2	7.0	Bacteria:Bacteroidetes:Bacteroidales:Dysgonomonaceae:otu_425	otu	otu",
            "otu_10		otu_node	2	6.0	Bacteria:Firmicutes:Mollicutes:Clostridium_aff_innocuum_CM970	otu	otu"
        ]

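        # Degree-count expectations: otu_dc maps node degree -> number of OTU
        # nodes with that degree, sample_dc does the same for sample nodes,
        # and degree_counts is their union.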
        self.otu_dc = {1: 3, 2: 7}
        self.sample_dc = {3: 3, 4: 2}
        self.degree_counts = {1: 3, 2: 7, 3: 3, 4: 2}

        self.num_con_cat = {"Day": 2, "time": 1}
        self.num_con = 6
        self.num_cat = {"Day": 2, "time": 4}
        self.num_cat_less = {"Day": 1, "time": 3}
        self._paths_to_clean_up = [self.otu_table_fp]
        self._dir_to_clean_up = ''
Beispiel #35
0
def pyroNoise_app(flows, num_flows, num_cpus=2, outdir="/tmp/", log_fh=None,
                  precision=15.0, cut_off=0.05):
    """Runs PyroNoise on flows and returns the basename of the result files.

    flows: list of flowgrams

    num_flows: number of flowgrams

    num_cpus: number of cpus requested from mpirun

    outdir: directory where intermediate files are stored

    log_fh: open file handle used to log the executed commands (optional)

    precision, cut_off: clustering parameters passed through to PCluster
    """
    
    if(not (app_path("FDist") and app_path("QCluster") and app_path("PCluster"))):
        raise ApplicationNotFoundError,"PyroNoise binaries (FDist,QCluster,PCluster) not found."

    if(num_cpus>1 and not app_path("mpirun")):
        raise ApplicationError,"Can't run in parallel - mpirun not installed.\n"+\
            "Try running on one processor."
    # if mpi is not found, better raise Error and don't fall back to one cpu
    #        num_cpus = 1 #set to a save value
    #        if log_fh:
    #            log_fh.write("Warning: mpirun not found. Falling back to one cpu")

    basename = get_tmp_filename(tmp_dir=outdir, prefix="", suffix="")
    # copy flowgrams from the input sff.txt to a PyroNoise-formatted file
    filename, id_mapping = write_pyronoise_file(flows, num_flows,
                                                filename=basename + ".dat")

    # if pyronoise_data_fp is set, use it; otherwise fall back to the default
    # hard-coded in the PyroNoise header files
    data_fp = load_qiime_config()["pyronoise_data_fp"]
    data_opt = ""
    if data_fp:
        if not exists(data_fp):
            raise ApplicationError("File %s does not exist. Check your setting "
                                   "of pyronoise_data_fp in .qiime_config."
                                   % data_fp)
        data_opt = "-l %s" % data_fp

    if num_cpus > 1:
        mpi = "mpirun -np %d " % num_cpus
    else:
        mpi = ""
    cmd = mpi + "FDist %s -in %s -out %s > /dev/null" % \
        (data_opt, filename, basename)
    
    if log_fh: 
        log_fh.write("Executing: %s\n" % cmd)
    system(cmd)

    # check that FDist actually produced an output file
    if not exists(basename + ".fdist"):
        remove(filename)
        raise ApplicationError, "Something went wrong with PyroNoise." +\
            " If using mpi, make sure it's set up properly."

    # QCluster is fast, so no mpi is needed
    cmd = "QCluster -in %s.fdist -out %s > /dev/null" % (basename, basename)
    if log_fh:
        log_fh.write("Executing: %s\n" % cmd)
    system(cmd)

    cmd = mpi\
        + "PCluster %s -din %s -out %s -lin %s.list -s %f -c %f > %s.pout"\
        % (data_opt, filename, basename, basename, precision, cut_off, basename)
    if log_fh: 
        log_fh.write("Executing: %s\n" % cmd)
    system(cmd)
  
    remove(filename)
    return basename, id_mapping
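
# Usage sketch (illustrative; the input parsing helper below is hypothetical,
# and one cpu avoids the mpirun dependency):
#
#   flows, num_flows = read_flowgrams_from_sff_txt("reads.sff.txt")
#   basename, id_mapping = pyroNoise_app(flows, num_flows, num_cpus=1,
#                                        outdir="/tmp/", precision=15.0,
#                                        cut_off=0.05)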
Beispiel #36
0
def main():
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    if opts.haiku:
        print "QIIME provides insight\nmicrobial in nature\nto ecology"
        exit(0)

    qiime_config = load_qiime_config()
    test = opts.test
    qiime_base_install = opts.qiime_base_install

    rdp_jarpath = get_rdp_jarpath()
    if rdp_jarpath is None:
        rdp_version = "Not installed."
    else:
        rdp_version = split(rdp_jarpath)[1]

    java_version = get_java_version()
    if java_version is None:
        java_version = "Not installed."

    system_info = [("Platform", platform),
                   ("Python version", python_version.replace('\n', ' ')),
                   ("Python executable", executable)]
    max_len = max([len(e[0]) for e in system_info])
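    # '%*s' takes its field width from the argument list, so max_len
    # right-aligns each label to the longest one before the tab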
    print "\nSystem information"
    print "=================="
    for v in system_info:
        print "%*s:\t%s" % (max_len, v[0], v[1])

    version_info = [("NumPy version", numpy_lib_version),
                    ("SciPy version", scipy_lib_version),
                    ("matplotlib version", matplotlib_lib_version),
                    ("biom-format version", biom_lib_version),
                    ("qcli version", qcli_lib_version),
                    ("pyqi version", pyqi_lib_version),
                    ("scikit-bio version", skbio_lib_version),
                    ("QIIME library version", get_qiime_library_version()),
                    ("QIIME script version", __version__),
                    ("PyNAST version (if installed)", pynast_lib_version),
                    ("Emperor version", emperor_lib_version)]
    if not qiime_base_install:
        version_info += [("RDP Classifier version (if installed)",
                          rdp_version),
                         ("Java version (if installed)", java_version)]

    max_len = max([len(e[0]) for e in version_info])
    print "\nDependency versions"
    print "==================="
    for v in version_info:
        print "%*s:\t%s" % (max_len, v[0], v[1])

    print "\nQIIME config values"
    print "==================="
    max_len = max([len(key) for key in qiime_config])
    for key, value in qiime_config.items():
        print "%*s:\t%s" % (max_len, key, value)

    if test:
        if qiime_base_install:
            suite = TestLoader().loadTestsFromTestCase(QIIMEDependencyBase)
        else:
            suite = TestLoader().loadTestsFromTestCase(QIIMEDependencyFull)
        if opts.verbose:
            verbosity = 2
        else:
            verbosity = 1
        TextTestRunner(stream=stdout, verbosity=verbosity).run(suite)
Beispiel #37
0
def main():
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    verbose = opts.verbose

    input_biom_fp = opts.input_biom_fp
    output_dir = opts.output_dir
    categories = opts.categories
    if categories is not None:
        categories = categories.split(',')
    tree_fp = opts.tree_fp
    mapping_fp = opts.mapping_fp
    verbose = opts.verbose
    parallel = opts.parallel
    sampling_depth = opts.sampling_depth
    nonphylogenetic_diversity = opts.nonphylogenetic_diversity
    print_only = opts.print_only
    suppress_taxa_summary = opts.suppress_taxa_summary
    suppress_beta_diversity = opts.suppress_beta_diversity
    suppress_alpha_diversity = opts.suppress_alpha_diversity
    suppress_group_significance = opts.suppress_group_significance

    if opts.parameter_fp is not None:
        params = parse_qiime_parameters(open(opts.parameter_fp, 'U'))
    else:
        params = parse_qiime_parameters([])

    if nonphylogenetic_diversity:
        # if the user specified --nonphylogenetic_diversity and they
        # didn't define metrics in a parameters file, define them here
        if 'metrics' not in params['beta_diversity']:
            params['beta_diversity']['metrics'] = 'bray_curtis'
        if 'metrics' not in params['alpha_diversity']:
            params['alpha_diversity']['metrics'] = 'observed_otus,chao1'
    else:
        if tree_fp is None:
            option_parser.error(
                "--tree_fp is required unless --nonphylogenetic_diversity "
                "is passed.")

    jobs_to_start = opts.jobs_to_start
    default_jobs_to_start = qiime_config['jobs_to_start']
    validate_and_set_jobs_to_start(params, jobs_to_start,
                                   default_jobs_to_start, parallel,
                                   option_parser)

    # Create the output directory. If it already exists and the user
    # isn't trying to recover from a failed run, raise an error.
    create_dir(output_dir, fail_on_exist=not opts.recover_from_failure)

    if print_only:
        command_handler = print_commands
    else:
        command_handler = call_commands_serially

    if verbose:
        status_update_callback = print_to_stdout
    else:
        status_update_callback = no_status_updates

    run_core_diversity_analyses(
        biom_fp=input_biom_fp,
        mapping_fp=mapping_fp,
        sampling_depth=sampling_depth,
        output_dir=output_dir,
        qiime_config=load_qiime_config(),
        command_handler=command_handler,
        tree_fp=tree_fp,
        params=params,
        categories=categories,
        arare_min_rare_depth=10,
        arare_num_steps=10,
        parallel=parallel,
        suppress_taxa_summary=suppress_taxa_summary,
        suppress_beta_diversity=suppress_beta_diversity,
        suppress_alpha_diversity=suppress_alpha_diversity,
        suppress_group_significance=suppress_group_significance,
        status_update_callback=status_update_callback)
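
# An illustrative -p/--parameter_fp file (QIIME parameter files use one
# "section:option value" entry per line); these two entries mirror the
# defaults this script fills in when --nonphylogenetic_diversity is passed:
#
#   beta_diversity:metrics bray_curtis
#   alpha_diversity:metrics observed_otus,chao1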
Beispiel #38
0
class ParallelBlaster(ParallelWrapper):
    _script_name = load_qiime_config()['blastall_fp']
    _input_splitter = ParallelWrapper._split_fasta
    _job_prefix = 'BLAST'

    def _precommand_initiation(
            self, input_fp, output_dir, working_dir, params):
        if params['refseqs_path']:
            # Build the blast database from the refseqs_path -- all procs
            # will then access one db rather than create one per proc.
            blast_db, db_files_to_remove = \
                build_blast_db_from_fasta_path(params['refseqs_path'])
            self.files_to_remove += db_files_to_remove
            params['blast_db'] = blast_db

    def _get_job_commands(self, fasta_fps, output_dir, params, job_prefix,
                          working_dir, command_prefix=None,
                          command_suffix='; exit'):
        """Generate blastall commands which should be run."""
        # Create basenames for each of the output files. These will be filled
        # in to create the full list of files created by all of the runs.
        out_filenames = [job_prefix + '.%d_blast_out.txt']

        command_prefix = command_prefix or \
            '/bin/bash; export BLASTMAT=%s;' % params['blastmat_dir']

        if not params['disable_low_complexity_filter']:
            complexity_filter_str = 'T'
        else:
            complexity_filter_str = 'F'

        # Create lists to store the results.
        commands = []
        result_filepaths = []

        # Iterate over the input files.
        for i, fasta_fp in enumerate(fasta_fps):
            # Each run ends with moving the output file from the tmp dir to
            # the output_dir. Build the command to perform the move here.
            # rename_command, current_result_filepaths = \
            #        self._get_rename_command([fn % i for fn in out_filenames],
            #                                 working_dir, output_dir)
            #result_filepaths += current_result_filepaths

            # TODO should this be put in self._get_rename_command()?
            infile_basename = splitext(split(fasta_fp)[1])[0]
            working_outfile_path = '%s/%s_blast_out.txt' %\
                (working_dir, infile_basename)
            outfile_path = '%s/%s_blast_out.txt' % (output_dir,
                                                    infile_basename)
            rename_command = '; mv %s %s' % (working_outfile_path,
                                             outfile_path)
            result_filepaths.append(outfile_path)

            command = '%s %s -p blastn -m 9 -e %s -F %s -W %s -b %s -i %s -d %s > %s %s %s' % \
                (command_prefix,
                 self._script_name,
                 params['e_value'],
                 complexity_filter_str,
                 params['word_size'],
                 params['num_hits'],
                 fasta_fp,
                 params['blast_db'],
                 working_outfile_path,
                 rename_command,
                 command_suffix)
            commands.append(command)
        return commands, result_filepaths

    def _write_merge_map_file(self,
                              input_file_basename,
                              job_result_filepaths,
                              params,
                              output_dir,
                              merge_map_filepath,
                              failures=False):
        """
        """
        f = open(merge_map_filepath, 'w')
        out_filepath = '%s/%s_blast_out.txt' % (output_dir,
                                                input_file_basename)
        f.write('\t'.join(job_result_filepaths + [out_filepath]))
        f.write('\n')
        f.close()
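
# For reference, a single rendered command from _get_job_commands looks
# roughly like this (paths and parameter values are illustrative):
#
#   /bin/bash; export BLASTMAT=/data/blastmat; /usr/bin/blastall -p blastn
#   -m 9 -e 1e-10 -F T -W 30 -b 1 -i 0.fasta -d /data/refseqs.fasta
#   > /tmp/jobs/0_blast_out.txt ; mv /tmp/jobs/0_blast_out.txt
#   /out/0_blast_out.txt ; exit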
Beispiel #39
0
__email__ = "*****@*****.**"
__status__ = "Development"

from os import makedirs
from qiime.util import (parse_command_line_parameters, 
                        make_option, 
                        get_options_lookup,
                        load_qiime_config,)
from qiime.parse import parse_qiime_parameters
from qiime.workflow.util import (validate_and_set_jobs_to_start, call_commands_serially,
                            print_commands, no_status_updates, print_to_stdout)
from qiime.workflow.pick_open_reference_otus import (
                        pick_subsampled_open_reference_otus,
                        iterative_pick_subsampled_open_reference_otus)

qiime_config = load_qiime_config()
options_lookup = get_options_lookup()


script_info = {}
script_info['brief_description'] = ""
script_info['script_description'] = ""

script_info['script_usage'] = []

script_info['script_usage'].append(("","Run the subsampled open-reference OTU picking workflow on seqs1.fna using refseqs.fna as the reference collection. ALWAYS SPECIFY ABSOLUTE FILE PATHS (absolute path represented here as $PWD, but will generally look something like /home/ubuntu/my_analysis/","%prog -i $PWD/seqs1.fna -r $PWD/refseqs.fna -o $PWD/ucrss/ -s 0.1 -p $PWD/ucrss_params.txt"))

script_info['script_usage'].append(("","Run the subsampled open-reference OTU picking workflow on seqs1.fna using refseqs.fna as the reference collection and using usearch61 and usearch61_ref as the OTU picking methods. ALWAYS SPECIFY ABSOLUTE FILE PATHS (absolute path represented here as $PWD, but will generally look something like /home/ubuntu/my_analysis/","%prog -i $PWD/seqs1.fna -r $PWD/refseqs.fna -o $PWD/ucrss_usearch/ -s 0.1 -p $PWD/ucrss_params.txt -m usearch61"))

script_info['script_usage'].append(("","Run the subsampled open-reference OTU picking workflow in iterative mode on seqs1.fna and seqs2.fna using refseqs.fna as the initial reference collection. ALWAYS SPECIFY ABSOLUTE FILE PATHS (absolute path represented here as $PWD, but will generally look something like /home/ubuntu/my_analysis/","%prog -i $PWD/seqs1.fna,$PWD/seqs2.fna -r $PWD/refseqs.fna -o $PWD/ucrss_iter/ -s 0.1 -p $PWD/ucrss_params.txt"))
Beispiel #40
0
def main():
    option_parser, opts, args = \
        parse_command_line_parameters(suppress_verbose=True, **script_info)

    input_dir = opts.input_dir
    parameter_fp = opts.parameter_fp
    read1_indicator = opts.read1_indicator
    read2_indicator = opts.read2_indicator
    match_barcodes = opts.match_barcodes
    barcode_indicator = opts.barcode_indicator
    leading_text = opts.leading_text
    trailing_text = opts.trailing_text
    include_input_dir_path = opts.include_input_dir_path
    output_dir = abspath(opts.output_dir)
    remove_filepath_in_name = opts.remove_filepath_in_name
    print_only = opts.print_only

    if remove_filepath_in_name and not include_input_dir_path:
        option_parser.error("If --remove_filepath_in_name is enabled, "
                            "--include_input_dir_path must also be enabled.")

    if opts.parameter_fp:
        with open(opts.parameter_fp, 'U') as parameter_f:
            params_dict = parse_qiime_parameters(parameter_f)
        params_str = get_params_str(params_dict['join_paired_ends'])
    else:
        params_dict = {}
        params_str = ""

    create_dir(output_dir)

    all_files = []
    extensions = ['.fastq.gz', '.fastq', '.fq.gz', '.fq']

    for root, dirs, fps in walk(input_dir):
        for fp in fps:
            for extension in extensions:
                if fp.endswith(extension):
                    all_files += [abspath(join(root, fp))]

    pairs, bc_pairs = get_pairs(all_files, read1_indicator, read2_indicator,
                                match_barcodes, barcode_indicator)
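    # With indicator strings like '_R1_' / '_R2_', get_pairs matches e.g.
    # 'lane1_R1_001.fastq.gz' with 'lane1_R2_001.fastq.gz' (filenames here
    # are hypothetical) so each pair is joined by a single command.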

    commands = create_commands_jpe(pairs, output_dir, params_str, leading_text,
                                   trailing_text, include_input_dir_path,
                                   remove_filepath_in_name, match_barcodes,
                                   bc_pairs)

    qiime_config = load_qiime_config()
    if print_only:
        command_handler = print_commands
    else:
        command_handler = call_commands_serially
    logger = WorkflowLogger(generate_log_fp(output_dir),
                            params=params_dict,
                            qiime_config=qiime_config)
    # Call the command handler on the list of commands
    command_handler(commands,
                    status_update_callback=no_status_updates,
                    logger=logger,
                    close_logger_on_success=True)
Beispiel #41
0
def main():
    option_parser, opts, args =\
        parse_command_line_parameters(suppress_verbose=True, **script_info)

    input_dir = opts.input_dir
    demultiplexing_method = opts.demultiplexing_method
    parameter_fp = opts.parameter_fp
    read_indicator = opts.read_indicator
    barcode_indicator = opts.barcode_indicator
    mapping_indicator = opts.mapping_indicator
    mapping_extensions = opts.mapping_extensions.split(',')
    sampleid_indicator = opts.sampleid_indicator
    leading_text = opts.leading_text
    trailing_text = opts.trailing_text
    include_input_dir_path = opts.include_input_dir_path
    output_dir = abspath(opts.output_dir)
    remove_filepath_in_name = opts.remove_filepath_in_name
    print_only = opts.print_only

    if remove_filepath_in_name and not include_input_dir_path:
        option_parser.error("If --remove_filepath_in_name is enabled, "
                            "--include_input_dir_path must also be enabled.")

    if opts.parameter_fp:
        with open(opts.parameter_fp, 'U') as parameter_f:
            params_dict = parse_qiime_parameters(parameter_f)
        params_str = get_params_str(params_dict['split_libraries_fastq'])
    else:
        params_dict = {}
        params_str = ""

    create_dir(output_dir)

    all_fastq = []
    all_mapping = []

    extensions = ['.fastq.gz', '.fastq', '.fq.gz', '.fq']

    for root, dirs, fps in walk(input_dir):
        for fp in fps:
            for extension in extensions:
                if fp.endswith(extension):
                    all_fastq += [abspath(join(root, fp))]

    if demultiplexing_method == 'mapping_barcode_files':
        for root, dirs, fps in walk(input_dir):
            for fp in fps:
                for mapping_extension in mapping_extensions:
                    if fp.endswith(mapping_extension):
                        all_mapping += [abspath(join(root, fp))]

        all_files = get_matching_files(all_fastq, all_mapping, read_indicator,
                                       barcode_indicator, mapping_indicator)
    else:
        all_files = all_fastq

    commands = create_commands_slf(all_files, demultiplexing_method,
                                   output_dir, params_str, leading_text,
                                   trailing_text, include_input_dir_path,
                                   remove_filepath_in_name, sampleid_indicator)

    qiime_config = load_qiime_config()
    if print_only:
        command_handler = print_commands
    else:
        command_handler = call_commands_serially
    logger = WorkflowLogger(generate_log_fp(output_dir),
                            params=params_dict,
                            qiime_config=qiime_config)
    # Call the command handler on the list of commands
    command_handler(commands,
                    status_update_callback=no_status_updates,
                    logger=logger,
                    close_logger_on_success=True)
Beispiel #42
0
    def setUp(self):
        self.config = load_qiime_config()
Beispiel #43
0
    def setUp(self):
        """define some top-level data"""

        self.qiime_config = load_qiime_config()
        self.tmp_dir = self.qiime_config['temp_dir'] or '/tmp/'

        self.props = {
            "title": "PCoA - PC1 vs PC2",
            "ylabel": "PC2",
            "xlabel": "PC1"
        }
        self.props_scree = {
            "title": "Scree plor",
            "ylabel": "Fraction of variance",
            "xlabel": "Principal component"
        }
        self.data = {}
        self.data['coord'] = [['Sample1', 'Sample2'],
                              array([[-0.2, 0.07], [-0.04, 0.2]]),
                              array([0.7, 0.6]),
                              array([25.00, 30.00])]
        self.data['map'] = [['#SampleID', 'Day'],
                            ['Sample1', 'Day1'],
                            ['Sample2', 'Day1']]
        self.coord_tups = [("1", "2"), ("3", "2"), ("1", "3")]
        self.generate_eps = True
        self.data['alpha'] = 0.33
        self.groups = {}
        self.groups['Day1'] = ['Sample1', 'Sample2']
        self.colors = {}
        self.colors['Day1'] = 'blue1'
        self.prefs = {}
        self.prefs['Sample'] = {}
        self.prefs['Sample']['column'] = 'Day'
        self.data_color_hsv = {'blue1': (240, 100, 100)}
        self.data_color_order = ['blue1', []]
        self.background_color = 'black'
        self.label_color = 'white'
        self.dir_path = '/tmp/'
        self.data_file_link = '/tmp/'
        self.xy_coords = {}
        self.xy_coords['Sample1'] = ([-0.2], [0.07], ['Sample1: Day1'],
                                     ['#0000ff'], ['s'],
                                     [None], [None], [None])
        self.xy_coords['Sample2'] = ([-0.04], [0.2], ['Sample2: Day1'],
                                     ['#0000ff'], ['s'],
                                     [None], [None], [None])
        self.xy_coords_scree = {}
        self.xy_coords_scree['Variance'] = ([1, 2], [0.28, 0.12], 's', 'b')
        self.xy_coords_scree['Cum Variance'] = ([1, 2], [0.28, 0.40], 'o', 'r')

        self.coord_1 = '1'
        self.coord_2 = '2'

        self.p2d = {}
        self.p2d['Sample1'] = -0.2
        self.p2d['Sample2'] = -0.04
        self.p1d = {}
        self.p1d['Sample1'] = 0.07
        self.p1d['Sample2'] = 0.2
        self.all_cids = ['Sample1: Day1', 'Sample2: Day1']
        self.all_xcoords = [100.79999999999998, 279.36000000000001]
        self.all_ycoords = [54.000000000000014, 288.0]
        self.plot_label = 'SampleID'
        self.coords = {'pc vector number': ['Sample1', 'Sample2'],
                       '1': array([-0.2, -0.04]),
                       '2': array([0.07, 0.2])}
        self.x_len = 4.5
        self.y_len = 4.5
        self.size = 20
        self.alpha = 0.33
        self._paths_to_clean_up = []
Beispiel #44
0
def main():
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    if opts.haiku:
        print "QIIME provides insight\nmicrobial in nature\nto ecology"
        exit(0)

    qiime_config = load_qiime_config()
    test = opts.test
    qiime_full_install = opts.qiime_full_install

    rdp_jarpath = get_rdp_jarpath()
    if rdp_jarpath is None:
        rdp_version = "Not installed."
    else:
        rdp_version = split(rdp_jarpath)[1]

    java_version = get_java_version()
    if java_version is None:
        java_version = "Not installed."

    system_info = [
        ("Platform", platform),
        ("Python version", python_version.replace('\n', ' ')),
        ("Python executable", executable)]
    max_len = max([len(e[0]) for e in system_info])
    print "\nSystem information"
    print "=================="
    for v in system_info:
        print "%*s:\t%s" % (max_len, v[0], v[1])

    print "\nQIIME default reference information"
    print "==================================="
    print "For details on what files are used as QIIME's default references, see here:"
    print " https://github.com/biocore/qiime-default-reference/releases/tag/%s" % qdr_lib_version

    version_info = [
        ("QIIME library version", get_qiime_library_version()),
        ("QIIME script version", __version__),
        ("qiime-default-reference version", qdr_lib_version),
        ("NumPy version", numpy_lib_version),
        ("SciPy version", scipy_lib_version),
        ("pandas version", pandas_lib_version),
        ("matplotlib version", matplotlib_lib_version),
        ("biom-format version", biom_lib_version),
        ("h5py version", h5py_lib_version),
        ("qcli version", qcli_lib_version),
        ("pyqi version", pyqi_lib_version),
        ("scikit-bio version", skbio_lib_version),
        ("PyNAST version", pynast_lib_version),
        ("Emperor version", emperor_lib_version),
        ("burrito version", burrito_lib_version),
        ("burrito-fillings version", bfillings_lib_version),
        ("sortmerna version", sortmerna_lib_version),
        ("sumaclust version", sumaclust_lib_version),
        ("swarm version", swarm_lib_version),
        ("gdata", gdata_installed)
    ]

    if qiime_full_install:
        version_info += [
            ("RDP Classifier version (if installed)", rdp_version),
            ("Java version (if installed)", java_version)]

    max_len = max([len(e[0]) for e in version_info])
    print "\nDependency versions"
    print "==================="
    for v in version_info:
        print "%*s:\t%s" % (max_len, v[0], v[1])

    print "\nQIIME config values"
    print "==================="
    print "For definitions of these settings and to learn how to configure QIIME, see here:"
    print " http://qiime.org/install/qiime_config.html"
    print " http://qiime.org/tutorials/parallel_qiime.html\n"
    max_len = max([len(key) for key in qiime_config])
    for key, value in qiime_config.items():
        print "%*s:\t%s" % (max_len, key, value)

    if test:
        if qiime_full_install:
            print "\nQIIME full install test results"
            print "==============================="
            suite = TestLoader().loadTestsFromTestCase(QIIMEDependencyFull)
        else:
            print "\nQIIME base install test results"
            print "==============================="
            suite = TestLoader().loadTestsFromTestCase(QIIMEDependencyBase)
        if opts.verbose:
            verbosity = 2
        else:
            verbosity = 1
        TextTestRunner(stream=stdout, verbosity=verbosity).run(suite)