else: W.add_experiment_name(experiment_name, description) default_params = {"c_thresh": 0.90, "n_filter": 8, "maskN": False} # Choose method to split files # =============================================================================== splitby = None subgroups = {"zebra": ".*zebra.*", "gazelle": ".*gazelle.*"} if splitby == "subgroups": W.setup_clustering( mode="split_by_subgroups", infiles_pattern="lane*-clean.fastq.bgzf", default_params=default_params, subgroups=subgroups, ) elif splitby == "tags": W.setup_clustering(mode="split_by_tags", infiles_pattern="lane*-clean.fastq.bgzf", default_params=default_params) # =============================================================================== # Varibles to change, 1 dictionary per run run_parameters = [{"c_thresh": 1.0}, {"c_thresh": 0.90}] W.run_clustering(run_parameters, threads=1) # Next steps # Delete all fasta files
# Clustering
#===============================================================================
# Register the experiment: a fixed test label when `testing` is truthy,
# otherwise the caller-supplied name and description.
if testing:
    W.add_experiment_name(
        'gz-subgroups-test',
        'Test for splitting files by subgroups',
    )
else:
    W.add_experiment_name(experiment_name, description)

# Baseline parameters handed to setup_clustering as default_params.
default_params = dict(c_thresh=0.90, n_filter=8, maskN=False)

# Subgroup name -> pattern string, passed through as subgroups=.
subgroups = dict(zebra='.*zebra.*', gazelle='.*gazelle.*')

# Test and full runs differ only in the input file glob.
# Alternative kept for reference: mode='split_by_tags' with the same
# infiles_pattern and default_params, and no subgroups argument.
W.setup_clustering(
    mode='split_by_subgroups',
    infiles_pattern='test*-clean.fastq.bgzf' if testing else 'lane*-clean.fastq.bgzf',
    default_params=default_params,
    subgroups=subgroups,
)

# Variables to change, 1 dictionary per run
run_parameters = [
    dict(c_thresh=1.0),
    dict(c_thresh=0.90),
]
W.run_clustering(run_parameters, threads=1)
# Clustering
#===============================================================================
# Register the experiment: a fixed test label when `testing` is truthy,
# otherwise the caller-supplied name and description.
if testing:
    W.add_experiment_name(
        'gz-subgroups-test',
        'Test for splitting files by subgroups',
    )
else:
    W.add_experiment_name(experiment_name, description)

# Baseline parameters handed to setup_clustering as default_params.
default_params = dict(c_thresh=0.90, n_filter=8, maskN=False)

# Subgroup name -> pattern string; only the testing branch passes it on.
subgroups = dict(zebra='.*zebra.*', gazelle='.*gazelle.*')

if not testing:
    # Full run: mode 'no_split_separate' over the *.bgzf files found under
    # W.c.tag_splitby_sample_outpath.
    # Alternatives kept for reference:
    #   - mode='split_by_subgroups', infiles_pattern='lane*-clean.fastq.bgzf',
    #     subgroups=subgroups
    #   - mode='split_by_tags', infiles_pattern='lane*-clean.fastq.bgzf'
    #   - mode='no_split_separate' with
    #     infiles_path=W.c.tag_splitby_subgroup_outpath
    W.setup_clustering(
        mode='no_split_separate',
        infiles_pattern='*.bgzf',
        infiles_path=W.c.tag_splitby_sample_outpath,
        default_params=default_params,
    )
else:
    # Test run: split the test*-clean files by subgroup.
    # Alternative kept for reference: mode='split_by_tags' with the same
    # infiles_pattern and default_params, and no subgroups argument.
    W.setup_clustering(
        mode='split_by_subgroups',
        infiles_pattern='test*-clean.fastq.bgzf',
        default_params=default_params,
        subgroups=subgroups,
    )

# Variables to change, 1 dictionary per run