else:
    W.add_experiment_name(experiment_name, description)

# Parameters handed to W.setup_clustering as its `default_params` argument.
default_params = dict(c_thresh=0.90, n_filter=8, maskN=False)

# Choose method to split files
# ===============================================================================

# Set to "subgroups" or "tags" to select one of the branches below.
# While this stays None, neither branch runs and W.setup_clustering is not
# called from this section.
splitby = None

# Subgroup label -> pattern string, forwarded as `subgroups` in the
# "subgroups" branch.
subgroups = dict(zebra=".*zebra.*", gazelle=".*gazelle.*")

if splitby == "tags":
    W.setup_clustering(
        mode="split_by_tags",
        infiles_pattern="lane*-clean.fastq.bgzf",
        default_params=default_params,
    )
elif splitby == "subgroups":
    W.setup_clustering(
        mode="split_by_subgroups",
        infiles_pattern="lane*-clean.fastq.bgzf",
        default_params=default_params,
        subgroups=subgroups,
    )
# ===============================================================================

# Variables to change: one dictionary per run.
# Each dictionary here sets only `c_thresh`.
run_parameters = [
    dict(c_thresh=1.0),
    dict(c_thresh=0.90),
]

W.run_clustering(run_parameters, threads=1)

# Next steps

# Delete all fasta files
Beispiel #2
0
# Clustering
# ===============================================================================
# Register the experiment: the configured name and description for a normal
# run, or a fixed test label when `testing` is truthy.
if not testing:
    W.add_experiment_name(experiment_name, description)
else:
    W.add_experiment_name("gz-subgroups-test", "Test for splitting files by subgroups")

# Parameters passed to W.setup_clustering as its `default_params` argument.
default_params = {
    "c_thresh": 0.90,
    "n_filter": 8,
    "maskN": False,
}

# Subgroup label -> pattern string, passed to W.setup_clustering as `subgroups`.
subgroups = {
    "zebra": ".*zebra.*",
    "gazelle": ".*gazelle.*",
}

# Split the input files by subgroup. Test runs read the 'test*' files,
# normal runs read the 'lane*' files; every other argument is shared.
#
# Alternative kept for reference: to split by tags instead, use
# mode='split_by_tags' with the same infiles_pattern and default_params,
# and omit the `subgroups` argument.
W.setup_clustering(
    mode="split_by_subgroups",
    infiles_pattern="test*-clean.fastq.bgzf" if testing else "lane*-clean.fastq.bgzf",
    default_params=default_params,
    subgroups=subgroups,
)

# Variables to change: one dictionary per run.
# Each dictionary here sets only `c_thresh`.
run_parameters = [{"c_thresh": 1.0}, {"c_thresh": 0.90}]

W.run_clustering(run_parameters, threads=1)
# Clustering
# ===============================================================================
# Name the experiment. Test runs use a fixed label; otherwise the
# `experiment_name` and `description` configured elsewhere are used.
if testing:
    W.add_experiment_name(
        "gz-subgroups-test",
        "Test for splitting files by subgroups",
    )
else:
    W.add_experiment_name(experiment_name, description)

# Parameters passed to W.setup_clustering as its `default_params` argument.
default_params = dict(
    c_thresh=0.90,
    n_filter=8,
    maskN=False,
)

# Subgroup label -> pattern string, passed as `subgroups` in the test branch.
subgroups = dict(
    zebra=".*zebra.*",
    gazelle=".*gazelle.*",
)

if not testing:
    # Normal run: mode 'no_split_separate' over every '*.bgzf' file found
    # under W.c.tag_splitby_sample_outpath.
    #
    # Alternatives kept for reference (swap in as needed):
    #   - mode='split_by_subgroups', infiles_pattern='lane*-clean.fastq.bgzf',
    #     default_params=default_params, subgroups=subgroups
    #   - mode='split_by_tags', infiles_pattern='lane*-clean.fastq.bgzf',
    #     default_params=default_params
    #   - mode='no_split_separate', infiles_pattern='*.bgzf',
    #     infiles_path=W.c.tag_splitby_subgroup_outpath,
    #     default_params=default_params
    W.setup_clustering(
        mode="no_split_separate",
        infiles_pattern="*.bgzf",
        infiles_path=W.c.tag_splitby_sample_outpath,
        default_params=default_params,
    )
else:
    # Test run: split the 'test*' files by subgroup.
    # Alternative kept for reference: mode='split_by_tags' with the same
    # infiles_pattern and default_params, without `subgroups`.
    W.setup_clustering(
        mode="split_by_subgroups",
        infiles_pattern="test*-clean.fastq.bgzf",
        default_params=default_params,
        subgroups=subgroups,
    )

# Variables to change, 1 dictionary per run