Exemple #1
0
def main(args):
    __location__ = os.path.realpath(
        os.path.join(os.getcwd(), os.path.dirname(__file__)))

    # additional argument checks
    if not os.path.isdir(args.working_dir):
        raise ValueError('Working directory not found')
    args.working_dir = os.path.realpath(args.working_dir) + '/'
    if os.path.isdir(args.working_dir + 'analysis/'):
        shutil.rmtree(args.working_dir + 'analysis/')

    options_dict = dict()
    options_dict['wd_envs'] = hp.parse_output_path(args.working_dir + 'envs/')
    options_dict['threads'] = args.threads_per_job
    options_dict['ref_fasta'] = os.path.realpath(args.ref_fasta)
    options_dict['reads_fastq'] = args.working_dir + 'all_reads.fastq'
    options_dict['wd_analysis'] = hp.parse_output_path(args.working_dir +
                                                       'analysis/')
    options_dict[
        'wd_analysis_condas'] = __location__ + '/analysis_conda_files/'
    options_dict['__location__'] = __location__

    # --- create output directories
    if os.path.isdir(options_dict['wd_analysis']):
        shutil.rmtree(options_dict['wd_analysis'])
    _ = hp.parse_output_path(options_dict['wd_analysis'] + 'quast')
    _ = hp.parse_output_path(options_dict['wd_analysis'] + 'jellyfish')
    _ = hp.parse_output_path(options_dict['wd_analysis'] + 'readset_analysis')

    options_dict['wd_analysis_summary'] = hp.parse_output_path(
        options_dict['wd_analysis'] + 'summary/')
    options_dict[
        'wd_assembler_results'] = args.working_dir + 'assembler_results/'
    options_dict[
        'wd_assemblies'] = args.working_dir + 'assembler_results/assemblies/'
    assemblies_list = hp.parse_input_path(options_dict['wd_assemblies'],
                                          pattern='*.fasta')
    if len(assemblies_list) == 0:
        raise ValueError('No succesful assemblies found to analyze!')
    assemblies_names_list = [
        os.path.splitext(os.path.basename(af))[0] for af in assemblies_list
    ]
    options_dict['assemblies_string'] = ' '.join(assemblies_names_list)
    with open(args.user_info, 'r') as f:
        md_yaml = yaml.load(f)
    md = Metadata(md_yaml)
    md.write_publication_info(options_dict['wd_analysis_summary'] +
                              'publication_info.yaml')
    # --- Quast ---
    options_dict['quast_options'] = ''
    if md.is_eukaryote:
        options_dict['quast_options'] += '-e '
    if args.gff_file:
        options_dict['quast_options'] += '-G ' + os.path.abspath(
            args.gff_file) + ' '
    quast_output = ''
    quast_output_cmd = ''
    for anl in assemblies_names_list:
        quast_output += (
            ',\n\t\t{anl}_fplot=\'{wd_analysis_summary}quast/{anl}.fplot\''
            ',\n\t\t{anl}_rplot=\'{wd_analysis_summary}quast/{anl}.rplot\''
        ).format(anl=anl,
                 wd_analysis_summary=options_dict['wd_analysis_summary'])
        quast_output_cmd += (
            'if [ -e contigs_reports/nucmer_output/{anl}.fplot ]; then '  # for quast <5.0.0
            'cp contigs_reports/nucmer_output/{anl}.fplot {wd_analysis_summary}quast/.\n'
            'cp contigs_reports/nucmer_output/{anl}.rplot {wd_analysis_summary}quast/.\n'
            'fi\n').format(
                anl=anl,
                wd_analysis_summary=options_dict['wd_analysis_summary'])
        quast_output_cmd += (
            'if [ -e contigs_reports/all_alignments_{anl}.tsv ]; then '  # for quast =>5.0.0
            'cp contigs_reports/all_alignments_{anl}.tsv {wd_analysis_summary}quast/.\n'
            'fi\n').format(
                anl=anl,
                wd_analysis_summary=options_dict['wd_analysis_summary'])
    options_dict['quast_output'] = quast_output
    options_dict['quast_output_cmd'] = quast_output_cmd

    # --- Construct snakemake file ---
    sf_fn = args.working_dir + 'Snakefile_analysis_' + datetime.datetime.now(
    ).strftime('%Y%m%d%H%M%S')
    with open(__location__ + '/Snakemake_analysis', 'r') as f:
        sf = f.read()

    sf = sf.format(**options_dict)
    with open(sf_fn, 'w') as sf_handle:
        sf_handle.write(sf)

    sm_dict = {'use_conda': True}

    if args.slurm_config is not None:
        sm_dict['cluster'] = 'sbatch'
        sm_dict['cluster_config'] = args.slurm_config
        sm_dict['nodes'] = 5

    snakemake.snakemake(sf_fn, **sm_dict)