Esempio n. 1
0
def run_INNUca(sampleName, outdir, fastq_files, args, script_path, scheme, spadesMaxMemory, jar_path_trimmomatic, jar_path_pilon, jarMaxMemory, trueCoverage_config, rematch_script, species_genus, mlst_scheme_genus):
    """Run the INNUca pipeline steps for a single sample and collect their results.

    The steps are executed in this order, each one gated by the outcome of the
    previous ones and by the ``--skip*`` / ``--run*`` options found in ``args``:
    FastQ integrity, first estimated coverage, true coverage (ReMatCh),
    first FastQC, Trimmomatic, second estimated coverage, second FastQC,
    Pear, SPAdes, Assembly Mapping, Pilon and MLST.

    Parameters
    ----------
    sampleName
        Sample identifier forwarded to the True Coverage, Trimmomatic, Pear and
        SPAdes modules.
    outdir
        Output directory; ``coverage_report.txt`` is removed from it (if present)
        and ``final_assembly.txt`` is written into it.
    fastq_files
        Reads given to every step. Replaced by the Trimmomatic paired reads when
        Trimmomatic runs successfully and returns non-empty files.
    args
        Parsed command-line options (attribute access only, e.g. ``args.threads``).
    script_path
        Forwarded to ``trimmomatic.runTrimmomatic``.
    scheme
        Forwarded to ``mlst.runMlst``.
    spadesMaxMemory
        Forwarded to ``spades.runSpades``.
    jar_path_trimmomatic, jar_path_pilon
        Forwarded to the Trimmomatic and Pilon runners respectively.
    jarMaxMemory
        Forwarded to both the Trimmomatic and Pilon runners.
    trueCoverage_config
        Mapping with the True Coverage settings (keys read here:
        ``reference_file``, ``length_extra_seq``, ``minimum_depth_presence``,
        ``minimum_depth_call``, ``minimum_depth_frequency_dominant_allele``,
        ``minimum_gene_coverage``, ``minimum_gene_identity``), or ``None`` to
        skip True Coverage.
    rematch_script
        Forwarded to ``trueCoverage.runTrueCoverage``.
    species_genus, mlst_scheme_genus
        Forwarded to ``mlst.runMlst``.

    Returns
    -------
    tuple
        ``(run_successfully, pass_qc, runs)`` where ``runs`` maps each step name
        to a list ``[run_successfully, pass_qc, time_taken, failing]`` (some
        steps carry a fifth element: a warning, a file size or ``'NA'``).
    """
    threads = args.threads
    adaptersFasta = args.adapters
    if adaptersFasta is not None:
        # args.adapters is used through its ``.name`` attribute (presumably an open file object - confirm with the argument parser)
        adaptersFasta = os.path.abspath(adaptersFasta.name)
    genomeSize = args.genomeSizeExpectedMb
    # Placeholder entries for steps that were skipped on request or never reached.
    # NOTE: when assigned without ``+ ['NA']`` the very same list object is stored under several keys.
    skipped = [None, None, 0, {'sample': 'Skipped'}]
    not_run = [None, None, 0, {'sample': 'Not run'}]

    # Step name -> [run_successfully, pass_qc, time_taken, failing(, extra)]
    runs = {}

    # Run FastQ integrity check
    not_corruption_found, pass_qc, time_taken, failing, fastq_encoding, min_reads_length, max_reads_length = fastQintegrity.runFastQintegrity(fastq_files, threads, outdir)
    runs['FastQ_Integrity'] = [not_corruption_found, pass_qc, time_taken, failing]

    if not_corruption_found:
        # Run first Estimated Coverage
        # Defaults used by the gating conditions below when a step is skipped
        run_successfully_estimatedCoverage = False
        estimatedCoverage = None
        run_successfully_trueCoverage = False
        pass_qc_trueCoverage = False
        if not args.skipEstimatedCoverage:
            # Check whether the Estimated Coverage output is already present
            report_file = os.path.join(outdir, 'coverage_report.txt')
            if os.path.isfile(report_file):
                os.remove(report_file)
            # Run getEstimatedCoverage
            run_successfully_estimatedCoverage, pass_qc, time_taken, failing, estimatedCoverage = coverage.getEstimatedCoverage(fastq_files, genomeSize, outdir, threads, args.estimatedMinimumCoverage)
            runs['first_Coverage'] = [run_successfully_estimatedCoverage, pass_qc, time_taken, failing]
        else:
            print '--skipEstimatedCoverage set. Skipping First Estimated Coverage analysis'
            runs['first_Coverage'] = skipped

        # Forwarded to Pear later on; stays False unless Trimmomatic runs and succeeds
        trimmomatic_run_successfully = False

        # Continue only if the coverage check was skipped, or it ran and the coverage is not below the minimum
        if args.skipEstimatedCoverage or (run_successfully_estimatedCoverage and not estimatedCoverage < args.estimatedMinimumCoverage):
            if not args.skipTrueCoverage and trueCoverage_config is not None:
                # Run True Coverage
                run_successfully_trueCoverage, pass_qc_trueCoverage, time_taken, failing = trueCoverage.runTrueCoverage(sampleName, fastq_files, trueCoverage_config['reference_file'], threads, outdir, trueCoverage_config['length_extra_seq'], trueCoverage_config['minimum_depth_presence'], trueCoverage_config['minimum_depth_call'], trueCoverage_config['minimum_depth_frequency_dominant_allele'], trueCoverage_config['minimum_gene_coverage'], False, False, 1, trueCoverage_config['minimum_gene_identity'], trueCoverage_config, rematch_script)
                runs['trueCoverage_ReMatCh'] = [run_successfully_trueCoverage, pass_qc_trueCoverage, time_taken, failing]
            else:
                # NOTE: this branch is also taken when trueCoverage_config is None, even if --skipTrueCoverage was not given
                print '\n' + '--skipTrueCoverage set. Skipping True coverage analysis'
                runs['trueCoverage_ReMatCh'] = skipped

            # Continue only if True Coverage was not run, or it ran and passed QA/QC
            if args.skipTrueCoverage or trueCoverage_config is None or (run_successfully_trueCoverage and pass_qc_trueCoverage):
                # Run first FastQC
                nts2clip_based_ntsContent = None
                if not args.skipFastQC:
                    run_successfully, pass_qc, time_taken, failing, warning, maximum_reads_length, nts2clip_based_ntsContent = fastqc.runFastQCanalysis(outdir, threads, adaptersFasta, fastq_files, args.fastQCkeepFiles, 'first_run')
                    runs['first_FastQC'] = [run_successfully, pass_qc, time_taken, failing, warning]
                else:
                    print '--skipFastQC set. Skipping First FastQC analysis'
                    runs['first_FastQC'] = skipped + ['NA']

                # Run Trimmomatic
                if not args.skipTrimmomatic:
                    run_successfully, not_empty_fastq, time_taken, failing, paired_reads, trimmomatic_folder, fileSize = trimmomatic.runTrimmomatic(jar_path_trimmomatic, sampleName, outdir, threads, adaptersFasta, script_path, args.doNotSearchAdapters, fastq_files, max_reads_length, args.doNotTrimCrops, args.trimCrop, args.trimHeadCrop, args.trimLeading, args.trimTrailing, args.trimSlidingWindow, args.trimMinLength, nts2clip_based_ntsContent, jarMaxMemory, fastq_encoding)
                    runs['Trimmomatic'] = [run_successfully, None, time_taken, failing, fileSize]
                    trimmomatic_run_successfully = run_successfully

                    if run_successfully and not_empty_fastq:
                        # From here on every step works on the trimmed reads
                        fastq_files = paired_reads
                        min_reads_length = args.trimMinLength

                        # Run second Estimated Coverage
                        # (overwrites the first-run coverage variables, so the later gates use the post-trimming values)
                        if not args.skipEstimatedCoverage:
                            run_successfully_estimatedCoverage, pass_qc, time_taken, failing, estimatedCoverage = coverage.getEstimatedCoverage(fastq_files, genomeSize, outdir, threads, args.estimatedMinimumCoverage)
                            runs['second_Coverage'] = [run_successfully_estimatedCoverage, pass_qc, time_taken, failing]
                        else:
                            print '--skipEstimatedCoverage set. Skipping Second Estimated Coverage analysis'
                            runs['second_Coverage'] = skipped

                        if args.skipEstimatedCoverage or (run_successfully_estimatedCoverage and not estimatedCoverage < args.estimatedMinimumCoverage):
                            # Run second FastQC
                            if not args.skipFastQC:
                                run_successfully, pass_qc, time_taken, failing, warning, maximum_reads_length, nts2clip_based_ntsContent = fastqc.runFastQCanalysis(outdir, threads, adaptersFasta, fastq_files, args.fastQCkeepFiles, 'second_run')
                                runs['second_FastQC'] = [run_successfully, pass_qc, time_taken, failing, warning]
                                if run_successfully:
                                    # Only the second FastQC run updates the maximum reads length used downstream
                                    max_reads_length = maximum_reads_length
                            else:
                                print '--skipFastQC set. Skipping Second FastQC analysis'
                                runs['second_FastQC'] = skipped + ['NA']
                        else:
                            print '\n' + 'Estimated coverage is too lower (< ' + str(args.estimatedMinimumCoverage) + 'x). This sample will not proceed with INNUca pipeline'
                            runs['second_FastQC'] = not_run + ['NA']
                            runs['Pear'] = not_run + ['NA']
                            runs['SPAdes'] = not_run + ['NA']
                            runs['Assembly_Mapping'] = not_run + ['NA']
                            runs['Pilon'] = not_run
                            runs['MLST'] = not_run + ['NA']
                    else:
                        print 'Trimmomatic did not run successfully or return zero reads! Skipping Second Estimated Coverage analysis and FastQC analysis'
                        runs['second_Coverage'] = skipped
                        runs['second_FastQC'] = skipped + ['NA']

                else:
                    print '--skipTrimmomatic set. Skipping Trimmomatic, but also Second FastQC analysis and Second Estimated Coverage analysis'
                    runs['Trimmomatic'] = skipped + ['NA']
                    runs['second_Coverage'] = skipped
                    runs['second_FastQC'] = skipped + ['NA']

                # FastQC verdict: the second run's pass_qc wins; when it is None, fall back to the first run's pass_qc.
                # Stop here only if that verdict is exactly False and --fastQCproceed was not given.
                if not args.skipFastQC and (runs['second_FastQC'][1] or (runs['second_FastQC'][1] is None and runs['first_FastQC'][1])) is False and not args.fastQCproceed:
                    print '\n' + 'This sample does not pass FastQC module QA/QC. It will not proceed with INNUca pipeline'
                    runs['Pear'] = not_run + ['NA']
                    runs['SPAdes'] = not_run + ['NA']
                    runs['Assembly_Mapping'] = not_run + ['NA']
                    runs['Pilon'] = not_run
                    runs['MLST'] = not_run + ['NA']
            else:
                print '\n' + 'This sample does not pass True Coverage module QA/QC. This sample will not proceed with INNUca pipeline'
                runs['first_FastQC'] = not_run + ['NA']
                runs['Trimmomatic'] = not_run + ['NA']
                runs['second_Coverage'] = not_run
                runs['second_FastQC'] = not_run + ['NA']
                runs['Pear'] = not_run + ['NA']
                runs['SPAdes'] = not_run + ['NA']
                runs['Assembly_Mapping'] = not_run + ['NA']
                runs['Pilon'] = not_run
                runs['MLST'] = not_run + ['NA']

        else:
            print '\n' + 'Estimated coverage is too lower (< ' + str(args.estimatedMinimumCoverage) + 'x). This sample will not proceed with INNUca pipeline'
            runs['trueCoverage_ReMatCh'] = not_run
            runs['first_FastQC'] = not_run + ['NA']
            runs['Trimmomatic'] = not_run + ['NA']
            runs['second_Coverage'] = not_run
            runs['second_FastQC'] = not_run + ['NA']
            runs['Pear'] = not_run + ['NA']
            runs['SPAdes'] = not_run + ['NA']
            runs['Assembly_Mapping'] = not_run + ['NA']
            runs['Pilon'] = not_run
            runs['MLST'] = not_run + ['NA']

        # Assembly stage: the same three gates (coverage, True Coverage, FastQC) are re-evaluated with the latest values
        if args.skipEstimatedCoverage or (run_successfully_estimatedCoverage and not estimatedCoverage < args.estimatedMinimumCoverage):
            if args.skipTrueCoverage or trueCoverage_config is None or (run_successfully_trueCoverage and pass_qc_trueCoverage):
                if args.skipFastQC or (runs['second_FastQC'][1] or (runs['second_FastQC'][1] is None and runs['first_FastQC'][1])) is not False or args.fastQCproceed:
                    unassembled_pe_reads = None
                    assembled_se_reads = None
                    # Run Pear
                    if args.runPear:
                        print '--runPear set. Running Pear'
                        pearMinOverlap = pear.determine_minimum_overlap(args.pearMinOverlap, min_reads_length, max_reads_length)
                        run_successfully, pass_qc, time_taken, failing, unassembled_pe_reads, assembled_se_reads, pear_folder, warning = pear.runPear(fastq_files, threads, outdir, sampleName, fastq_encoding, trimmomatic_run_successfully, pearMinOverlap)
                        runs['Pear'] = [run_successfully, pass_qc, time_taken, failing, warning]
                    else:
                        runs['Pear'] = not_run + ['NA']

                    # Run SPAdes
                    if not args.skipSPAdes:
                        # SPAdes gets Pear's unassembled pairs when available, otherwise the current fastq_files
                        run_successfully, pass_qc, time_taken, failing, contigs_spades, warning = spades.runSpades(sampleName, outdir, threads, unassembled_pe_reads if unassembled_pe_reads is not None else fastq_files, args.spadesNotUseCareful, spadesMaxMemory, args.spadesMinCoverageAssembly, args.spadesMinContigsLength, genomeSize, args.spadesKmers, max_reads_length, args.spadesDefaultKmers, args.spadesMinKmerCovContigs, assembled_se_reads, args.saveExcludedContigs, args.maxNumberContigs)
                        runs['SPAdes'] = [run_successfully, pass_qc, time_taken, failing, warning]

                        if run_successfully:
                            # ``contigs`` always points to the most recent assembly produced by a successful step
                            contigs = contigs_spades

                            # Run Assembly Mapping check
                            bam_file = None
                            if not args.skipAssemblyMapping:
                                run_successfully, pass_qc, time_taken, failing, assembly_filtered, bam_file, assemblyMapping_folder, warning = assembly_mapping.runAssemblyMapping(fastq_files, contigs, threads, outdir, args.assemblyMinCoverageContigs, genomeSize, args.saveExcludedContigs, args.maxNumberContigs)
                                runs['Assembly_Mapping'] = [run_successfully, pass_qc, time_taken, failing, warning]

                                if run_successfully:
                                    contigs = assembly_filtered
                                    # Drop the raw SPAdes assembly only if the filtered one is a different file
                                    if not args.keepIntermediateAssemblies and os.path.isfile(contigs_spades) and contigs != contigs_spades:
                                        os.remove(contigs_spades)
                            else:
                                print '--skipAssemblyMapping set. Skipping Assembly Mapping check'
                                runs['Assembly_Mapping'] = skipped + ['NA']

                            # Run Pilon
                            if not args.skipPilon:
                                run_successfully, _, time_taken, failing, assembly_polished, pilon_folder = pilon.runPilon(jar_path_pilon, contigs, fastq_files, threads, outdir, jarMaxMemory, bam_file)
                                runs['Pilon'] = [run_successfully, None, time_taken, failing]

                                if run_successfully:
                                    contigs = assembly_polished
                                    # assembly_filtered is only bound when Assembly Mapping was actually run
                                    if not args.keepIntermediateAssemblies and 'assembly_filtered' in locals() and os.path.isfile(assembly_filtered):
                                        os.remove(assembly_filtered)

                                if not args.pilonKeepFiles:
                                    utils.removeDirectory(pilon_folder)

                            else:
                                print '--skipPilon set. Skipping Pilon correction'
                                runs['Pilon'] = skipped

                            # assemblyMapping_folder is only bound when Assembly Mapping was actually run
                            if 'assemblyMapping_folder' in locals():
                                utils.removeDirectory(assemblyMapping_folder)

                            print '\n' + 'Final assembly: ' + contigs
                            with open(os.path.join(outdir, 'final_assembly.txt'), 'wt') as writer:
                                writer.write(contigs + '\n')

                            # Run MLST
                            if not args.skipMLST:
                                run_successfully, pass_qc, time_taken, failing, warning = mlst.runMlst(contigs, scheme, outdir, species_genus, mlst_scheme_genus)
                                runs['MLST'] = [run_successfully, pass_qc, time_taken, failing, warning]
                            else:
                                print '--skipMLST set. Skipping MLST analysis'
                                runs['MLST'] = skipped + ['NA']
                        else:
                            print 'SPAdes did not run successfully! Skipping Pilon correction, Assembly Mapping check and MLST analysis'
                            runs['Assembly_Mapping'] = skipped + ['NA']
                            runs['Pilon'] = skipped
                            runs['MLST'] = skipped + ['NA']

                    else:
                        print '--skipSPAdes set. Skipping SPAdes, Pilon correction, Assembly Mapping check and MLST analysis'
                        runs['SPAdes'] = skipped + ['NA']
                        runs['Assembly_Mapping'] = skipped + ['NA']
                        runs['Pilon'] = skipped
                        runs['MLST'] = skipped + ['NA']
    else:
        # Integrity check failed: mark every remaining step as not run
        print 'Moving to the next sample'
        for step in ('first_Coverage', 'trueCoverage_ReMatCh', 'first_FastQC', 'Trimmomatic', 'second_Coverage', 'second_FastQC', 'Pear', 'SPAdes', 'Assembly_Mapping', 'Pilon', 'MLST'):
            # These steps carry a fifth element in their entry
            if step in ('Trimmomatic', 'first_FastQC', 'second_FastQC', 'Pear', 'SPAdes', 'Assembly_Mapping', 'MLST'):
                runs[step] = not_run + ['NA']
            else:
                runs[step] = not_run

    # Remove Pear directory
    # (pear_folder is only bound when Pear was actually run)
    if not args.pearKeepFiles and 'pear_folder' in locals():
        utils.removeDirectory(pear_folder)
    # Remove Trimmomatic directory with cleaned reads
    # (trimmomatic_folder is only bound when Trimmomatic was actually run)
    if not args.trimKeepFiles and 'trimmomatic_folder' in locals():
        utils.removeDirectory(trimmomatic_folder)

    # Check run
    # A step counts as OK when its run status is truthy or None (skipped / not run)
    run_successfully = all(runs[step][0] or runs[step][0] is None for step in runs)

    # For the QA/QC flags only an explicit False counts as a failure (None means skipped / not run)
    pass_fastqIntegrity = runs['FastQ_Integrity'][0]
    pass_cov = (runs['second_Coverage'][1] or (runs['second_Coverage'][1] is None and runs['first_Coverage'][1])) is not False
    pass_trueCov = runs['trueCoverage_ReMatCh'][1] is not False
    pass_fastqc = (runs['second_FastQC'][1] or (runs['second_FastQC'][1] is None and runs['first_FastQC'][1])) is not False
    # pass_trimmomatic = runs['Trimmomatic'][1] is not False
    # pass_pear = runs['Pear'][1] is not False
    # pass_spades = runs['SPAdes'][1] is not False or runs['Assembly_Mapping'][1] is True
    pass_spades = runs['SPAdes'][1] is not False
    pass_assemblyMapping = runs['Assembly_Mapping'][1] is not False
    # Pilon's pass_qc slot is always None, so its run status (index 0) is used instead
    pass_pilon = runs['Pilon'][0] is not False
    pass_mlst = runs['MLST'][1] is not False
    pass_qc = all([pass_fastqIntegrity, pass_cov, pass_trueCov, pass_fastqc, pass_spades, pass_assemblyMapping, pass_pilon, pass_mlst])

    return run_successfully, pass_qc, runs
Esempio n. 2
0
def run_INNUca(sampleName, outdir, fastq_files, args, script_path, scheme,
               spadesMaxMemory, jar_path_trimmomatic, jar_path_pilon,
               jarMaxMemory, trueCoverage_config):
    threads = args.threads
    adaptersFasta = args.adapters
    if adaptersFasta is not None:
        adaptersFasta = os.path.abspath(adaptersFasta.name)
    genomeSize = args.genomeSizeExpectedMb
    maximumReadsLength = None
    skipped = [None, None, 0, {'sample': 'Skipped'}]
    not_run = [None, None, 0, {'sample': 'Not run'}]

    runs = {}

    # Run FastQ integrity check
    not_corruption_found, _, time_taken, failing = fastQintegrity.runFastQintegrity(
        fastq_files, threads, outdir)
    runs['FastQ_Integrity'] = [not_corruption_found, None, time_taken, failing]

    if not_corruption_found:
        # Run first Estimated Coverage
        run_successfully_estimatedCoverage = False
        estimatedCoverage = None
        run_successfully_trueCoverage = False
        pass_qc_trueCoverage = False
        if not args.skipEstimatedCoverage:
            # Check whether the Estimated Coverage output is already present
            report_file = os.path.join(outdir, 'coverage_report.txt')
            if os.path.isfile(report_file):
                os.remove(report_file)
            # Run getEstimatedCoverage
            run_successfully_estimatedCoverage, pass_qc, time_taken, failing, estimatedCoverage = coverage.getEstimatedCoverage(
                fastq_files, genomeSize, outdir, threads)
            runs['first_Coverage'] = [
                run_successfully_estimatedCoverage, pass_qc, time_taken,
                failing
            ]
        else:
            print '--skipEstimatedCoverage set. Skipping First Estimated Coverage analysis'
            runs['first_Coverage'] = skipped

        if args.skipEstimatedCoverage or (
                run_successfully_estimatedCoverage
                and not estimatedCoverage < args.estimatedMinimumCoverage):
            if not args.skipTrueCoverage and trueCoverage_config is not None:
                # Run True Coverage
                run_successfully_trueCoverage, pass_qc_trueCoverage, time_taken, failing = trueCoverage.runTrueCoverage(
                    fastq_files, trueCoverage_config['reference_file'],
                    threads, outdir, trueCoverage_config['length_extra_seq'],
                    trueCoverage_config['minimum_depth_presence'],
                    trueCoverage_config['minimum_depth_call'],
                    trueCoverage_config[
                        'minimum_depth_frequency_dominant_allele'],
                    trueCoverage_config['minimum_gene_coverage'],
                    trueCoverage_config['maximum_number_absent_genes'],
                    trueCoverage_config[
                        'maximum_number_genes_multiple_alleles'],
                    trueCoverage_config['minimum_read_coverage'])
                runs['trueCoverage_ReMatCh'] = [
                    run_successfully_trueCoverage, pass_qc_trueCoverage,
                    time_taken, failing
                ]
            else:
                print '\n' + '--skipTrueCoverage set. Skipping True coverage analysis'
                runs['trueCoverage_ReMatCh'] = skipped

            if args.skipTrueCoverage or trueCoverage_config is None or (
                    run_successfully_trueCoverage and pass_qc_trueCoverage):
                # Run first FastQC
                nts2clip_based_ntsContent = None
                if not args.skipFastQC:
                    run_successfully, pass_qc, time_taken, failing, maximumReadsLength, nts2clip_based_ntsContent = fastqc.runFastQCanalysis(
                        outdir, threads, adaptersFasta, fastq_files)
                    runs['first_FastQC'] = [
                        run_successfully, pass_qc, time_taken, failing
                    ]
                else:
                    print '--skipFastQC set. Skipping First FastQC analysis'
                    runs['first_FastQC'] = skipped

                # Run Trimmomatic
                if not args.skipTrimmomatic:
                    run_successfully, not_empty_fastq, time_taken, failing, paired_reads, trimmomatic_folder, fileSize = trimmomatic.runTrimmomatic(
                        jar_path_trimmomatic, sampleName, outdir, threads,
                        adaptersFasta, script_path, args.doNotSearchAdapters,
                        fastq_files, maximumReadsLength, args.doNotTrimCrops,
                        args.trimCrop, args.trimHeadCrop, args.trimLeading,
                        args.trimTrailing, args.trimSlidingWindow,
                        args.trimMinLength, nts2clip_based_ntsContent,
                        jarMaxMemory)
                    runs['Trimmomatic'] = [
                        run_successfully, not_empty_fastq, time_taken, failing,
                        fileSize
                    ]

                    if run_successfully and not_empty_fastq:
                        fastq_files = paired_reads

                        # Run second Estimated Coverage
                        if not args.skipEstimatedCoverage:
                            run_successfully_estimatedCoverage, pass_qc, time_taken, failing, estimatedCoverage = coverage.getEstimatedCoverage(
                                fastq_files, genomeSize, outdir, threads)
                            runs['second_Coverage'] = [
                                run_successfully_estimatedCoverage, pass_qc,
                                time_taken, failing
                            ]
                        else:
                            print '--skipEstimatedCoverage set. Skipping Second Estimated Coverage analysis'
                            runs['second_Coverage'] = skipped

                        if args.skipEstimatedCoverage or (
                                run_successfully_estimatedCoverage
                                and not estimatedCoverage <
                                args.estimatedMinimumCoverage):
                            # Run second FastQC
                            if not args.skipFastQC:
                                run_successfully, pass_qc, time_taken, failing, maximumReadsLength, nts2clip_based_ntsContent = fastqc.runFastQCanalysis(
                                    outdir, threads, adaptersFasta,
                                    fastq_files)
                                runs['second_FastQC'] = [
                                    run_successfully, pass_qc, time_taken,
                                    failing
                                ]
                            else:
                                print '--skipFastQC set. Skipping Second FastQC analysis'
                                runs['second_FastQC'] = skipped
                        else:
                            print '\n' + 'Estimated coverage is too lower (< ' + str(
                                args.estimatedMinimumCoverage
                            ) + 'x). This sample will not proceed with INNUca pipeline'
                            runs['second_FastQC'] = not_run
                            runs['SPAdes'] = not_run
                            runs['Pilon'] = not_run
                            runs['Assembly_Mapping'] = not_run
                            runs['MLST'] = not_run
                    else:
                        print 'Trimmomatic did not run successfully or return zero reads! Skipping Second Estimated Coverage analysis and FastQC analysis'
                        runs['second_Coverage'] = skipped
                        runs['second_FastQC'] = skipped

                else:
                    print '--skipTrimmomatic set. Skipping Trimmomatic, but also Second FastQC analysis and Second Estimated Coverage analysis'
                    runs['Trimmomatic'] = skipped + ['NA']
                    runs['second_Coverage'] = skipped
                    runs['second_FastQC'] = skipped
            else:
                print '\n' + 'This sample does not pass True Coverage module QA/QC. This sample will not proceed with INNUca pipeline'
                runs['first_FastQC'] = not_run
                runs['Trimmomatic'] = not_run + ['NA']
                runs['second_Coverage'] = not_run
                runs['second_FastQC'] = not_run
                runs['SPAdes'] = not_run
                runs['Pilon'] = not_run
                runs['Assembly_Mapping'] = not_run
                runs['MLST'] = not_run

        else:
            print '\n' + 'Estimated coverage is too lower (< ' + str(
                args.estimatedMinimumCoverage
            ) + 'x). This sample will not proceed with INNUca pipeline'
            runs['trueCoverage_ReMatCh'] = not_run
            runs['first_FastQC'] = not_run
            runs['Trimmomatic'] = not_run + ['NA']
            runs['second_Coverage'] = not_run
            runs['second_FastQC'] = not_run
            runs['SPAdes'] = not_run
            runs['Pilon'] = not_run
            runs['Assembly_Mapping'] = not_run
            runs['MLST'] = not_run

        if args.skipEstimatedCoverage or (
                run_successfully_estimatedCoverage
                and not estimatedCoverage < args.estimatedMinimumCoverage):
            if args.skipTrueCoverage or trueCoverage_config is None or (
                    run_successfully_trueCoverage and pass_qc_trueCoverage):
                # Run SPAdes
                if not args.skipSPAdes:
                    run_successfully, pass_qc, time_taken, failing, contigs_spades = spades.runSpades(
                        sampleName, outdir, threads, fastq_files,
                        args.spadesNotUseCareful, spadesMaxMemory,
                        args.spadesMinCoverageAssembly,
                        args.spadesMinContigsLength, genomeSize,
                        args.spadesKmers, maximumReadsLength,
                        args.spadesDefaultKmers, args.spadesMinKmerCovContigs)
                    runs['SPAdes'] = [
                        run_successfully, pass_qc, time_taken, failing
                    ]

                    if run_successfully:
                        # Run Pilon
                        contigs = contigs_spades

                        if not args.skipPilon:
                            run_successfully, _, time_taken, failing, assembly_polished, bam_file, pilon_folder = pilon.runPilon(
                                jar_path_pilon, contigs_spades, fastq_files,
                                threads, outdir, jarMaxMemory)
                            runs['Pilon'] = [
                                run_successfully, None, time_taken, failing
                            ]

                            if run_successfully:
                                contigs = assembly_polished

                            # Run Assembly Mapping check
                            if bam_file is not None:
                                if not args.skipAssemblyMapping:
                                    run_successfully, pass_qc, time_taken, failing, assembly_filtered = assembly_mapping.runAssemblyMapping(
                                        bam_file, contigs_spades, threads,
                                        outdir,
                                        args.assemblyMinCoverageContigs,
                                        assembly_polished, genomeSize)
                                    runs['Assembly_Mapping'] = [
                                        run_successfully, pass_qc, time_taken,
                                        failing
                                    ]

                                    if run_successfully:
                                        contigs = assembly_filtered
                                else:
                                    print '--skipAssemblyMapping set. Skipping Assembly Mapping check'
                                    runs['Assembly_Mapping'] = skipped
                            else:
                                print 'Pilon did not produce the bam file! Assembly Mapping check'
                                runs['Assembly_Mapping'] = skipped

                            if not args.pilonKeepFiles:
                                utils.removeDirectory(pilon_folder)

                        else:
                            print '--skipPilon set. Skipping Pilon correction and Assembly Mapping check'
                            runs['Pilon'] = skipped
                            runs['Assembly_Mapping'] = skipped

                        print '\n' + 'Final assembly: ' + contigs
                        with open(os.path.join(outdir, 'final_assembly.txt'),
                                  'wt') as writer:
                            writer.write(contigs + '\n')

                        # Run MLST
                        if not args.skipMLST:
                            runs['MLST'] = mlst.runMlst(
                                contigs, scheme, outdir)
                        else:
                            print '--skipMLST set. Skipping MLST analysis'
                            runs['MLST'] = skipped
                    else:
                        print 'SPAdes did not run successfully! Skipping Pilon correction, Assembly Mapping check and MLST analysis'
                        runs['Pilon'] = skipped
                        runs['Assembly_Mapping'] = skipped
                        runs['MLST'] = skipped

                else:
                    print '--skipSPAdes set. Skipping SPAdes Pilon correction, Assembly Mapping check and MLST analysis'
                    runs['SPAdes'] = skipped
                    runs['Pilon'] = skipped
                    runs['Assembly_Mapping'] = skipped
                    runs['MLST'] = skipped
    else:
        print 'Moving to the next sample'
        for step in ('first_Coverage', 'trueCoverage_ReMatCh', 'first_FastQC',
                     'Trimmomatic', 'second_Coverage', 'second_FastQC',
                     'SPAdes', 'Pilon', 'Assembly_Mapping', 'MLST'):
            if step == 'Trimmomatic':
                runs[step] = not_run + ['NA']
            else:
                runs[step] = not_run

    # Remove Trimmomatic directory with cleaned reads
    if not args.trimKeepFiles:
        try:
            utils.removeDirectory(trimmomatic_folder)
        except:
            print 'It is not possible to remove Trimmomatic directory because Trimmomatic did not run'

    # Check run
    run_successfully = all(runs[step][0] or runs[step][0] is None
                           for step in runs)

    pass_fastqIntegrity = runs['FastQ_Integrity'][0]
    pass_cov = (runs['second_Coverage'][1]
                or (runs['second_Coverage'][1] is None
                    and runs['first_Coverage'][1])) is not False
    pass_trueCov = runs['trueCoverage_ReMatCh'][1] is not False
    pass_fastqc = (runs['second_FastQC'][1]
                   or (runs['second_FastQC'][1] is None
                       and runs['first_FastQC'][1])) is not False
    pass_trimmomatic = runs['Trimmomatic'][1] is not False
    pass_spades = runs['SPAdes'][1] is not False
    pass_assemblyMapping = runs['Assembly_Mapping'][1] is not False
    pass_mlst = runs['MLST'][1] is not False
    pass_qc = all([
        pass_fastqIntegrity, pass_cov, pass_trueCov, pass_fastqc,
        pass_trimmomatic, pass_spades, pass_assemblyMapping, pass_mlst
    ])

    return run_successfully, pass_qc, runs
Esempio n. 3
0
def run_INNUca(sampleName, outdir, fastq_files, args, script_path, scheme,
               spadesMaxMemory, jar_path_trimmomatic, jar_path_pilon,
               jarMaxMemory, trueCoverage_config, rematch_script,
               species_genus, mlst_scheme_genus):
    """Run the INNUca QA/QC and assembly steps for one sample.

    The steps are attempted in this order: FastQ integrity, first estimated
    coverage, true coverage (ReMatCh), first FastQC, Trimmomatic, second
    estimated coverage, second FastQC, Pear (only with ``args.runPear``),
    SPAdes, Assembly Mapping, Pilon and MLST.  A failed integrity check, an
    insufficient estimated coverage, a failed true-coverage QC or a failed
    FastQC verdict (unless ``args.fastQCproceed``) stops the sample before
    the assembly steps.

    Every step gets an entry in the returned ``runs`` dict, shaped as
    ``[run_successfully, pass_qc, time_taken, failing]`` and, for some steps,
    a fifth element (a warning, the Trimmomatic file size, or ``'NA'``).
    Steps disabled through an option are recorded with a ``'Skipped'``
    placeholder, steps never reached with a ``'Not run'`` placeholder.  Each
    placeholder is a fresh object, so entries never alias one another.

    Parameters
    ----------
    sampleName
        Sample identifier forwarded to the individual steps.
    outdir
        Sample output directory; ``coverage_report.txt`` is removed from it
        before the first coverage estimate and ``final_assembly.txt`` is
        written into it after a successful SPAdes run.
    fastq_files
        Input reads.  Replaced by the Trimmomatic paired reads for the later
        steps when Trimmomatic succeeds and returns non-empty files.
    args
        Parsed command-line options (only attributes are read).
        ``args.adapters``, when not ``None``, must expose ``.name``
        (presumably an open file object -- confirm against the parser).
    script_path, jar_path_trimmomatic, jarMaxMemory
        Forwarded to Trimmomatic (``jarMaxMemory`` also to Pilon).
    scheme, species_genus, mlst_scheme_genus
        Forwarded to the MLST step.
    spadesMaxMemory
        Forwarded to SPAdes.
    jar_path_pilon
        Forwarded to Pilon.
    trueCoverage_config
        Mapping with the true-coverage settings, or ``None`` to skip that
        step.
    rematch_script
        Forwarded to the true-coverage step.

    Returns
    -------
    tuple
        ``(run_successfully, pass_qc, runs)``.  ``run_successfully`` is true
        when every recorded step status is truthy or ``None``; ``pass_qc``
        combines the integrity status, the coverage, true-coverage, FastQC,
        SPAdes, Assembly Mapping and MLST QC verdicts and the Pilon run
        status (Trimmomatic and Pear are not part of it).
    """
    threads = args.threads
    adaptersFasta = args.adapters
    if adaptersFasta is not None:
        adaptersFasta = os.path.abspath(adaptersFasta.name)
    genomeSize = args.genomeSizeExpectedMb

    runs = {}

    # Steps whose entries carry a fifth element; their placeholders get 'NA'.
    steps_with_extra = ('first_FastQC', 'Trimmomatic', 'second_FastQC',
                        'Pear', 'SPAdes', 'Assembly_Mapping', 'MLST')
    # Steps that only make sense once the reads passed every read-level QC.
    assembly_steps = ('Pear', 'SPAdes', 'Assembly_Mapping', 'Pilon', 'MLST')

    def placeholder(reason, step):
        """Build a fresh placeholder entry for *step* (never shared)."""
        entry = [None, None, 0, {'sample': reason}]
        if step in steps_with_extra:
            entry.append('NA')
        return entry

    def mark_skipped(*steps):
        """Record *steps* as skipped on request or because of a failure."""
        for step in steps:
            runs[step] = placeholder('Skipped', step)

    def mark_not_run(*steps):
        """Record *steps* as never reached."""
        for step in steps:
            runs[step] = placeholder('Not run', step)

    def fastqc_verdict():
        """Return the second FastQC QC result, or the first if it is None."""
        second = runs['second_FastQC'][1]
        return second or (second is None and runs['first_FastQC'][1])

    # Working folders of optional steps; they stay None when the step did not
    # run, which lets the final clean-up test them explicitly.
    pear_folder = None
    trimmomatic_folder = None

    # Run FastQ integrity check
    (not_corruption_found, pass_qc, time_taken, failing, fastq_encoding,
     min_reads_length, max_reads_length) = fastQintegrity.runFastQintegrity(
         fastq_files, threads, outdir)
    runs['FastQ_Integrity'] = [
        not_corruption_found, pass_qc, time_taken, failing
    ]

    if not_corruption_found:
        # Run first Estimated Coverage
        run_successfully_estimatedCoverage = False
        estimatedCoverage = None
        run_successfully_trueCoverage = False
        pass_qc_trueCoverage = False
        if not args.skipEstimatedCoverage:
            # Remove a coverage report left over from a previous run
            report_file = os.path.join(outdir, 'coverage_report.txt')
            if os.path.isfile(report_file):
                os.remove(report_file)
            (run_successfully_estimatedCoverage, pass_qc, time_taken, failing,
             estimatedCoverage) = coverage.getEstimatedCoverage(
                 fastq_files, genomeSize, outdir, threads,
                 args.estimatedMinimumCoverage)
            runs['first_Coverage'] = [
                run_successfully_estimatedCoverage, pass_qc, time_taken,
                failing
            ]
        else:
            print('--skipEstimatedCoverage set. Skipping First Estimated Coverage analysis')
            mark_skipped('first_Coverage')

        trimmomatic_run_successfully = False

        if args.skipEstimatedCoverage or (
                run_successfully_estimatedCoverage
                and not estimatedCoverage < args.estimatedMinimumCoverage):
            if not args.skipTrueCoverage and trueCoverage_config is not None:
                # Run True Coverage
                (run_successfully_trueCoverage, pass_qc_trueCoverage,
                 time_taken, failing) = trueCoverage.runTrueCoverage(
                     sampleName, fastq_files,
                     trueCoverage_config['reference_file'], threads, outdir,
                     trueCoverage_config['length_extra_seq'],
                     trueCoverage_config['minimum_depth_presence'],
                     trueCoverage_config['minimum_depth_call'],
                     trueCoverage_config[
                         'minimum_depth_frequency_dominant_allele'],
                     trueCoverage_config['minimum_gene_coverage'], False,
                     False, 1, trueCoverage_config['minimum_gene_identity'],
                     trueCoverage_config, rematch_script)
                runs['trueCoverage_ReMatCh'] = [
                    run_successfully_trueCoverage, pass_qc_trueCoverage,
                    time_taken, failing
                ]
            else:
                # NOTE: also reached when no true-coverage configuration is
                # available, even though the message only names the option.
                print('\n' + '--skipTrueCoverage set. Skipping True coverage analysis')
                mark_skipped('trueCoverage_ReMatCh')

            if args.skipTrueCoverage or trueCoverage_config is None or (
                    run_successfully_trueCoverage and pass_qc_trueCoverage):
                # Run first FastQC
                nts2clip_based_ntsContent = None
                if not args.skipFastQC:
                    (run_successfully, pass_qc, time_taken, failing, warning,
                     _, nts2clip_based_ntsContent) = fastqc.runFastQCanalysis(
                         outdir, threads, adaptersFasta, fastq_files,
                         args.fastQCkeepFiles, 'first_run')
                    runs['first_FastQC'] = [
                        run_successfully, pass_qc, time_taken, failing, warning
                    ]
                else:
                    print('--skipFastQC set. Skipping First FastQC analysis')
                    mark_skipped('first_FastQC')

                # Run Trimmomatic
                if not args.skipTrimmomatic:
                    (run_successfully, not_empty_fastq, time_taken, failing,
                     paired_reads, trimmomatic_folder,
                     fileSize) = trimmomatic.runTrimmomatic(
                         jar_path_trimmomatic, sampleName, outdir, threads,
                         adaptersFasta, script_path, args.doNotSearchAdapters,
                         fastq_files, max_reads_length, args.doNotTrimCrops,
                         args.trimCrop, args.trimHeadCrop, args.trimLeading,
                         args.trimTrailing, args.trimSlidingWindow,
                         args.trimMinLength, nts2clip_based_ntsContent,
                         jarMaxMemory, fastq_encoding)
                    runs['Trimmomatic'] = [
                        run_successfully, None, time_taken, failing, fileSize
                    ]
                    trimmomatic_run_successfully = run_successfully

                    if run_successfully and not_empty_fastq:
                        # From here on every step works on the trimmed reads
                        fastq_files = paired_reads
                        min_reads_length = args.trimMinLength

                        # Run second Estimated Coverage
                        if not args.skipEstimatedCoverage:
                            (run_successfully_estimatedCoverage, pass_qc,
                             time_taken, failing,
                             estimatedCoverage) = coverage.getEstimatedCoverage(
                                 fastq_files, genomeSize, outdir, threads,
                                 args.estimatedMinimumCoverage)
                            runs['second_Coverage'] = [
                                run_successfully_estimatedCoverage, pass_qc,
                                time_taken, failing
                            ]
                        else:
                            print('--skipEstimatedCoverage set. Skipping Second Estimated Coverage analysis')
                            mark_skipped('second_Coverage')

                        if args.skipEstimatedCoverage or (
                                run_successfully_estimatedCoverage
                                and not estimatedCoverage <
                                args.estimatedMinimumCoverage):
                            # Run second FastQC
                            if not args.skipFastQC:
                                (run_successfully, pass_qc, time_taken,
                                 failing, warning, maximum_reads_length,
                                 nts2clip_based_ntsContent) = fastqc.runFastQCanalysis(
                                     outdir, threads, adaptersFasta,
                                     fastq_files, args.fastQCkeepFiles,
                                     'second_run')
                                runs['second_FastQC'] = [
                                    run_successfully, pass_qc, time_taken,
                                    failing, warning
                                ]
                                if run_successfully:
                                    max_reads_length = maximum_reads_length
                            else:
                                print('--skipFastQC set. Skipping Second FastQC analysis')
                                mark_skipped('second_FastQC')
                        else:
                            print('\n' + 'Estimated coverage is too lower (< ' + str(args.estimatedMinimumCoverage) + 'x). This sample will not proceed with INNUca pipeline')
                            mark_not_run('second_FastQC', *assembly_steps)
                    else:
                        print('Trimmomatic did not run successfully or return zero reads! Skipping Second Estimated Coverage analysis and FastQC analysis')
                        mark_skipped('second_Coverage', 'second_FastQC')

                else:
                    print('--skipTrimmomatic set. Skipping Trimmomatic, but also Second FastQC analysis and Second Estimated Coverage analysis')
                    mark_skipped('Trimmomatic', 'second_Coverage',
                                 'second_FastQC')

                if (not args.skipFastQC and fastqc_verdict() is False
                        and not args.fastQCproceed):
                    print('\n' + 'This sample does not pass FastQC module QA/QC. It will not proceed with INNUca pipeline')
                    mark_not_run(*assembly_steps)
            else:
                print('\n' + 'This sample does not pass True Coverage module QA/QC. This sample will not proceed with INNUca pipeline')
                mark_not_run('first_FastQC', 'Trimmomatic', 'second_Coverage',
                             'second_FastQC', *assembly_steps)

        else:
            print('\n' + 'Estimated coverage is too lower (< ' + str(args.estimatedMinimumCoverage) + 'x). This sample will not proceed with INNUca pipeline')
            mark_not_run('trueCoverage_ReMatCh', 'first_FastQC', 'Trimmomatic',
                         'second_Coverage', 'second_FastQC', *assembly_steps)

        # The assembly steps only start when the (possibly updated) coverage,
        # the true coverage and the FastQC verdict all allow it.  Whenever one
        # of them does not, the matching branch above has already recorded
        # the assembly steps as 'Not run'.
        if (args.skipEstimatedCoverage or (
                run_successfully_estimatedCoverage
                and not estimatedCoverage < args.estimatedMinimumCoverage)) \
                and (args.skipTrueCoverage or trueCoverage_config is None or (
                    run_successfully_trueCoverage and pass_qc_trueCoverage)) \
                and (args.skipFastQC or fastqc_verdict() is not False
                     or args.fastQCproceed):
            unassembled_pe_reads = None
            assembled_se_reads = None
            # Run Pear
            if args.runPear:
                print('--runPear set. Running Pear')
                pearMinOverlap = pear.determine_minimum_overlap(
                    args.pearMinOverlap, min_reads_length, max_reads_length)
                (run_successfully, pass_qc, time_taken, failing,
                 unassembled_pe_reads, assembled_se_reads, pear_folder,
                 warning) = pear.runPear(
                     fastq_files, threads, outdir, sampleName, fastq_encoding,
                     trimmomatic_run_successfully, pearMinOverlap)
                runs['Pear'] = [
                    run_successfully, pass_qc, time_taken, failing, warning
                ]
            else:
                mark_not_run('Pear')

            # Run SPAdes
            if not args.skipSPAdes:
                # Reads left unassembled by Pear take precedence over the
                # (trimmed) input reads when Pear provided them.
                (run_successfully, pass_qc, time_taken, failing,
                 contigs_spades, warning) = spades.runSpades(
                     sampleName, outdir, threads,
                     unassembled_pe_reads
                     if unassembled_pe_reads is not None else fastq_files,
                     args.spadesNotUseCareful, spadesMaxMemory,
                     args.spadesMinCoverageAssembly,
                     args.spadesMinContigsLength, genomeSize,
                     args.spadesKmers, max_reads_length,
                     args.spadesDefaultKmers, args.spadesMinKmerCovContigs,
                     assembled_se_reads, args.saveExcludedContigs,
                     args.maxNumberContigs)
                runs['SPAdes'] = [
                    run_successfully, pass_qc, time_taken, failing, warning
                ]

                if run_successfully:
                    contigs = contigs_spades

                    # Run Assembly Mapping check
                    bam_file = None
                    assembly_filtered = None
                    assemblyMapping_folder = None
                    if not args.skipAssemblyMapping:
                        (run_successfully, pass_qc, time_taken, failing,
                         assembly_filtered, bam_file, assemblyMapping_folder,
                         warning) = assembly_mapping.runAssemblyMapping(
                             fastq_files, contigs, threads, outdir,
                             args.assemblyMinCoverageContigs, genomeSize,
                             args.saveExcludedContigs, args.maxNumberContigs)
                        runs['Assembly_Mapping'] = [
                            run_successfully, pass_qc, time_taken, failing,
                            warning
                        ]

                        if run_successfully:
                            contigs = assembly_filtered
                            # Drop the raw SPAdes contigs only when a
                            # distinct filtered assembly supersedes them.
                            if (not args.keepIntermediateAssemblies
                                    and os.path.isfile(contigs_spades)
                                    and contigs != contigs_spades):
                                os.remove(contigs_spades)
                    else:
                        print('--skipAssemblyMapping set. Skipping Assembly Mapping check')
                        mark_skipped('Assembly_Mapping')

                    # Run Pilon
                    if not args.skipPilon:
                        (run_successfully, _, time_taken, failing,
                         assembly_polished, pilon_folder) = pilon.runPilon(
                             jar_path_pilon, contigs, fastq_files, threads,
                             outdir, jarMaxMemory, bam_file)
                        runs['Pilon'] = [
                            run_successfully, None, time_taken, failing
                        ]

                        if run_successfully:
                            contigs = assembly_polished
                            if (not args.keepIntermediateAssemblies
                                    and assembly_filtered is not None
                                    and os.path.isfile(assembly_filtered)):
                                os.remove(assembly_filtered)

                        if not args.pilonKeepFiles:
                            utils.removeDirectory(pilon_folder)

                    else:
                        print('--skipPilon set. Skipping Pilon correction')
                        mark_skipped('Pilon')

                    # Removed only after Pilon, which receives the BAM file
                    # produced by the Assembly Mapping step.
                    if assemblyMapping_folder is not None:
                        utils.removeDirectory(assemblyMapping_folder)

                    print('\n' + 'Final assembly: ' + contigs)
                    final_assembly_file = os.path.join(outdir,
                                                       'final_assembly.txt')
                    with open(final_assembly_file, 'wt') as writer:
                        writer.write(contigs + '\n')

                    # Run MLST
                    if not args.skipMLST:
                        (run_successfully, pass_qc, time_taken, failing,
                         warning) = mlst.runMlst(
                             contigs, scheme, outdir, species_genus,
                             mlst_scheme_genus)
                        runs['MLST'] = [
                            run_successfully, pass_qc, time_taken, failing,
                            warning
                        ]
                    else:
                        print('--skipMLST set. Skipping MLST analysis')
                        mark_skipped('MLST')
                else:
                    print('SPAdes did not run successfully! Skipping Pilon correction, Assembly Mapping check and MLST analysis')
                    mark_skipped('Assembly_Mapping', 'Pilon', 'MLST')

            else:
                print('--skipSPAdes set. Skipping SPAdes, Pilon correction, Assembly Mapping check and MLST analysis')
                mark_skipped('SPAdes', 'Assembly_Mapping', 'Pilon', 'MLST')
    else:
        print('Moving to the next sample')
        mark_not_run('first_Coverage', 'trueCoverage_ReMatCh', 'first_FastQC',
                     'Trimmomatic', 'second_Coverage', 'second_FastQC',
                     *assembly_steps)

    # Remove Pear directory
    if not args.pearKeepFiles and pear_folder is not None:
        utils.removeDirectory(pear_folder)
    # Remove Trimmomatic directory with cleaned reads
    if not args.trimKeepFiles and trimmomatic_folder is not None:
        utils.removeDirectory(trimmomatic_folder)

    # Check run: a status of None (skipped / not run) does not count as failure
    run_successfully = all(runs[step][0] or runs[step][0] is None
                           for step in runs)

    pass_fastqIntegrity = runs['FastQ_Integrity'][0]
    # The second coverage estimate wins; the first one is the fallback when
    # the second has no QC result.
    second_cov = runs['second_Coverage'][1]
    pass_cov = (second_cov
                or (second_cov is None
                    and runs['first_Coverage'][1])) is not False
    pass_trueCov = runs['trueCoverage_ReMatCh'][1] is not False
    pass_fastqc = fastqc_verdict() is not False
    # Trimmomatic and Pear do not take part in the overall QA/QC verdict.
    pass_spades = runs['SPAdes'][1] is not False
    pass_assemblyMapping = runs['Assembly_Mapping'][1] is not False
    # Pilon entries always store None as QC result, so its run status is used.
    pass_pilon = runs['Pilon'][0] is not False
    pass_mlst = runs['MLST'][1] is not False
    pass_qc = all([
        pass_fastqIntegrity, pass_cov, pass_trueCov, pass_fastqc, pass_spades,
        pass_assemblyMapping, pass_pilon, pass_mlst
    ])

    return run_successfully, pass_qc, runs
Esempio n. 4
0
def run_innuca(sample_name,
               outdir,
               fastq_files,
               args,
               script_path,
               scheme,
               spades_max_memory,
               jar_path_trimmomatic,
               jar_path_pilon,
               jar_max_memory,
               true_coverage_config,
               rematch_script,
               species_genus,
               mlst_scheme_genus,
               spades_version=None):
    threads = args.threads
    adapters_fasta = args.adapters
    if adapters_fasta is not None:
        adapters_fasta = os.path.abspath(adapters_fasta.name)
    genome_size = args.genomeSizeExpectedMb
    # run_successfully, pass_qc, time_taken, failing, warning, file_size
    skipped = [None, None, 0, {'sample': 'Skipped'}, {}, 'NA']
    not_run = [None, None, 0, {'sample': 'Not run'}, {}, 'NA']

    runs = {}

    # Run FastQ integrity check
    not_corruption_found, pass_qc, time_taken, failing, fastq_encoding, min_reads_length, max_reads_length = \
        fastQintegrity.runFastQintegrity(fastq_files, threads, outdir)
    runs['FastQ_Integrity'] = [
        not_corruption_found, pass_qc, time_taken, failing, {}, 'NA'
    ]

    pear_folder = None
    trimmomatic_folder = None
    if not_corruption_found:
        # Run Kraken
        # most_abundant_taxon_percent = None
        run_successfully_kraken = False
        run_successfully_estimated_coverage = False
        estimated_coverage = None
        run_successfully_true_coverage = False
        pass_qc_true_coverage = False

        trimmomatic_run_successfully = False
        if args.runKraken:
            print('\n' '--runKraken set. Running Kraken for reads')
            run_successfully_kraken, pass_qc, time_taken, failing, warning, _ = \
                kraken(species=args.speciesExpected, files_to_classify=fastq_files, kraken_db=args.krakenDB,
                       files_type='fastq', outdir=outdir, version_kraken=version_kraken_global,
                       db_mem=args.krakenMemory, quick=args.krakenQuick, min_percent_covered=args.krakenMinCov,
                       max_unclassified_frag=args.krakenMaxUnclass, min_base_quality=args.krakenMinQual,
                       threads=threads)
            runs['reads_Kraken'] = [
                run_successfully_kraken, pass_qc, time_taken, failing, warning,
                'NA'
            ]
        else:
            runs['reads_Kraken'] = skipped

        if args.runKraken and \
                (run_successfully_kraken and not pass_qc) and \
                not args.krakenProceed and \
                not args.krakenIgnoreQC:
            print(
                '\n'
                'This sample does not pass Kraken module QA/QC. It will not proceed with INNUca pipeline'
            )
        else:
            # Run first Estimated Coverage
            if not args.skipEstimatedCoverage:
                # Check whether the Estimated Coverage output is already present
                report_file = os.path.join(outdir, 'coverage_report.txt')
                if os.path.isfile(report_file):
                    os.remove(report_file)
                # Run getEstimatedCoverage
                run_successfully_estimated_coverage, pass_qc, time_taken, failing, estimated_coverage = \
                    coverage.getEstimatedCoverage(fastq_files, genome_size, outdir, threads,
                                                  args.estimatedMinimumCoverage)
                runs['first_Coverage'] = [
                    run_successfully_estimated_coverage, pass_qc, time_taken,
                    failing, {}, 'NA'
                ]
            else:
                print(
                    '--skipEstimatedCoverage set. Skipping First Estimated Coverage analysis'
                )
                runs['first_Coverage'] = skipped

            # # Correct first estimation coverage with Kraken percentage
            # # Does not seem to be a good idea (at least for Streptococcus agalactiae)
            # if args.runKraken and \
            #         (runs['Kraken'][0] and runs['Kraken'][1]) and \
            #         most_abundant_taxon_percent is not None and \
            #         estimated_coverage is not None:
            #     new_estimation = estimated_coverage * (most_abundant_taxon_percent / 100)
            #     print('\n'
            #           'Correct estimated coverage ({estimated}x) with Kraken taxon percentage'
            #           ' coverage ({percent}%): {new_estimation}x'.format(estimated=estimated_coverage,
            #                                                              percent=most_abundant_taxon_percent,
            #                                                              new_estimation=new_estimation))
            #     estimated_coverage = new_estimation

            if args.skipEstimatedCoverage or (
                    run_successfully_estimated_coverage and
                    not estimated_coverage < args.estimatedMinimumCoverage):
                if not args.skipTrueCoverage and true_coverage_config is not None:
                    # Run True Coverage
                    run_successfully_true_coverage, pass_qc_true_coverage, time_taken, failing, _ = \
                        trueCoverage.run_true_coverage(sample_name, fastq_files, true_coverage_config['reference_file'],
                                                       threads, outdir,
                                                       true_coverage_config['length_extra_seq'],
                                                       true_coverage_config['minimum_depth_presence'],
                                                       true_coverage_config['minimum_depth_call'],
                                                       true_coverage_config['minimum_depth_frequency_dominant_allele'],
                                                       true_coverage_config['minimum_gene_coverage'], False,
                                                       true_coverage_config['minimum_gene_identity'],
                                                       true_coverage_config, rematch_script, num_map_loc=1,
                                                       bowtie_algorithm=args.trueCoverageBowtieAlgo,
                                                       clean_run_rematch=True)
                    runs['trueCoverage_ReMatCh'] = [
                        run_successfully_true_coverage, pass_qc_true_coverage,
                        time_taken, failing, {}, 'NA'
                    ]
                else:
                    print(
                        '\n' +
                        '--skipTrueCoverage set. Skipping True coverage analysis'
                    )
                    runs['trueCoverage_ReMatCh'] = skipped

                if args.skipTrueCoverage or true_coverage_config is None or args.trueCoverageProceed or \
                        (run_successfully_true_coverage and pass_qc_true_coverage):
                    # Run first FastQC
                    nts2clip_based_nts_content = None
                    if not args.skipFastQC:
                        run_successfully, pass_qc, time_taken, failing, warning, maximum_reads_length, \
                            nts2clip_based_nts_content = fastqc.runFastQCanalysis(outdir, threads, adapters_fasta,
                                                                                  fastq_files, args.fastQCkeepFiles,
                                                                                  'first_run')
                        runs['first_FastQC'] = [
                            run_successfully, pass_qc, time_taken, failing,
                            warning, 'NA'
                        ]
                    else:
                        print(
                            '--skipFastQC set. Skipping First FastQC analysis')
                        runs['first_FastQC'] = skipped

                    # Run Trimmomatic
                    not_empty_fastq = True
                    if not args.skipTrimmomatic:
                        run_successfully, not_empty_fastq, time_taken, failing, paired_reads, trimmomatic_folder, \
                            file_size, warning = trimmomatic.runTrimmomatic(jar_path_trimmomatic, sample_name, outdir,
                                                                            threads, adapters_fasta, script_path,
                                                                            args.doNotSearchAdapters, fastq_files,
                                                                            max_reads_length, args.doNotTrimCrops,
                                                                            args.trimCrop, args.trimHeadCrop,
                                                                            args.trimLeading, args.trimTrailing,
                                                                            args.trimSlidingWindow, args.trimMinLength,
                                                                            nts2clip_based_nts_content, jar_max_memory,
                                                                            fastq_encoding)
                        runs['Trimmomatic'] = [
                            run_successfully, None, time_taken, failing,
                            warning, file_size
                        ]
                        trimmomatic_run_successfully = run_successfully

                        if run_successfully and not_empty_fastq:
                            fastq_files = paired_reads
                            min_reads_length = args.trimMinLength

                            # Run second Estimated Coverage
                            if not args.skipEstimatedCoverage:
                                run_successfully_estimated_coverage, pass_qc, time_run, failing, estimated_coverage = \
                                    coverage.getEstimatedCoverage(fastq_files, genome_size, outdir, threads,
                                                                  args.estimatedMinimumCoverage)
                                runs['second_Coverage'] = [
                                    run_successfully_estimated_coverage,
                                    pass_qc, time_run, failing, {}, 'NA'
                                ]
                            else:
                                print(
                                    '--skipEstimatedCoverage set. Skipping Second Estimated Coverage analysis'
                                )
                                runs['second_Coverage'] = skipped

                            if args.skipEstimatedCoverage or (
                                    run_successfully_estimated_coverage
                                    and not estimated_coverage <
                                    args.estimatedMinimumCoverage):
                                # Run second FastQC
                                if not args.skipFastQC:
                                    run_successfully, pass_qc, time_taken, failing, warning, maximum_reads_length, \
                                        nts2clip_based_nts_content = fastqc.runFastQCanalysis(outdir, threads,
                                                                                              adapters_fasta,
                                                                                              fastq_files,
                                                                                              args.fastQCkeepFiles,
                                                                                              'second_run')
                                    runs['second_FastQC'] = [
                                        run_successfully, pass_qc, time_taken,
                                        failing, warning, 'NA'
                                    ]
                                    if run_successfully:
                                        max_reads_length = maximum_reads_length
                                else:
                                    print(
                                        '--skipFastQC set. Skipping Second FastQC analysis'
                                    )
                                    runs['second_FastQC'] = skipped
                            else:
                                print(
                                    '\n'
                                    'Estimated coverage is too lower (< {estimatedMinimumCoverage}x). This sample'
                                    ' will not proceed with INNUca'
                                    ' pipeline'.format(
                                        estimatedMinimumCoverage=args.
                                        estimatedMinimumCoverage))
                                runs['second_FastQC'] = skipped
                        else:
                            print(
                                'Trimmomatic did not run successfully or return zero reads! Skipping Second Estimated'
                                ' Coverage analysis and FastQC analysis')
                            runs['second_Coverage'] = skipped
                            runs['second_FastQC'] = skipped
                    else:
                        print(
                            '--skipTrimmomatic set. Skipping Trimmomatic, but also Second FastQC analysis and Second'
                            ' Estimated Coverage analysis')
                        runs['Trimmomatic'] = skipped
                        runs['second_Coverage'] = skipped
                        runs['second_FastQC'] = skipped

                    if not args.skipFastQC and \
                            (runs['second_FastQC'][1] or (runs['second_FastQC'][1] is None and
                                                          runs['first_FastQC'][1])) is False and \
                            not not_empty_fastq and not args.fastQCproceed:
                        print(
                            '\n'
                            'This sample does not pass FastQC module QA/QC. It will not proceed with INNUca pipeline'
                        )
                else:
                    print(
                        '\n'
                        'This sample does not pass True Coverage module QA/QC. This sample will not proceed with'
                        ' INNUca pipeline')
            else:
                print(
                    '\n'
                    'Estimated coverage is too lower (< {estimatedMinimumCoverage}x). This sample will not proceed'
                    ' with INNUca pipeline'.format(
                        estimatedMinimumCoverage=args.estimatedMinimumCoverage)
                )

        continue_second_part = False
        if not args.runKraken or \
                (runs['reads_Kraken'][0] is True and runs['reads_Kraken'][1] is True) or \
                args.krakenProceed or \
                args.krakenIgnoreQC:
            if args.skipEstimatedCoverage or (
                    run_successfully_estimated_coverage and
                    not estimated_coverage < args.estimatedMinimumCoverage):
                if args.skipTrueCoverage or true_coverage_config is None or args.trueCoverageProceed or \
                        (run_successfully_true_coverage and pass_qc_true_coverage):
                    if args.skipFastQC or (runs['second_FastQC'][1] or
                                           (runs['second_FastQC'][1] is None and
                                            runs['first_FastQC'][1])) is not False or \
                            args.fastQCproceed:
                        continue_second_part = True

        if continue_second_part:
            unassembled_pe_reads = None
            assembled_se_reads = None
            # Run Pear
            if args.runPear:
                print('--runPear set. Running Pear')
                pear_min_overlap = pear.determine_minimum_overlap(
                    args.pearMinOverlap, min_reads_length, max_reads_length)
                run_successfully, pass_qc, time_taken, failing, unassembled_pe_reads, assembled_se_reads, \
                    pear_folder, warning = pear.runPear(fastq_files, threads, outdir, sample_name,
                                                        fastq_encoding, trimmomatic_run_successfully,
                                                        pear_min_overlap)
                runs['Pear'] = [
                    run_successfully, pass_qc, time_taken, failing, warning,
                    'NA'
                ]
            else:
                runs['Pear'] = skipped

            # Run SPAdes
            if not args.skipSPAdes:
                run_successfully, pass_qc, time_taken, failing, contigs_spades, warning = \
                    spades.run_spades(sample_name, outdir, threads,
                                      unassembled_pe_reads if unassembled_pe_reads is not None else fastq_files,
                                      args.spadesNotUseCareful, spades_max_memory,
                                      args.spadesMinCoverageAssembly, args.spadesMinContigsLength, genome_size,
                                      args.spadesKmers, max_reads_length, args.spadesDefaultKmers,
                                      args.spadesMinKmerCovContigs, assembled_se_reads, args.saveExcludedContigs,
                                      args.maxNumberContigs, args.keepSPAdesScaffolds, spades_version=spades_version,
                                      estimated_coverage=estimated_coverage,
                                      spades_not_use_isolate=args.spadesNotUseIsolate)
                runs['SPAdes'] = [
                    run_successfully, pass_qc, time_taken, failing, warning,
                    'NA'
                ]

                if run_successfully:
                    contigs = contigs_spades

                    # Run Assembly Mapping check
                    bam_file = None
                    original_bam = None
                    assembly_mapping_folder = None
                    possible_assemblies_bam_remove = {}
                    if not args.skipAssemblyMapping:
                        run_successfully, pass_qc, time_taken, failing, assembly_filtered, bam_file, \
                            assembly_mapping_folder, warning, original_bam = \
                            assembly_mapping.run_assembly_mapping(fastq_files=fastq_files, reference_file=contigs,
                                                                  outdir=outdir, estimated_genome_size_mb=genome_size,
                                                                  max_number_contigs=args.maxNumberContigs,
                                                                  save_excluded_contigs=args.saveExcludedContigs,
                                                                  min_coverage_assembly=args.assemblyMinCoverageContigs,
                                                                  keep_bam=args.keepBAM, threads=threads)
                        runs['Assembly_Mapping'] = [
                            run_successfully, pass_qc, time_taken, failing,
                            warning, 'NA'
                        ]

                        if run_successfully:
                            # Assembly to remove
                            if not args.keepIntermediateAssemblies:
                                if os.path.isfile(contigs_spades) and \
                                        assembly_filtered is not None and \
                                        assembly_filtered != contigs_spades:
                                    if not args.keepBAM:
                                        os.remove(contigs_spades)
                                    else:
                                        possible_assemblies_bam_remove[
                                            'assembly_mapping'] = contigs_spades

                            if assembly_filtered is not None and \
                                    assembly_filtered != contigs_spades and \
                                    os.path.isfile(assembly_filtered):
                                contigs = assembly_filtered
                    else:
                        print(
                            '--skipAssemblyMapping set. Skipping Assembly Mapping check'
                        )
                        runs['Assembly_Mapping'] = skipped

                    # Run Pilon
                    pilon_new_bam = False
                    pilon_bam = None
                    if not args.skipPilon:
                        run_successfully, _, time_taken, failing, assembly_polished, pilon_folder, pilon_new_bam, \
                            pilon_bam = pilon.run_pilon(jar_path_pilon=jar_path_pilon, assembly=contigs,
                                                        fastq_files=fastq_files, outdir=outdir,
                                                        jar_max_memory=jar_max_memory, alignment_file=bam_file,
                                                        keep_bam=args.keepBAM, threads=threads)
                        runs['Pilon'] = [
                            run_successfully, None, time_taken, failing, {},
                            'NA'
                        ]

                        if run_successfully:
                            if not args.keepIntermediateAssemblies:
                                if os.path.isfile(contigs) and \
                                        assembly_polished is not None and \
                                        os.path.isfile(assembly_polished):
                                    if not args.keepBAM:
                                        os.remove(contigs)
                                    else:
                                        if not pilon_new_bam:
                                            possible_assemblies_bam_remove[
                                                'pilon'] = contigs

                            if assembly_polished is not None and \
                                    os.path.isfile(assembly_polished):
                                contigs = assembly_polished

                        if not args.pilonKeepFiles and os.path.isdir(
                                pilon_folder):
                            utils.removeDirectory(pilon_folder)

                    else:
                        print('--skipPilon set. Skipping Pilon correction')
                        runs['Pilon'] = skipped

                    if not args.keepBAM:
                        if bam_file is not None:
                            if os.path.isfile(bam_file):
                                os.remove(bam_file)
                            if os.path.isfile(bam_file + '.bai'):
                                os.remove(bam_file + '.bai')

                        if original_bam is not None and os.path.isfile(
                                original_bam):
                            os.remove(original_bam)

                        if pilon_bam is not None and os.path.isfile(pilon_bam):
                            os.remove(pilon_bam)

                        if 'assembly_mapping' in possible_assemblies_bam_remove and \
                                os.path.isfile(possible_assemblies_bam_remove['assembly_mapping']):
                            os.remove(possible_assemblies_bam_remove[
                                'assembly_mapping'])
                        if 'pilon' in possible_assemblies_bam_remove and \
                                os.path.isfile(possible_assemblies_bam_remove['pilon']):
                            os.remove(possible_assemblies_bam_remove['pilon'])
                    else:
                        if pilon_new_bam:
                            if bam_file is not None:
                                if os.path.isfile(bam_file):
                                    os.remove(bam_file)
                                if os.path.isfile(bam_file + '.bai'):
                                    os.remove(bam_file + '.bai')

                            if original_bam is not None and os.path.isfile(
                                    original_bam):
                                os.remove(original_bam)

                            if 'assembly_mapping' in possible_assemblies_bam_remove and \
                                    os.path.isfile(possible_assemblies_bam_remove['assembly_mapping']):
                                os.remove(possible_assemblies_bam_remove[
                                    'assembly_mapping'])
                        else:
                            if original_bam is not None and os.path.isfile(original_bam) and \
                                    bam_file is not None and os.path.isfile(bam_file):
                                os.remove(bam_file)
                            if 'pilon' in possible_assemblies_bam_remove and \
                                    os.path.isfile(possible_assemblies_bam_remove['pilon']):
                                os.remove(
                                    possible_assemblies_bam_remove['pilon'])

                    if not args.skipAssemblyMapping:
                        utils.removeDirectory(assembly_mapping_folder)

                    print('\n' + 'Final assembly: ' + contigs)
                    with open(os.path.join(outdir, 'final_assembly.txt'),
                              'wt') as writer:
                        writer.write(contigs + '\n')

                    # Run MLST
                    if not args.skipMLST:
                        run_successfully, pass_qc, time_taken, failing, warning = \
                            mlst.runMlst(contigs, scheme, outdir, species_genus, mlst_scheme_genus)
                        runs['MLST'] = [
                            run_successfully, pass_qc, time_taken, failing,
                            warning, 'NA'
                        ]
                    else:
                        print('--skipMLST set. Skipping MLST analysis')
                        runs['MLST'] = skipped

                    # Run Kraken
                    if args.runKraken:
                        print('\n'
                              '--runKraken set. Running Kraken for assembly')
                        run_successfully, pass_qc, time_taken, failing, warning, _ = \
                            kraken(species=args.speciesExpected, files_to_classify=[contigs], kraken_db=args.krakenDB,
                                   files_type='fasta', outdir=outdir, version_kraken=version_kraken_global,
                                   db_mem=args.krakenMemory, quick=args.krakenQuick,
                                   min_percent_covered=args.krakenMinCov,
                                   max_unclassified_frag=args.krakenMaxUnclass, min_base_quality=args.krakenMinQual,
                                   threads=threads)
                        runs['assembly_Kraken'] = [
                            run_successfully, pass_qc, time_taken, failing,
                            warning, 'NA'
                        ]
                    else:
                        runs['assembly_Kraken'] = skipped

                    # Run insert_size
                    if args.runInsertSize:
                        print('\n' '--runInsertSize set. Running insert_size')
                        run_successfully, _, time_taken, failing = \
                            insert_size(sample_name=sample_name, reference=contigs,
                                        fastq=fastq_files, outdir=outdir, threads=threads, dist=args.insertSizeDist)
                        runs['insert_size'] = [
                            run_successfully, None, time_taken, failing, {},
                            'NA'
                        ]
                    else:
                        runs['insert_size'] = skipped
                else:
                    print(
                        'SPAdes did not run successfully! Skipping Pilon correction, Assembly Mapping check,'
                        ' MLST and Kraken (assembly) analysis and insert size determination'
                    )
            else:
                print(
                    '--skipSPAdes set. Skipping SPAdes, Pilon correction, Assembly Mapping check and MLST and Kraken'
                    ' (assembly) analysis and insert size determination')
                runs['SPAdes'] = skipped
                runs['Assembly_Mapping'] = skipped
                runs['Pilon'] = skipped
                runs['MLST'] = skipped
                runs['assembly_Kraken'] = skipped
                runs['insert_size'] = skipped
    else:
        print('Moving to the next sample')

    for step in ('reads_Kraken', 'first_Coverage', 'trueCoverage_ReMatCh',
                 'first_FastQC', 'Trimmomatic', 'second_Coverage',
                 'second_FastQC', 'Pear', 'SPAdes', 'Assembly_Mapping',
                 'Pilon', 'MLST', 'assembly_Kraken', 'insert_size'):
        if step not in runs:
            runs[step] = not_run

    # Remove Pear directory
    if not args.pearKeepFiles and pear_folder is not None:
        utils.removeDirectory(pear_folder)
    # Remove Trimmomatic directory with cleaned reads
    if not args.trimKeepFiles and trimmomatic_folder is not None:
        utils.removeDirectory(trimmomatic_folder)

    # Check run
    run_successfully = all(runs[step][0] or runs[step][0] is None
                           for step in runs)

    pass_fastq_integrity = runs['FastQ_Integrity'][0]
    pass_reads_kraken = runs['reads_Kraken'][
        1] is not False or args.krakenIgnoreQC
    pass_cov = (runs['second_Coverage'][1]
                or (runs['second_Coverage'][1] is None
                    and runs['first_Coverage'][1])) is not False
    pass_true_cov = runs['trueCoverage_ReMatCh'][
        1] is not False or args.trueCoverageIgnoreQC
    pass_fastqc = (runs['second_FastQC'][1]
                   or (runs['second_FastQC'][1] is None
                       and runs['first_FastQC'][1])) is not False
    # pass_trimmomatic = runs['Trimmomatic'][1] is not False
    # pass_pear = runs['Pear'][1] is not False
    # pass_spades = runs['SPAdes'][1] is not False or runs['Assembly_Mapping'][1] is True
    pass_spades = runs['SPAdes'][1] is not False
    pass_assembly_mapping = runs['Assembly_Mapping'][1] is not False
    pass_pilon = runs['Pilon'][0] is not False
    pass_mlst = runs['MLST'][1] is not False or args.mlstIgnoreQC
    pass_assembly_kraken = runs['assembly_Kraken'][
        1] is not False or args.krakenIgnoreQC
    pass_qc = all([
        pass_fastq_integrity, pass_reads_kraken, pass_cov, pass_true_cov,
        pass_fastqc, pass_spades, pass_assembly_mapping, pass_pilon, pass_mlst,
        pass_assembly_kraken
    ])

    return run_successfully, pass_qc, runs