def get_sample_args_fastq(fastq_files_list, outdir, pairEnd_filesSeparation_list):
    """Stage explicitly listed fastq files under ``<outdir>/reads/``.

    A fresh ``reads`` directory is created inside ``outdir`` and filled with
    one symbolic link per input file, each link named after the basename of
    the file it points to. That directory is then handed to
    ``utils.checkSetInputDirectory``.

    NOTE(review): this function appears twice in this file with the same
    logic; the later definition replaces the earlier one at import time.
    Consider removing one of them.

    :param fastq_files_list: paths of the fastq files to link
    :param outdir: directory in which the ``reads`` directory is created
    :param pairEnd_filesSeparation_list: passed through unchanged to
        ``utils.checkSetInputDirectory``
    :return: tuple ``(new_indir, samples, removeCreatedSamplesDirectories,
        indir_same_outdir)``; the last three items are exactly what
        ``utils.checkSetInputDirectory`` returned
    :raises SystemExit: when two input files share the same file name
    """
    # Links are named by basename only, so two inputs coming from different
    # directories but sharing a file name would collide inside 'reads'.
    # Detect that before the existing 'reads' directory is removed.
    link_names = [os.path.basename(fastq) for fastq in fastq_files_list]
    if len(set(link_names)) != len(link_names):
        sys.exit('Fastq files with the same file name were provided')

    # The trailing '' makes os.path.join end the path with a separator
    new_indir = os.path.join(outdir, 'reads', '')
    utils.removeDirectory(new_indir)
    os.mkdir(new_indir)

    for fastq, link_name in zip(fastq_files_list, link_names):
        os.symlink(fastq, os.path.join(new_indir, link_name))

    samples, removeCreatedSamplesDirectories, indir_same_outdir = utils.checkSetInputDirectory(
        new_indir, outdir, pairEnd_filesSeparation_list)

    return new_indir, samples, removeCreatedSamplesDirectories, indir_same_outdir
def get_sample_args_fastq(fastq_files_list, outdir, pairEnd_filesSeparation_list):
    """Stage explicitly listed fastq files under ``<outdir>/reads/``.

    A fresh ``reads`` directory is created inside ``outdir`` and filled with
    one symbolic link per input file, each link named after the basename of
    the file it points to. That directory is then handed to
    ``utils.checkSetInputDirectory``.

    NOTE(review): this function appears twice in this file with the same
    logic; the later definition replaces the earlier one at import time.
    Consider removing one of them.

    :param fastq_files_list: paths of the fastq files to link
    :param outdir: directory in which the ``reads`` directory is created
    :param pairEnd_filesSeparation_list: passed through unchanged to
        ``utils.checkSetInputDirectory``
    :return: tuple ``(new_indir, samples, removeCreatedSamplesDirectories,
        indir_same_outdir)``; the last three items are exactly what
        ``utils.checkSetInputDirectory`` returned
    :raises SystemExit: when two input files share the same file name
    """
    # Links are named by basename only, so two inputs coming from different
    # directories but sharing a file name would collide inside 'reads'.
    # Detect that before the existing 'reads' directory is removed.
    link_names = [os.path.basename(fastq) for fastq in fastq_files_list]
    if len(set(link_names)) != len(link_names):
        sys.exit('Fastq files with the same file name were provided')

    # The trailing '' makes os.path.join end the path with a separator
    new_indir = os.path.join(outdir, 'reads', '')
    utils.removeDirectory(new_indir)
    os.mkdir(new_indir)

    for fastq, link_name in zip(fastq_files_list, link_names):
        os.symlink(fastq, os.path.join(new_indir, link_name))

    samples, removeCreatedSamplesDirectories, indir_same_outdir = utils.checkSetInputDirectory(
        new_indir, outdir, pairEnd_filesSeparation_list)

    return new_indir, samples, removeCreatedSamplesDirectories, indir_same_outdir
def get_samples(args_inputDirectory, args_fastq, outdir, pairEnd_filesSeparation_list):
    """Resolve the samples to analyse from an input directory or fastq files.

    Exactly one of ``args_inputDirectory`` and ``args_fastq`` is expected to
    be set. With a directory, it is made absolute and checked with
    ``utils.checkSetInputDirectory``. With fastq files, they are staged by
    ``get_sample_args_fastq`` and the directory it returns is used.

    NOTE(review): this function appears twice in this file with the same
    logic; the later definition replaces the earlier one at import time.
    Consider removing one of them.

    :param args_inputDirectory: path of the input directory, or None
    :param args_fastq: sequence of objects exposing the file path as
        ``.name``, or None
    :param outdir: output directory, forwarded to the helpers
    :param pairEnd_filesSeparation_list: forwarded unchanged to the helpers
    :return: tuple ``(samples, inputDirectory,
        removeCreatedSamplesDirectories, indir_same_outdir)``
    :raises SystemExit: when neither or both inputs are given, or when the
        same fastq file is listed more than once
    """
    if args_fastq is None and args_inputDirectory is None:
        # Would otherwise crash inside os.path.join(None, '')
        sys.exit('No input directory or fastq files were provided')

    if args_fastq is None:
        # Check if input directory exists with fastq files and store samples name that have fastq files
        inputDirectory = os.path.abspath(os.path.join(args_inputDirectory, ''))
        # Parenthesised so it prints one empty line under both the Python 2
        # print statement and the Python 3 print function
        print('')
        samples, removeCreatedSamplesDirectories, indir_same_outdir = utils.checkSetInputDirectory(
            inputDirectory, outdir, pairEnd_filesSeparation_list)
    elif args_inputDirectory is None:
        fastq_files = [os.path.abspath(fastq.name) for fastq in args_fastq]
        # Compare every file, not only the first two, and do not assume that
        # at least two files were given
        if len(set(fastq_files)) != len(fastq_files):
            sys.exit('Same fastq file provided twice')
        inputDirectory, samples, removeCreatedSamplesDirectories, indir_same_outdir = get_sample_args_fastq(
            fastq_files, outdir, pairEnd_filesSeparation_list)
    else:
        # Both inputs given: previously fell through to an UnboundLocalError
        sys.exit('Provide either an input directory or fastq files, not both')

    return samples, inputDirectory, removeCreatedSamplesDirectories, indir_same_outdir
def get_samples(args_inputDirectory, args_fastq, outdir, pairEnd_filesSeparation_list):
    """Resolve the samples to analyse from an input directory or fastq files.

    Exactly one of ``args_inputDirectory`` and ``args_fastq`` is expected to
    be set. With a directory, it is made absolute and checked with
    ``utils.checkSetInputDirectory``. With fastq files, they are staged by
    ``get_sample_args_fastq`` and the directory it returns is used.

    NOTE(review): this function appears twice in this file with the same
    logic; the later definition replaces the earlier one at import time.
    Consider removing one of them.

    :param args_inputDirectory: path of the input directory, or None
    :param args_fastq: sequence of objects exposing the file path as
        ``.name``, or None
    :param outdir: output directory, forwarded to the helpers
    :param pairEnd_filesSeparation_list: forwarded unchanged to the helpers
    :return: tuple ``(samples, inputDirectory,
        removeCreatedSamplesDirectories, indir_same_outdir)``
    :raises SystemExit: when neither or both inputs are given, or when the
        same fastq file is listed more than once
    """
    if args_fastq is None and args_inputDirectory is None:
        # Would otherwise crash inside os.path.join(None, '')
        sys.exit('No input directory or fastq files were provided')

    if args_fastq is None:
        # Check if input directory exists with fastq files and store samples name that have fastq files
        inputDirectory = os.path.abspath(os.path.join(args_inputDirectory, ''))
        # Parenthesised so it prints one empty line under both the Python 2
        # print statement and the Python 3 print function
        print('')
        samples, removeCreatedSamplesDirectories, indir_same_outdir = utils.checkSetInputDirectory(
            inputDirectory, outdir, pairEnd_filesSeparation_list)
    elif args_inputDirectory is None:
        fastq_files = [os.path.abspath(fastq.name) for fastq in args_fastq]
        # Compare every file, not only the first two, and do not assume that
        # at least two files were given
        if len(set(fastq_files)) != len(fastq_files):
            sys.exit('Same fastq file provided twice')
        inputDirectory, samples, removeCreatedSamplesDirectories, indir_same_outdir = get_sample_args_fastq(
            fastq_files, outdir, pairEnd_filesSeparation_list)
    else:
        # Both inputs given: previously fell through to an UnboundLocalError
        sys.exit('Provide either an input directory or fastq files, not both')

    return samples, inputDirectory, removeCreatedSamplesDirectories, indir_same_outdir
def main():
    """Command-line entry point: run the INNUca.py analysis on every sample.

    Visible steps, in order:

    1. Parse the arguments, create the output directory if missing and
       replace ``sys.stdout`` with a ``utils.Logger`` instance.
    2. Print run information (log file, command, working directory, version).
    3. Build the table of external programs required by the steps that are
       not skipped and exit if ``utils.checkPrograms`` reports any missing.
    4. Collect the samples from ``args.inputDirectory``.
    5. Prepare the MLST scheme, the trueCoverage_ReMatCh configuration and the
       memory limits for SPAdes and the .jar tools.
    6. For each sample, call ``run_INNUca`` and write the fail report and a
       line in the samples report.
    7. Print the totals and exit with an error message when no sample ran
       successfully.

    NOTE(review): the indentation below was reconstructed from a
    whitespace-collapsed copy of this function; nesting that could not be
    determined from the control-flow keywords alone is flagged inline.
    """
    version = '2.0'
    args = utils.parseArguments(version)

    general_start_time = time.time()
    time_str = time.strftime("%Y%m%d-%H%M%S")

    # Check if output directory exists
    outdir = os.path.abspath(os.path.join(args.outdir, ''))
    if not os.path.isdir(outdir):
        os.makedirs(outdir)

    # Start logger
    # From here on every print goes through the utils.Logger object
    sys.stdout = utils.Logger(outdir, time_str)

    print '\n' + '==========> INNUca.py <=========='
    print '\n' + 'Program start: ' + time.ctime()

    # Tells where the logfile will be stored
    print '\n' + 'LOGFILE:'
    print sys.stdout.getLogFile()

    # Print command
    print '\n' + 'COMMAND:'
    script_path = os.path.abspath(sys.argv[0])
    print sys.executable + ' ' + script_path + ' ' + ' '.join(sys.argv[1:])

    # Print directory where the program was launched
    print '\n' + 'PRESENT DIRECTORY :'
    print os.getcwd()

    # Print program version
    print '\n' + 'VERSION INNUca.py:'
    utils.scriptVersionGit(version, os.getcwd(), script_path)

    # Get CPU information
    utils.get_cpu_information(outdir, time_str)

    # Set and print PATH variable
    utils.setPATHvariable(args.doNotUseProvidedSoftware, script_path)

    # Check programs
    # Each entry maps a program name to [version flag, comparison operator, version]
    programs_version_dictionary = {}
    programs_version_dictionary['gunzip'] = ['--version', '>=', '1.6']
    # NOTE(review): bowtie2 and samtools are assumed to sit under the same condition - confirm
    if (not args.skipTrueCoverage or (not args.skipPilon and not args.skipSPAdes)):
        programs_version_dictionary['bowtie2'] = ['--version', '>=', '2.2.9']
        programs_version_dictionary['samtools'] = ['--version', '==', '1.3.1']
    # java is required unless FastQC, Trimmomatic and Pilon (which only runs after SPAdes) are all off
    if not (args.skipFastQC and args.skipTrimmomatic and (args.skipPilon or args.skipSPAdes)):
        programs_version_dictionary['java'] = ['-version', '>=', '1.8']
    if not args.skipFastQC:
        programs_version_dictionary['fastqc'] = ['--version', '==', '0.11.5']
    if not args.skipTrimmomatic:
        programs_version_dictionary['trimmomatic-0.36.jar'] = ['-version', '==', '0.36']
    if not args.skipSPAdes:
        programs_version_dictionary['spades.py'] = ['--version', '>=', '3.9.0']
    if not args.skipPilon and not args.skipSPAdes:
        programs_version_dictionary['pilon-1.18.jar'] = ['--version', '==', '1.18']
    if not args.skipMLST and not args.skipSPAdes:
        programs_version_dictionary['mlst'] = ['--version', '>=', '2.4']
    missingPrograms, programs_version_dictionary = utils.checkPrograms(programs_version_dictionary)
    if len(missingPrograms) > 0:
        sys.exit('\n' + 'Errors:' + '\n' + '\n'.join(missingPrograms))

    # .jar paths
    # presumably utils.checkPrograms stores the located file path at index 3 of each entry - confirm
    jar_path_trimmomatic = None
    if not args.skipTrimmomatic:
        jar_path_trimmomatic = programs_version_dictionary['trimmomatic-0.36.jar'][3]

    jar_path_pilon = None
    if not args.skipPilon and not args.skipSPAdes:
        jar_path_pilon = programs_version_dictionary['pilon-1.18.jar'][3]

    # Check if input directory exists with fastq files and store samples name that have fastq files
    inputDirectory = os.path.abspath(os.path.join(args.inputDirectory, ''))
    # pairEnd_filesSeparation_list = args.pairEnd_filesSeparation
    pairEnd_filesSeparation_list = None
    print ''
    samples, removeCreatedSamplesDirectories, indir_same_outdir = utils.checkSetInputDirectory(inputDirectory, outdir, pairEnd_filesSeparation_list)

    # Start running the analysis
    print '\n' + 'RUNNING INNUca.py'

    # Prepare run report file
    samples_report_path = os.path.join(outdir, 'samples_report.' + time_str + '.tab')
    utils.start_sample_report_file(samples_report_path)

    number_samples_successfully = 0
    number_samples_pass = 0

    # Get MLST scheme to use
    scheme = 'unknown'
    if not args.skipMLST and not args.skipSPAdes:
        scheme = mlst.getScheme(args.speciesExpected)
        # Get path to blastn
        # NOTE(review): assumed to belong to the MLST branch - confirm
        mlst.getBlastPath()

    # Get trueCoverage_ReMatCh settings
    trueCoverage_config = None
    if not args.skipTrueCoverage:
        trueCoverage_reference = None
        trueCoverage_config_file = None
        trueCoverage_config = None

        if args.trueConfigFile is None:
            print 'No trueCoverage_ReMatCh config file was provided. Search for default files'
            trueCoverage_config_file, trueCoverage_reference = trueCoverage.check_existing_default_config(args.speciesExpected, script_path)
        else:
            trueCoverage_config_file = args.trueConfigFile.name

        if trueCoverage_config_file is not None:
            trueCoverage_config = trueCoverage.parse_config(trueCoverage_config_file)
        # Only a default config gets its reference file overridden with the default reference
        if args.trueConfigFile is None and trueCoverage_config is not None:
            trueCoverage_config['reference_file'] = trueCoverage_reference

        if trueCoverage_config is not None:
            print 'The following trueCoverage_ReMatCh config file will be used: ' + trueCoverage_config_file
            print 'The following trueCoverage_ReMatCh reference file will be used: ' + trueCoverage_config['reference_file'] + '\n'
        else:
            print 'No trueCoverage_ReMatCh config file was found'

    # Memory
    # assumes utils.get_free_memory() reports kB, so dividing by 1024**2 yields GB - TODO confirm
    available_memory_GB = utils.get_free_memory() / (1024.0**2)

    # Determine SPAdes maximum memory
    spadesMaxMemory = None
    if not args.skipSPAdes:
        print ''
        spadesMaxMemory = spades.define_memory(args.spadesMaxMemory, args.threads, available_memory_GB)

    # Determine .jar maximum memory
    # Stays 'off' when neither Trimmomatic nor Pilon (after SPAdes) will run
    jarMaxMemory = 'off'
    if not (args.skipTrimmomatic and (args.skipSPAdes or args.skipPilon)):
        print ''
        jarMaxMemory = utils.define_jar_max_memory(args.jarMaxMemory, args.threads, available_memory_GB)

    # Run INNUca for each sample
    for sample in samples:
        sample_start_time = time.time()

        print '\n' + 'Sample: ' + sample + '\n'

        # Create sample outdir
        sample_outdir = os.path.abspath(os.path.join(outdir, sample, ''))
        if not os.path.isdir(sample_outdir):
            os.makedirs(sample_outdir)

        # Get fastq files
        fastq_files = utils.searchFastqFiles(os.path.join(inputDirectory, sample, ''), pairEnd_filesSeparation_list, False)
        if len(fastq_files) == 1:
            print 'Only one fastq file was found: ' + str(fastq_files)
            print 'Pair-End sequencing is required. Moving to the next sample'
            # Skipped samples get no fail report and no line in the samples report
            continue

        print 'The following files will be used:'
        print str(fastq_files) + '\n'

        # Run INNUca.py analysis
        run_successfully, pass_qc, run_report = run_INNUca(sample, sample_outdir, fastq_files, args, script_path, scheme, spadesMaxMemory, jar_path_trimmomatic, jar_path_pilon, jarMaxMemory, trueCoverage_config)

        # Save sample fail report
        fail_report_path = os.path.join(sample_outdir, 'fail_report.txt')
        utils.write_fail_report(fail_report_path, run_report)

        # Save runs statistics
        # NOTE(review): the two counters are assumed to be updated by independent checks - confirm
        if run_successfully:
            number_samples_successfully += 1
        if pass_qc:
            number_samples_pass += 1

        # Get raw reads files size
        fileSize = sum(os.path.getsize(fastq) for fastq in fastq_files)

        # Remove sample directory if it was created during the process
        if removeCreatedSamplesDirectories and not indir_same_outdir:
            utils.removeDirectory(os.path.join(inputDirectory, sample, ''))

        print 'END ' + sample + ' analysis'
        time_taken = utils.runTime(sample_start_time)

        # Save run report
        utils.write_sample_report(samples_report_path, sample, run_successfully, pass_qc, time_taken, fileSize, run_report)

    # Run report
    print '\n' + 'END INNUca.py'
    print '\n' + str(number_samples_successfully) + ' samples out of ' + str(len(samples)) + ' run successfully'
    print '\n' + str(number_samples_pass) + ' samples out of ' + str(number_samples_successfully) + ' (run successfully) PASS INNUca.py analysis'
    # The returned value is discarded right away; presumably utils.runTime is
    # called here for a side effect such as printing the elapsed time - confirm
    time_taken = utils.runTime(general_start_time)
    del time_taken

    # Check whether INNUca.py run at least one sample successfully
    if number_samples_successfully == 0:
        sys.exit('No samples run successfully!')