def main():
    """Entry point for seq_typing: parse CLI args, run the selected typing
    sub-command and parse its results into the final reports."""
    program_name = 'seq_typing.py'

    # Guard against accidental invocation with a Python 2 interpreter.
    if sys.version_info[0] < 3:
        sys.exit('Must be using Python 3. Try calling "python3 {}"'.format(program_name))

    # python_arguments also returns the sub-parsers, unused here.
    parser, _, _, _, _ = python_arguments(program_name, __version__)
    args = parser.parse_args()

    start_time = time.time()

    # Make sure the output directory exists and is an absolute path.
    args.outdir = os.path.abspath(args.outdir)
    if not os.path.isdir(args.outdir):
        os.makedirs(args.outdir)

    # Start logger
    logfile, time_str = utils.start_logger(args.outdir)
    script_path = utils.general_information(script_name=program_name, logfile=logfile, version=__version__,
                                            outdir=args.outdir, time_str=time_str)
    del script_path

    print('\n')

    removable_folders = []

    # Create modules pickles folder (removed at the end unless --debug).
    pickles_dir = os.path.join(args.outdir, 'pickles', '')
    if not os.path.isdir(pickles_dir):
        os.makedirs(pickles_dir)
    removable_folders.append(pickles_dir)

    # Run the sub-command selected on the command line (reads/assembly/...).
    folders_2_remove_func, references_results, reference, references_headers = args.func(args)
    removable_folders.extend(folders_2_remove_func)

    # Parse results into the typing reports (return values unused here).
    _, _, _, _, _ = parse_results.parse_results(references_results, reference, references_headers, args.outdir,
                                                args.minGeneCoverage, args.minDepthCoverage, args.typeSeparator)

    # Clean up temporary folders unless debugging.
    if not args.debug:
        for folder in removable_folders:
            utils.removeDirectory(folder)

    _ = utils.runTime(start_time)
def main():
    """Entry point for patho_typing: in silico pathogenic typing directly
    from raw Illumina reads.

    Maps the reads against the species reference, optionally runs the
    trueCoverage quality checkpoint, then runs ReMatCh and the typing rules.
    """
    parser = argparse.ArgumentParser(prog='patho_typing.py',
                                     description='In silico pathogenic typing directly from raw Illumina reads',
                                     formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('--version', help='Version information', action='version',
                        version='{prog} v{version}'.format(prog=parser.prog, version=__version__))

    parser_required = parser.add_argument_group('Required options')
    parser_required.add_argument('-f', '--fastq', nargs='+', action=utils.required_length((1, 2), '--fastq'),
                                 type=argparse.FileType('r'), metavar=('/path/to/input/file.fq.gz'),
                                 help='Path to single OR paired-end fastq files. If two files are passed, they will be'
                                      ' assumed as being the paired fastq files',
                                 required=True)
    parser_required.add_argument('-s', '--species', nargs=2, type=str, metavar=('Yersinia', 'enterocolitica'),
                                 help='Species name', required=True)

    parser_optional_general = parser.add_argument_group('General facultative options')
    parser_optional_general.add_argument('-o', '--outdir', type=str, metavar='/path/to/output/directory/',
                                         help='Path to the directory where the information will be stored',
                                         required=False, default='.')
    parser_optional_general.add_argument('-j', '--threads', type=int, metavar='N',
                                         help='Number of threads to use', required=False, default=1)
    parser_optional_general.add_argument('--trueCoverage', action='store_true',
                                         help='Assess true coverage before continue typing')
    parser_optional_general.add_argument('--noCheckPoint', action='store_true',
                                         help='Ignore the true coverage checking point')
    parser_optional_general.add_argument('--minGeneCoverage', type=int, metavar='N',
                                         help='Minimum typing percentage of target reference gene sequence covered to'
                                              ' consider a gene to be present (value between [0, 100])',
                                         required=False)
    parser_optional_general.add_argument('--minGeneIdentity', type=int, metavar='N',
                                         help='Minimum typing percentage of identity of reference gene sequence covered'
                                              ' to consider a gene to be present (value between [0, 100]). One INDEL'
                                              ' will be considered as one difference',
                                         required=False)
    parser_optional_general.add_argument('--minGeneDepth', type=int, metavar='N',
                                         help='Minimum typing gene average coverage depth of present positions to'
                                              ' consider a gene to be present (default is 1/3 of average sample'
                                              ' coverage or 15x)',
                                         required=False)
    parser_optional_general.add_argument('--doNotRemoveConsensus', action='store_true',
                                         help='Do not remove ReMatCh consensus sequences')
    parser_optional_general.add_argument('--debug', action='store_true',
                                         help='DeBug Mode: do not remove temporary files')

    args = parser.parse_args()

    # Range-check the optional percentage options.
    if args.minGeneCoverage is not None and (args.minGeneCoverage < 0 or args.minGeneCoverage > 100):
        parser.error('--minGeneCoverage should be a value between [0, 100]')
    if args.minGeneIdentity is not None and (args.minGeneIdentity < 0 or args.minGeneIdentity > 100):
        parser.error('--minGeneIdentity should be a value between [0, 100]')

    start_time = time.time()

    args.outdir = os.path.abspath(args.outdir)
    if not os.path.isdir(args.outdir):
        os.makedirs(args.outdir)

    # Start logger
    logfile, time_str = utils.start_logger(args.outdir)

    script_path = utils.general_information(logfile, __version__, args.outdir, time_str)
    print('\n')

    rematch = include_rematch_dependencies_path()

    # argparse opened the fastq files to validate them; keep only the paths.
    args.fastq = [fastq.name for fastq in args.fastq]

    # Fetch species-specific reference sequences, rules and configurations.
    reference_file, trueCoverage_file, trueCoverage_sequences, trueCoverage_headers, trueCoverage_config, \
        typing_file, typing_sequences, typing_headers, typing_rules, typing_config = \
        set_reference(args.species, args.outdir, script_path, args.trueCoverage)
    original_reference_file = str(reference_file)

    confirm_genes_fasta_rules(typing_headers, typing_rules)

    run_successfully, bam_file = mapping_reads(args.fastq, reference_file, args.threads, args.outdir, False, 1)
    if run_successfully:
        rematch_dir = os.path.join(args.outdir, 'rematch', '')
        if not os.path.isdir(rematch_dir):
            os.makedirs(rematch_dir)

        if args.trueCoverage:
            if trueCoverage_file is not None:
                trueCoverage_dir = os.path.join(rematch_dir, 'trueCoverage', '')
                if not os.path.isdir(trueCoverage_dir):
                    os.makedirs(trueCoverage_dir)

                print('\n')
                run_successfully, trueCoverage_bam = split_bam(bam_file, trueCoverage_headers, trueCoverage_dir,
                                                               args.threads)
                if run_successfully:
                    run_successfully = indexAlignment(trueCoverage_bam)
                    if run_successfully:
                        reference_file = os.path.join(trueCoverage_dir, 'reference.fasta')
                        write_sequeces(reference_file, trueCoverage_sequences)
                        index_fasta_samtools(reference_file, None, None, True)
                        config = parse_config(trueCoverage_config)

                        # ReMatCh run on the trueCoverage targets only.
                        runtime, run_successfully, sample_data_general, data_by_gene = \
                            run_rematch.run_rematch(rematch, trueCoverage_dir, reference_file, trueCoverage_bam,
                                                    args.threads, config['length_extra_seq'],
                                                    config['minimum_depth_presence'], config['minimum_depth_call'],
                                                    config['minimum_depth_frequency_dominant_allele'],
                                                    config['minimum_gene_coverage'],
                                                    config['minimum_gene_identity'],
                                                    args.debug, args.doNotRemoveConsensus)

                        if run_successfully and sample_data_general['mean_sample_coverage'] is not None and \
                                sample_data_general['number_absent_genes'] is not None and \
                                sample_data_general['number_genes_multiple_alleles'] is not None:
                            # Default minGeneDepth: 1/3 of mean sample coverage, floored at 15x.
                            if args.minGeneDepth is None:
                                args.minGeneDepth = sample_data_general['mean_sample_coverage'] / 3 if \
                                    sample_data_general['mean_sample_coverage'] / 3 > 15 else 15

                            # Collect all trueCoverage checkpoint violations.
                            exit_info = []
                            if sample_data_general['mean_sample_coverage'] < config['minimum_read_coverage']:
                                exit_info.append('Sample coverage ({mean}) lower than the minimum'
                                                 ' required ({minimum})'
                                                 ''.format(mean=sample_data_general['mean_sample_coverage'],
                                                           minimum=config['minimum_read_coverage']))
                            if sample_data_general['number_absent_genes'] > config['maximum_number_absent_genes']:
                                exit_info.append('Number of absent genes ({number}) higher than the'
                                                 ' maximum allowed ({maximum})'
                                                 ''.format(number=sample_data_general['number_absent_genes'],
                                                           maximum=config['maximum_number_absent_genes']))
                            if sample_data_general['number_genes_multiple_alleles'] > \
                                    config['maximum_number_genes_multiple_alleles']:
                                exit_info.append('Number of genes with multiple alleles'
                                                 ' ({number}) higher than the maximum'
                                                 ' allowed ({maximum})'
                                                 ''.format(
                                                     number=sample_data_general['number_genes_multiple_alleles'],
                                                     maximum=config['maximum_number_genes_multiple_alleles']))

                            if len(exit_info) > 0:
                                print('\n' + '\n'.join(exit_info) + '\n')
                                e = 'TrueCoverage requirements not fulfilled'
                                print('\n' + e + '\n')
                                # Abort here unless the user asked to ignore the checkpoint.
                                if not args.noCheckPoint:
                                    clean_pathotyping_folder(args.outdir, original_reference_file, args.debug)
                                    _ = utils.runTime(start_time)
                                    sys.exit(e)
                        else:
                            e = 'TrueCoverage module did not run successfully'
                            print('\n' + e + '\n')
                            if not args.noCheckPoint:
                                clean_pathotyping_folder(args.outdir, original_reference_file, args.debug)
                                _ = utils.runTime(start_time)
                                sys.exit(e)

                        # Prepare the typing targets for the main ReMatCh run.
                        print('\n')
                        typing_dir = os.path.join(rematch_dir, 'typing', '')
                        if not os.path.isdir(typing_dir):
                            os.makedirs(typing_dir)
                        run_successfully, bam_file = split_bam(bam_file, typing_headers, typing_dir, args.threads)
                        if run_successfully:
                            run_successfully = indexAlignment(bam_file)
                            if run_successfully:
                                reference_file = os.path.join(typing_dir, 'reference.fasta')
                                write_sequeces(reference_file, typing_sequences)
                                index_fasta_samtools(reference_file, None, None, True)
                                rematch_dir = str(typing_dir)
                if not run_successfully:
                    if args.noCheckPoint:
                        clean_pathotyping_folder(args.outdir, original_reference_file, args.debug)
                        _ = utils.runTime(start_time)
                        sys.exit('Something in the required TrueCoverage analysis went wrong')
            else:
                print('\n'
                      'WARNING: it was not found trueCoverage target files. trueCoverage will not run.'
                      '\n')

        if run_successfully:
            config = parse_config(typing_config)
            # Command-line values override the config file values.
            if args.minGeneCoverage is not None:
                config['minimum_gene_coverage'] = args.minGeneCoverage
            if args.minGeneIdentity is not None:
                config['minimum_gene_identity'] = args.minGeneIdentity

            # Main ReMatCh run for typing.
            runtime, run_successfully, sample_data_general, data_by_gene = \
                run_rematch.run_rematch(rematch, rematch_dir, reference_file, bam_file, args.threads,
                                        config['length_extra_seq'], config['minimum_depth_presence'],
                                        config['minimum_depth_call'],
                                        config['minimum_depth_frequency_dominant_allele'],
                                        config['minimum_gene_coverage'], config['minimum_gene_identity'],
                                        args.debug, args.doNotRemoveConsensus)
            if run_successfully and data_by_gene is not None:
                if args.minGeneDepth is None:
                    args.minGeneDepth = sample_data_general['mean_sample_coverage'] / 3 if \
                        sample_data_general['mean_sample_coverage'] / 3 > 15 else 15
                _, _, _ = typing.typing(data_by_gene, typing_rules, config['minimum_gene_coverage'],
                                        config['minimum_gene_identity'], args.minGeneDepth, args.outdir)
            else:
                clean_pathotyping_folder(args.outdir, original_reference_file, args.debug)
                _ = utils.runTime(start_time)
                sys.exit('ReMatCh run for pathotyping did not run successfully')
    else:
        clean_pathotyping_folder(args.outdir, original_reference_file, args.debug)
        _ = utils.runTime(start_time)
        sys.exit('Something did not run successfully')

    clean_pathotyping_folder(args.outdir, original_reference_file, args.debug)

    print('\n')
    _ = utils.runTime(start_time)
def runRematch(args):
    """Run ReMatCh for every sample in the ID list.

    For each sample: optionally download the fastq files, run the ReMatCh
    module (and an optional second run on the first run's consensus), write
    per-sample reports and clean up downloaded files.

    Returns a tuple ``(number_samples_successfully, total_samples)``.

    Fix: the original body used Python 2 ``print`` *statements*, which are a
    SyntaxError under Python 3 and inconsistent with the rest of this file;
    they are converted to ``print()`` function calls (same output).
    """
    workdir = os.path.abspath(args.workdir)
    if not os.path.isdir(workdir):
        os.makedirs(workdir)

    asperaKey = os.path.abspath(args.asperaKey.name) if args.asperaKey is not None else None

    # Start logger
    logfile, time_str = utils.start_logger(workdir)

    # Get general information
    utils.general_information(logfile, version, workdir, time_str, args.doNotUseProvidedSoftware, asperaKey,
                              args.downloadCramBam)

    # Set listIDs
    listIDs, searched_fastq_files = getListIDs(workdir, args.listIDs.name if args.listIDs is not None else None,
                                               args.taxon)

    # Run ReMatCh for each sample
    print('\n' + 'STARTING ReMatCh' + '\n')

    # Clean sequences headers
    reference_file, gene_list_reference = clean_headers_reference_file(os.path.abspath(args.reference.name), workdir,
                                                                       args.extraSeq)

    if len(gene_list_reference) == 0:
        sys.exit('No sequences left')

    # To use in combined report
    number_samples_successfully = 0
    for sample in listIDs:
        sample_start_time = time.time()
        print('\n\n' + 'Sample ID: ' + sample)

        # Create sample outdir
        sample_outdir = os.path.join(workdir, sample, '')
        if not os.path.isdir(sample_outdir):
            os.mkdir(sample_outdir)

        run_successfully_fastq = None
        time_taken_fastq = 0
        sequencingInformation = {'run_accession': None, 'instrument_platform': None, 'instrument_model': None,
                                 'library_layout': None, 'library_source': None, 'extra_run_accession': None,
                                 'date_download': None}
        if not searched_fastq_files:
            # Download Files
            time_taken_fastq, run_successfully_fastq, fastq_files, sequencingInformation = \
                download.runDownload(sample, args.downloadLibrariesType, asperaKey, sample_outdir,
                                     args.downloadCramBam, args.threads, args.downloadInstrumentPlatform)
        else:
            fastq_files = listIDs[sample]

        fileSize = None

        run_successfully_rematch_first = None
        run_successfully_rematch_second = None
        time_taken_rematch_first = 0
        time_taken_rematch_second = 0
        # None means "download not attempted"; only an explicit False skips ReMatCh.
        if run_successfully_fastq is not False:
            fileSize = sum(os.path.getsize(fastq) for fastq in fastq_files)
            # Run ReMatCh
            time_taken_rematch_first, run_successfully_rematch_first, data_by_gene, sample_data_general_first, \
                consensus_files = \
                rematch_module.runRematchModule(sample, fastq_files, reference_file, args.threads, sample_outdir,
                                                args.extraSeq, args.minCovPresence, args.minCovCall,
                                                args.minFrequencyDominantAllele, args.minGeneCoverage,
                                                args.conservedSeq, args.debug, args.numMapLoc, args.minGeneIdentity)
            if run_successfully_rematch_first:
                write_data_by_gene(gene_list_reference, args.minGeneCoverage, sample, data_by_gene, workdir,
                                   time_str, 'first_run', args.minGeneIdentity)
                if args.doubleRun:
                    # Second run maps against the first run's consensus sequences.
                    rematch_second_outdir = os.path.join(sample_outdir, 'rematch_second_run', '')
                    if not os.path.isdir(rematch_second_outdir):
                        os.mkdir(rematch_second_outdir)
                    consensus_concatenated_fasta, consensus_concatenated_gene_list = \
                        concatenate_extraSeq_2_consensus(consensus_files['noMatter'], reference_file, args.extraSeq,
                                                         rematch_second_outdir)
                    if len(consensus_concatenated_gene_list) > 0:
                        time_taken_rematch_second, run_successfully_rematch_second, data_by_gene, \
                            sample_data_general_second, consensus_files = \
                            rematch_module.runRematchModule(sample, fastq_files, consensus_concatenated_fasta,
                                                            args.threads, rematch_second_outdir, args.extraSeq,
                                                            args.minCovPresence, args.minCovCall,
                                                            args.minFrequencyDominantAllele, args.minGeneCoverage,
                                                            args.conservedSeq, args.debug, args.numMapLoc,
                                                            args.minGeneIdentity)
                        if not args.debug:
                            os.remove(consensus_concatenated_fasta)
                        if run_successfully_rematch_second:
                            write_data_by_gene(gene_list_reference, args.minGeneCoverage, sample, data_by_gene,
                                               workdir, time_str, 'second_run', args.minGeneIdentity)
                    else:
                        print('No sequences left after ReMatCh module first run. Second run will not be performed')

        # Remove downloaded fastq files unless the user asked to keep them.
        if not searched_fastq_files and not args.keepDownloadedFastq and fastq_files is not None:
            for fastq in fastq_files:
                if os.path.isfile(fastq):
                    os.remove(fastq)

        time_taken = utils.runTime(sample_start_time)

        # sample_data_general_* are only evaluated when the corresponding run succeeded.
        write_sample_report(sample, workdir, time_str, fileSize, run_successfully_fastq,
                            run_successfully_rematch_first, run_successfully_rematch_second, time_taken_fastq,
                            time_taken_rematch_first, time_taken_rematch_second, time_taken, sequencingInformation,
                            sample_data_general_first if run_successfully_rematch_first else
                            {'number_absent_genes': None, 'number_genes_multiple_alleles': None,
                             'mean_sample_coverage': None},
                            sample_data_general_second if run_successfully_rematch_second else
                            {'number_absent_genes': None, 'number_genes_multiple_alleles': None,
                             'mean_sample_coverage': None},
                            fastq_files if fastq_files is not None else '')

        # A sample counts as successful when no stage explicitly failed (None = skipped).
        if all([run_successfully_fastq is not False, run_successfully_rematch_first is not False,
                run_successfully_rematch_second is not False]):
            number_samples_successfully += 1

    return number_samples_successfully, len(listIDs)
def run_rematch(args):
    """Run ReMatCh for every sample in the ID list (MLST-aware variant).

    Resolves the reference (user-provided file, cached MLST scheme, or
    sequences downloaded from PubMLST), then for each sample optionally
    downloads the reads, runs the ReMatCh module (with an optional second run
    on the first run's consensus), performs MLST calling when requested, and
    writes per-sample and summary reports.

    Returns a tuple ``(number_samples_successfully, total_samples)``.

    Fix: corrected the typo "referece" -> "reference" in the user-facing
    message printed when a provided MLST scheme is used.
    """
    workdir = os.path.abspath(args.workdir)
    if not os.path.isdir(workdir):
        os.makedirs(workdir)

    aspera_key = os.path.abspath(args.asperaKey.name) if args.asperaKey is not None else None

    # Start logger
    logfile, time_str = utils.start_logger(workdir)

    # Get general information
    script_path = utils.general_information(logfile, __version__, workdir, time_str, args.doNotUseProvidedSoftware,
                                            aspera_key, args.downloadCramBam, args.SRA, args.SRAopt)

    # Set list_ids
    list_ids, searched_fastq_files = get_list_ids(workdir, args.listIDs.name if args.listIDs is not None else None,
                                                  args.taxon)

    mlst_sequences = None
    mlst_dicts = None
    if args.mlst is not None:
        time_taken_pub_mlst, mlst_dicts, mlst_sequences = check_mlst.download_pub_mlst_xml(args.mlst,
                                                                                           args.mlstSchemaNumber,
                                                                                           workdir)
        args.softClip_recodeRun = 'first'

    # Resolve the reference: explicit file > cached MLST scheme > PubMLST download.
    if args.reference is None:
        if args.mlst is not None:
            reference_file = check_mlst.check_existing_schema(args.mlst, args.mlstSchemaNumber, script_path)
            args.extraSeq = 200
            if reference_file is None:
                print('It was not found provided MLST scheme sequences for ' + args.mlst)
                print('Trying to obtain reference MLST sequences from PubMLST')
                if len(mlst_sequences) > 0:
                    reference_file = check_mlst.write_mlst_reference(args.mlst, mlst_sequences, workdir, time_str)
                    args.extraSeq = 0
                else:
                    sys.exit('It was not possible to download MLST sequences from PubMLST!')
            else:
                # Typo fix: was 'referece'.
                print('Using provided scheme as reference: ' + reference_file)
        else:
            sys.exit('Need to provide at least one of the following options: "--reference" and "--mlst"')
    else:
        reference_file = os.path.abspath(args.reference.name)

    # Run ReMatCh for each sample
    print('\n' + 'STARTING ReMatCh' + '\n')

    # Clean sequences headers
    reference_file, gene_list_reference, reference_dict = clean_headers_reference_file(reference_file, workdir,
                                                                                       args.extraSeq)

    # Every MLST gene must exist in the (cleaned) reference headers.
    if args.mlst is not None:
        problem_genes = False
        for header in mlst_sequences:
            if header not in gene_list_reference:
                print('MLST gene {header} not found between reference sequences'.format(header=header))
                problem_genes = True
        if problem_genes:
            sys.exit('Missing MLST genes from reference sequences (at least sequences names do not match)!')

    if len(gene_list_reference) == 0:
        sys.exit('No sequences left')

    # To use in combined report
    number_samples_successfully = 0
    genes_present_coverage_depth = {}
    genes_present_sequence_coverage = {}
    for sample in list_ids:
        sample_start_time = time.time()
        print('\n\n' + 'Sample ID: ' + sample)

        # Create sample outdir
        sample_outdir = os.path.join(workdir, sample, '')
        if not os.path.isdir(sample_outdir):
            os.mkdir(sample_outdir)

        run_successfully_fastq = None
        time_taken_fastq = 0
        sequencing_information = {'run_accession': None, 'instrument_platform': None, 'instrument_model': None,
                                  'library_layout': None, 'library_source': None, 'extra_run_accession': None,
                                  'nominal_length': None, 'read_count': None, 'base_count': None,
                                  'date_download': None}
        if not searched_fastq_files:
            # Download Files
            time_taken_fastq, run_successfully_fastq, fastq_files, sequencing_information = \
                download.run_download(sample, args.downloadLibrariesType, aspera_key, sample_outdir,
                                      args.downloadCramBam, args.threads, args.downloadInstrumentPlatform, args.SRA,
                                      args.SRAopt)
        else:
            fastq_files = list_ids[sample]

        file_size = None

        run_successfully_rematch_first = None
        run_successfully_rematch_second = None
        time_taken_rematch_first = 0
        time_taken_rematch_second = 0
        sample_data_general_first = None
        sample_data_general_second = None
        # None means "download not attempted"; only an explicit False skips ReMatCh.
        if run_successfully_fastq is not False:
            file_size = sum(os.path.getsize(fastq) for fastq in fastq_files)
            # Run ReMatCh
            time_taken_rematch_first, run_successfully_rematch_first, data_by_gene, sample_data_general_first, \
                consensus_files, consensus_sequences = \
                rematch_module.run_rematch_module(sample, fastq_files, reference_file, args.threads, sample_outdir,
                                                  args.extraSeq, args.minCovPresence, args.minCovCall,
                                                  args.minFrequencyDominantAllele, args.minGeneCoverage, args.debug,
                                                  args.numMapLoc, args.minGeneIdentity, 'first',
                                                  args.softClip_baseQuality, args.softClip_recodeRun, reference_dict,
                                                  args.softClip_cigarFlagRecode, args.bowtieAlgo, args.bowtieOPT,
                                                  gene_list_reference, args.notWriteConsensus, clean_run=True)
            if run_successfully_rematch_first:
                if args.mlst is not None and (args.mlstRun == 'first' or args.mlstRun == 'all'):
                    run_get_st(sample, mlst_dicts, consensus_sequences, args.mlstConsensus, 'first', workdir,
                               time_str)
                genes_present_coverage_depth = write_data_by_gene(gene_list_reference, args.minGeneCoverage, sample,
                                                                  data_by_gene, workdir, time_str, 'first_run',
                                                                  args.minGeneIdentity, 'coverage_depth',
                                                                  args.summary, genes_present_coverage_depth)
                if args.reportSequenceCoverage:
                    genes_present_sequence_coverage = write_data_by_gene(gene_list_reference, args.minGeneCoverage,
                                                                         sample, data_by_gene, workdir, time_str,
                                                                         'first_run', args.minGeneIdentity,
                                                                         'sequence_coverage', args.summary,
                                                                         genes_present_sequence_coverage)
                if args.doubleRun:
                    # Second run maps against the first run's consensus sequences.
                    rematch_second_outdir = os.path.join(sample_outdir, 'rematch_second_run', '')
                    if not os.path.isdir(rematch_second_outdir):
                        os.mkdir(rematch_second_outdir)
                    consensus_concatenated_fasta, consensus_concatenated_gene_list, consensus_concatenated_dict, \
                        number_consensus_with_sequences = \
                        concatenate_extra_seq_2_consensus(consensus_files['noMatter'], reference_file, args.extraSeq,
                                                          rematch_second_outdir)
                    if len(consensus_concatenated_gene_list) > 0:
                        # For MLST, all genes must have consensus sequences for the second run.
                        if args.mlst is None or \
                                (args.mlst is not None and
                                 number_consensus_with_sequences == len(gene_list_reference)):
                            time_taken_rematch_second, run_successfully_rematch_second, data_by_gene, \
                                sample_data_general_second, consensus_files, consensus_sequences = \
                                rematch_module.run_rematch_module(sample, fastq_files, consensus_concatenated_fasta,
                                                                  args.threads, rematch_second_outdir, args.extraSeq,
                                                                  args.minCovPresence, args.minCovCall,
                                                                  args.minFrequencyDominantAllele,
                                                                  args.minGeneCoverage, args.debug, args.numMapLoc,
                                                                  args.minGeneIdentity, 'second',
                                                                  args.softClip_baseQuality, args.softClip_recodeRun,
                                                                  consensus_concatenated_dict,
                                                                  args.softClip_cigarFlagRecode, args.bowtieAlgo,
                                                                  args.bowtieOPT, gene_list_reference,
                                                                  args.notWriteConsensus, clean_run=True)
                            if not args.debug:
                                os.remove(consensus_concatenated_fasta)
                            if run_successfully_rematch_second:
                                if args.mlst is not None and \
                                        (args.mlstRun == 'second' or args.mlstRun == 'all'):
                                    run_get_st(sample, mlst_dicts, consensus_sequences, args.mlstConsensus, 'second',
                                               workdir, time_str)
                                _ = write_data_by_gene(gene_list_reference, args.minGeneCoverage, sample,
                                                       data_by_gene, workdir, time_str, 'second_run',
                                                       args.minGeneIdentity, 'coverage_depth', False, {})
                                if args.reportSequenceCoverage:
                                    _ = write_data_by_gene(gene_list_reference, args.minGeneCoverage, sample,
                                                           data_by_gene, workdir, time_str, 'second_run',
                                                           args.minGeneIdentity, 'sequence_coverage', False, {})
                        else:
                            print('Some sequences missing after ReMatCh module first run. Second run will not be'
                                  ' performed')
                            if os.path.isfile(consensus_concatenated_fasta):
                                os.remove(consensus_concatenated_fasta)
                            if os.path.isdir(rematch_second_outdir):
                                utils.remove_directory(rematch_second_outdir)
                    else:
                        print('No sequences left after ReMatCh module first run. Second run will not be performed')
                        if os.path.isfile(consensus_concatenated_fasta):
                            os.remove(consensus_concatenated_fasta)
                        if os.path.isdir(rematch_second_outdir):
                            utils.remove_directory(rematch_second_outdir)

        # Remove downloaded fastq files unless the user asked to keep them.
        if not searched_fastq_files and not args.keepDownloadedFastq and fastq_files is not None:
            for fastq in fastq_files:
                if os.path.isfile(fastq):
                    os.remove(fastq)

        time_taken = utils.run_time(sample_start_time)

        write_sample_report(sample, workdir, time_str, file_size, run_successfully_fastq,
                            run_successfully_rematch_first, run_successfully_rematch_second, time_taken_fastq,
                            time_taken_rematch_first, time_taken_rematch_second, time_taken,
                            sequencing_information,
                            sample_data_general_first if run_successfully_rematch_first else
                            {'number_absent_genes': None, 'number_genes_multiple_alleles': None,
                             'mean_sample_coverage': None},
                            sample_data_general_second if run_successfully_rematch_second else
                            {'number_absent_genes': None, 'number_genes_multiple_alleles': None,
                             'mean_sample_coverage': None},
                            fastq_files if fastq_files is not None else '')

        # A sample counts as successful when no stage explicitly failed (None = skipped).
        if all([run_successfully_fastq is not False, run_successfully_rematch_first is not False,
                run_successfully_rematch_second is not False]):
            number_samples_successfully += 1

    if args.summary:
        write_summary_report(workdir, 'coverage_depth', time_str, gene_list_reference,
                             genes_present_coverage_depth)
        if args.reportSequenceCoverage:
            write_summary_report(workdir, 'sequence_coverage', time_str, gene_list_reference,
                                 genes_present_sequence_coverage)

    return number_samples_successfully, len(list_ids)
def main():
    """Entry point for ecoli_stx_subtyping: run seq_typing with the stx
    references and post-process the report into stx1/stx2 subtypes."""
    program_name = 'ecoli_stx_subtyping.py'

    # Guard against accidental invocation with a Python 2 interpreter.
    if sys.version_info[0] < 3:
        sys.exit('Must be using Python 3. Try calling "python3 {}"'.format(program_name))

    parser, parser_reads, _, parser_assembly, _ = python_arguments(program_name=program_name, version=version)
    parser.description = 'Gets E. coli stx subtypes'

    # Add specific arguments (same pair of options on both sub-parsers).
    parser_reads.add_argument('--stx2covered', type=float, metavar='N',
                              help='Minimal percentage of sequence covered to consider extra stx2'
                                   ' subtypes (value between [0, 100]) (default: 100)',
                              required=False, default=100)
    parser_reads.add_argument('--stx2identity', type=float, metavar='N',
                              help='Minimal sequence identity to consider extra stx2'
                                   ' subtypes (value between [0, 100]) (default: 99.5)',
                              required=False, default=99.5)
    parser_assembly.add_argument('--stx2covered', type=float, metavar='N',
                                 help='Minimal percentage of sequence covered to consider extra stx2'
                                      ' subtypes (value between [0, 100]) (default: 100)',
                                 required=False, default=100)
    parser_assembly.add_argument('--stx2identity', type=float, metavar='N',
                                 help='Minimal sequence identity to consider extra stx2'
                                      ' subtypes (value between [0, 100]) (default: 99.5)',
                                 required=False, default=99.5)

    args = parser.parse_args()

    # Collect every option problem before reporting, so the user sees all at once.
    errors = []
    for option, value in (('--minGeneCoverage', args.minGeneCoverage),
                          ('--minGeneIdentity', args.minGeneIdentity),
                          ('--stx2covered', args.stx2covered),
                          ('--stx2identity', args.stx2identity)):
        if value < 0 or value > 100:
            errors.append('{option} should be a value between [0, 100]'.format(option=option))
    if args.org != ['stx', 'subtyping']:
        errors.append('Use "--org stx subtyping" with {}'.format(program_name))
    if len(errors) > 0:
        argparse.ArgumentParser(prog='{} options'.format(program_name)).error('\n'.join(errors))

    start_time = time.time()

    # Make sure the output directory exists and is an absolute path.
    args.outdir = os.path.abspath(args.outdir)
    if not os.path.isdir(args.outdir):
        os.makedirs(args.outdir)

    # Start logger
    logfile, time_str = utils.start_logger(args.outdir)
    _ = utils.general_information(script_name=program_name, logfile=logfile, version=version, outdir=args.outdir,
                                  time_str=time_str)
    print('\n')

    removable_folders = []

    # Create modules pickles folder (removed at the end unless --debug).
    pickles_dir = os.path.join(args.outdir, 'pickles', '')
    if not os.path.isdir(pickles_dir):
        os.makedirs(pickles_dir)
    removable_folders.append(pickles_dir)

    # Run the sub-command selected on the command line (reads/assembly).
    folders_2_remove_func, references_results, reference, references_headers = args.func(args)
    removable_folders.extend(folders_2_remove_func)

    # Parse results into the typing reports (return values unused here).
    _, _, _, _, _ = parse_results.parse_results(references_results, reference, references_headers, args.outdir,
                                                args.minGeneCoverage, args.minDepthCoverage, args.typeSeparator)

    report_types_file = os.path.join(args.outdir, 'seq_typing.report_types.tab')
    stx1_reference = [f for f in reference if 'stx1' in os.path.basename(f).lower()][0]
    stx2_reference = [f for f in reference if 'stx2' in os.path.basename(f).lower()][0]
    stx1_result, stx2_result = stx_subtype_parser(report_types_file, stx1_reference, stx2_reference,
                                                  args.stx2covered, args.stx2identity)

    # Rename the file to keep ecoli_stx_subtyping stamp
    if os.path.isfile(report_types_file):
        os.rename(report_types_file,
                  os.path.join(args.outdir, 'seq_typing.ecoli_stx_subtyping.report_types.tab'))

    # Remove the file to only keep the ecoli_stx_subtyping one
    if os.path.isfile(os.path.join(args.outdir, 'seq_typing.report.txt')):
        os.remove(os.path.join(args.outdir, 'seq_typing.report.txt'))

    print('\n'
          'E. coli stx_subtyping - {stx1_result}:{stx2_result}\n'
          '\n'.format(stx1_result=stx1_result, stx2_result=stx2_result))
    with open(os.path.join(args.outdir, 'seq_typing.ecoli_stx_subtyping.txt'), 'wt') as writer:
        writer.write(':'.join([stx1_result, stx2_result]))

    # Clean up temporary folders unless debugging.
    if not args.debug:
        for folder in removable_folders:
            utils.removeDirectory(folder)

    _ = utils.runTime(start_time)