def gen_final_tree(self):
    """Build the final tree by running RAxML in ``-f b`` mode.

    The run combines the bootstrap replicates file
    (``RAxML_bootstrap.bootstrap`` in ``self.phylogenetic_trees``, passed
    with ``-z``) with a starting tree (``-t``):

    * the initial best tree when ``self.no_recombination_filter`` is set,
    * otherwise the gubbins final tree from ``self.recomb_filter``.

    The resulting ``RAxML_bipartitions.final`` file is converted from
    newick to nexus at ``self.final_unrooted_tree``. If that file already
    exists the step is skipped. Exits the process with status 1 when
    RAxML returns a non-zero exit code.
    """
    if os.path.isfile(self.final_unrooted_tree):
        self.logger.info("Final RAxML tree has already been generated. Skipping this step...")
        return

    # Equality (not truthiness) is kept on purpose: these flags are set
    # outside this method and their exact type is not visible here.
    if self.model == True:
        model_args = ['-m', 'GTRGAMMA']
    else:
        model_args = ['-V', '-m', 'GTRCAT']

    if self.no_recombination_filter == True:
        start_tree = os.path.join(self.initial_trees, "RAxML_bestTree.initial_trees")
    else:
        start_tree = os.path.join(self.recomb_filter, "filtered_core_aln.final_tree.tre")

    self.logger.info("Generating final tree...")
    # The run name is given exactly once ("final"). The previous command
    # line also carried a stray "-n bootstrap" copied from the bootstrap
    # step, which contradicted the RAxML_bipartitions.final file read below.
    ec = snpiphy.run_command(
        ["raxmlHPC-PTHREADS", "-T", str(self.threads)]
        + model_args
        + [
            "-t", start_tree,
            "-p", str(random.randint(10000, 99999)),
            "-f", "b",
            "-z", os.path.join(self.phylogenetic_trees, "RAxML_bootstrap.bootstrap"),
            "-n", "final",
            '-w', self.phylogenetic_trees,
        ]
    )
    if ec != 0:
        self.logger.error("Final RAxML tree generation has failed.")
        sys.exit(1)

    Phylo.convert(
        os.path.join(self.phylogenetic_trees, 'RAxML_bipartitions.final'),
        'newick',
        self.final_unrooted_tree,
        'nexus',
    )
def gen_bootstrap_tree(self):
    """Run the RAxML bootstrap analysis (100 replicates) and export it as nexus.

    The input alignment is the trimmed core alignment when
    ``self.no_recombination_filter`` is set, otherwise the filtered
    polymorphic-sites alignment found in ``self.recomb_filter``. RAxML
    writes into ``self.phylogenetic_trees`` under the run name
    ``bootstrap``; its ``RAxML_bootstrap.bootstrap`` output is then
    converted from newick to nexus at ``self.bootstrap_tree``.

    Skipped when ``self.bootstrap_tree`` already exists. Exits the process
    with status 1 when RAxML returns a non-zero exit code.
    """
    if os.path.isfile(self.bootstrap_tree):
        self.logger.info("Bootstrap RAxML trees have already been generated. Skipping this step...")
        return

    # Equality (not truthiness) is intentional; see how the flags are set
    # by the caller before loosening this.
    model_args = ['-m', 'GTRGAMMA'] if self.model == True else ['-V', '-m', 'GTRCAT']

    if self.no_recombination_filter == True:
        alignment = os.path.join(self.core_align, 'core.trimmed.aln')
    else:
        alignment = os.path.join(self.recomb_filter, "filtered_core_aln.filtered_polymorphic_sites.fasta")

    self.logger.info("Running bootstrap analysis...")

    command = ["raxmlHPC-PTHREADS", "-T", str(self.threads)]
    command += model_args
    command += ["-n", "bootstrap", "-s", alignment]
    # Two independent random seeds: one for -p, one for -b.
    command += ["-p", str(random.randint(10000, 99999))]
    command += ["-b", str(random.randint(10000, 99999))]
    command += ["-#", "100", '-w', self.phylogenetic_trees]

    status = snpiphy.run_command(command)
    if status != 0:
        self.logger.error("RAxML bootstrap has failed.")
        sys.exit(1)

    raxml_output = os.path.join(self.phylogenetic_trees, "RAxML_bootstrap.bootstrap")
    Phylo.convert(raxml_output, 'newick', self.bootstrap_tree, 'nexus')
def run_parallel_snippy(self, commands):
    """Hand a batch of snippy command strings to GNU ``parallel``.

    Args:
        commands: List of commands appended after ``:::`` on the
            ``parallel`` command line. Nothing is launched when it is
            empty.

    ``self.threads`` is used as the ``-j`` job count. Exits the process
    with status 1 when ``parallel`` returns a non-zero exit code.
    """
    self.logger.info("Running snippy on reads with parallel and {} threads".format(self.threads))

    if len(commands) == 0:
        return

    launcher = ['parallel', '-j', str(self.threads), ':::']
    status = snpiphy.run_command(launcher + commands)
    if status != 0:
        self.logger.error("Error running snippy on one of the reads files. Please check your files and error output.")
        sys.exit(1)
def run_snippy(self, read_path, reads_type):
    """Run snippy for a single sequence file against ``self.reference``.

    Args:
        read_path: Path to the sequence file handed to snippy.
        reads_type: One-letter input type from the reads list:
            ``'P'`` (passed as ``--peil``), ``'S'`` (``--se``) or
            ``'A'`` (``--ctgs``).

    Output goes to ``<self.ref_aligns>/<sample name>``. Exits the process
    with status 1 when ``reads_type`` is not recognised or when snippy
    returns a non-zero exit code.
    """
    input_flags = {'P': '--peil', 'S': '--se', 'A': '--ctgs'}
    input_flag = input_flags.get(reads_type)
    if input_flag is None:
        # Validate before doing any work. Previously this branch logged an
        # unformatted message and fell through, crashing on the undefined
        # input arguments instead of exiting cleanly.
        self.logger.error(
            "Cannot determine type for sequence file: {}\tPlease check the pairing status in your input reads list provided and ensure it is either A, P or S".format(
                os.path.basename(read_path)
            )
        )
        sys.exit(1)

    name = snpiphy.get_samplename(read_path)
    self.logger.info("{}: Running snippy...\n".format(name))

    ec = snpiphy.run_command(
        [
            'snippy',
            '--cpus', str(self.threads),
            '--prefix', name,
            '--outdir', os.path.join(self.ref_aligns, name),
            '--ref', self.reference,
            input_flag, read_path,
        ]
    )
    if ec != 0:
        self.logger.error(
            "Error running snippy on sequence file: {}. Please check your files and error output.".format(
                os.path.basename(read_path)
            )
        )
        sys.exit(1)

    self.logger.info("{}: snippy has completed successfully.\n".format(name))
def build_initial_tree(self):
    """Build the initial RAxML tree from the trimmed core alignment.

    RAxML is run with ``-# 20`` on ``core.trimmed.aln`` (from
    ``self.core_align``) under the run name ``initial_trees``, writing
    into ``self.initial_trees``. The ``RAxML_bestTree.initial_trees``
    output is then converted from newick to nexus at ``self.init_tree``.

    Skipped when ``self.init_tree`` already exists. Exits the process with
    status 1 when RAxML returns a non-zero exit code.
    """
    if os.path.isfile(self.init_tree):
        self.logger.info("Initial RAxML tree has already been generated. Skipping this step...")
        return

    # Equality (not truthiness) is intentional; the flag is set elsewhere.
    model_args = ['-m', 'GTRGAMMA'] if self.model == True else ['-V', '-m', 'GTRCAT']

    self.logger.info("Building inital phylogenetic tree...")

    command = ["raxmlHPC-PTHREADS", "-T", str(self.threads)]
    command += model_args
    command += ["-p", str(random.randint(10000, 99999)), "-#", "20"]
    command += ["-s", os.path.join(self.core_align, 'core.trimmed.aln')]
    command += ["-n", "initial_trees", '-w', self.initial_trees]

    status = snpiphy.run_command(command)
    if status != 0:
        self.logger.error("RAxML initial tree building has failed.")
        sys.exit(1)

    best_tree = os.path.join(self.initial_trees, "RAxML_bestTree.initial_trees")
    Phylo.convert(best_tree, 'newick', self.init_tree, 'nexus')
def filter_recombinant_positions(self):
    """Filter recombinant positions from the core alignment with gubbins.

    ``run_gubbins.py`` is run inside ``self.recomb_filter`` on
    ``core.trimmed.aln`` with ``self.init_tree`` as the starting tree
    (``-s``) and ``filtered_core_aln`` as the output prefix.

    * When ``self.tree_builder`` is ``'fasttree'`` a single FastTree run
      is attempted.
    * Otherwise the default tree builder is tried first, then
      ``--tree_builder hybrid``, then ``--tree_builder fasttree``.
      Leftover ``core.trimmed.aln.*`` files are removed between attempts.

    On success ``filtered_core_aln.final_tree.tre`` is converted from
    newick to nexus at ``self.init_filtered_tree``. Skipped when that file
    already exists. Exits the process with status 1 when every applicable
    attempt fails.
    """
    if os.path.isfile(self.init_filtered_tree):
        self.logger.info("Recombination filtering by gubbins has already been done. Skipping this step...")
        return

    # Equality (not truthiness) is intentional; the flag is set elsewhere.
    if self.model == True:
        model_args = ['-r', 'GTRGAMMA']
    else:
        model_args = []

    start_tree_args = ['-s', self.init_tree]

    def gubbins_cmd(variable_args):
        """Build a run_gubbins.py command; only *variable_args* differs per attempt."""
        return (
            ["run_gubbins.py", "-v"]
            + variable_args
            + [
                '-p', os.path.join(self.recomb_filter, 'filtered_core_aln'),
                '-c', str(self.threads),
                os.path.join(self.core_align, 'core.trimmed.aln'),
            ]
        )

    def purge_partial_output():
        """Delete intermediate core.trimmed.aln.* files left by a failed attempt."""
        for entry in os.listdir(self.recomb_filter):
            if entry.startswith('core.trimmed.aln.'):
                # Join with the listed directory so the removal does not
                # depend on the current working directory.
                os.remove(os.path.join(self.recomb_filter, entry))

    # The FastTree-only runs do not receive the RAxML model arguments.
    fasttree_cmd = gubbins_cmd(['--tree_builder', 'fasttree'] + start_tree_args)

    with snpiphy.cd(self.recomb_filter):
        self.logger.info("Scanning and filtering recombination positions with gubbins...")
        if self.tree_builder == 'fasttree':
            ec = snpiphy.run_command(fasttree_cmd)
            if ec != 0:
                self.logger.error("Running gubbins using fasttree method has failed. Please examine your alignment or consider removing highly divergent sequences. Additionally consider using a different reference sequence.")
                sys.exit(1)
        else:
            # This command was previously built as `[...] + '-s', tree, ...`,
            # which raises TypeError before gubbins is ever started.
            ec = snpiphy.run_command(gubbins_cmd(start_tree_args + model_args))
            if ec != 0:
                self.logger.warning("Recombination filtering using the RAxML only method has failed. Retrying with FastTree for first iteration.")
                purge_partial_output()
                ec = snpiphy.run_command(
                    gubbins_cmd(['--tree_builder', 'hybrid'] + start_tree_args + model_args)
                )
                if ec != 0:
                    self.logger.warning("Recombination filtering using hybrid RAxML/FastTree method has failed. Retrying with FastTree for all iterations.")
                    purge_partial_output()
                    ec = snpiphy.run_command(fasttree_cmd)
                    if ec != 0:
                        self.logger.error("Running gubbins using all methods have failed. Please examine your alignment or consider removing highly divergent sequences. Additionally consider using a different reference sequence.")
                        sys.exit(1)

    Phylo.convert(
        os.path.join(self.recomb_filter, "filtered_core_aln.final_tree.tre"),
        'newick',
        self.init_filtered_tree,
        'nexus',
    )
    self.logger.info("Recombination filtering by gubbins has completed successfully.")
def run_snippy_core(self):
    """Build the core alignment, dropping samples with too little coverage.

    Steps, all executed with ``self.ref_aligns`` as working directory:

    1. Run ``snippy-core`` over every name in ``self.sample_names``.
    2. Read ``core.txt``; for each sample row compute
       ``100 * column[2] / column[1]`` and compare it to ``self.cutoff``.
       Samples below the cutoff are tar-gzipped, moved to
       ``self.excluded_seqs``, removed from ``self.ref_aligns`` and
       recorded in ``removed_sequences.log``.
    3. Delete the first set of ``core*`` output files and run
       ``snippy-core`` again on the remaining samples.
    4. Move the new ``core*`` files to ``self.core_align``, rename
       ``core.aln`` to ``self.raw_core_aln`` and leave a ``core.aln``
       symlink pointing at it.

    Skipped when ``self.raw_core_aln`` already exists. Exits the process
    with status 1 when ``snippy-core`` or ``tar`` returns a non-zero exit
    code.
    """
    self.logger.info("Building core genome alignment from individual reference alignments...")
    if os.path.isfile(self.raw_core_aln):
        self.logger.info("Core alignment has already been generated. Skipping this step...")
        return

    def core_output_files():
        """Return full paths of regular files named core* in self.ref_aligns."""
        found = []
        for entry in os.listdir(self.ref_aligns):
            full_path = os.path.join(self.ref_aligns, entry)
            # Match on the entry name, not on the joined path, and test the
            # joined path so the result does not depend on the working dir.
            if entry.startswith("core") and os.path.isfile(full_path):
                found.append(full_path)
        return found

    with snpiphy.cd(self.ref_aligns):
        self.logger.debug("Running snippy-core...")
        ec = snpiphy.run_command(["snippy-core", '--ref', self.reference] + self.sample_names)
        if ec != 0:
            self.logger.error("Error building core alignment before distant sequences removed. Please check your files and error output.")
            sys.exit(1)

        self.logger.debug("Examining snippy core output for poorly aligned sequences...")
        bad_seqs = set()
        exclude_log_path = os.path.join(self.excluded_seqs, "removed_sequences.log")
        # Context managers close both files even when we exit on a tar error.
        with open("core.txt", 'r') as core_data_handle, open(exclude_log_path, 'w') as exclude_log:
            for line in core_data_handle:
                if not line.strip():
                    continue  # tolerate blank/trailing lines
                line_data = line.strip().split("\t")
                sample = line_data[0]
                if sample == "ID" or sample == "Reference":
                    continue

                coverage = 100 * float(line_data[2]) / float(line_data[1])
                if coverage >= self.cutoff:
                    self.logger.debug("Sample {} coverage is ok ({:.2f}%)".format(sample, coverage))
                    continue

                archive = "{}.alignment.tar.gz".format(sample)
                moved_archive = os.path.join(self.excluded_seqs, "{}.tar.gz".format(sample))
                # Report the archive's final location rather than the
                # temporary name it has before being moved.
                notice = "Sample {} coverage is too low ({:.2f}%), removing from core alignment. Reads and reference mapping data will be retained in archive: {}".format(
                    sample, coverage, moved_archive
                )
                self.logger.info(notice)
                exclude_log.write(notice + "\n")

                tar_ec = snpiphy.run_command(["tar", "cvzf", archive, sample])
                # Check tar's own exit status; the snippy-core status checked
                # above is always 0 by the time we get here.
                if tar_ec != 0:
                    self.logger.error("Error compressing excluded alignment for sample: {}. Please check your files and error output.".format(sample))
                    sys.exit(1)

                # shutil.move also works when excluded_seqs is on another filesystem.
                shutil.move(archive, moved_archive)
                shutil.rmtree(sample)
                bad_seqs.add(sample)

        self.logger.debug("Deleting first core alignment...")
        for stale in core_output_files():
            os.remove(stale)

        self.logger.debug("Running snippy-core again...")
        kept_samples = [x for x in self.sample_names if x not in bad_seqs]
        ec = snpiphy.run_command(["snippy-core", '--ref', self.reference] + kept_samples)
        if ec != 0:
            self.logger.error("Error building core alignment after distant sequences removed. Please check your files and error output.")
            sys.exit(1)

        self.logger.debug("Moving core alignments to {}".format(self.core_align))
        for produced in core_output_files():
            shutil.move(produced, self.core_align)

        # Keep the raw alignment under its own name and leave core.aln as a
        # symlink to it.
        core_aln_link = os.path.join(self.core_align, "core.aln")
        shutil.move(core_aln_link, self.raw_core_aln)
        os.symlink(self.raw_core_aln, core_aln_link)