Exemple #1
0
	def gen_final_tree(self):
		"""Produce the final unrooted tree unless it already exists on disk.

		Runs ``raxmlHPC-PTHREADS`` with ``-f b``, passing a starting tree via
		``-t`` and ``RAxML_bootstrap.bootstrap`` via ``-z``, then converts
		``RAxML_bipartitions.final`` from newick to nexus and writes it to
		``self.final_unrooted_tree``. Exits the process with status 1 when the
		RAxML command returns a non-zero exit code.
		"""
		if os.path.isfile(self.final_unrooted_tree):
			self.logger.info("Final RAxML tree has already been generated. Skipping this step...")
			return

		# Model flags are chosen from the ``self.model`` switch.
		model_args = ['-m', 'GTRGAMMA'] if self.model == True else ['-V','-m', 'GTRCAT']

		# The tree handed to -t is either the initial best tree or the tree
		# written by the recombination filter.
		if self.no_recombination_filter == True:
			start_tree = os.path.join(self.initial_trees, "RAxML_bestTree.initial_trees")
		else:
			start_tree = os.path.join(self.recomb_filter, "filtered_core_aln.final_tree.tre")
		# NOTE(review): "-n bootstrap" is followed further down the same
		# command line by "-n final"; kept as-is to preserve the exact command.
		tree_args = ["-n", "bootstrap", "-t", start_tree]

		self.logger.info("Generating final tree...")
		command = ["raxmlHPC-PTHREADS", "-T", str(self.threads)]
		command = command + model_args + tree_args
		command = command + [
			"-p", str(random.randint(10000,99999)),
			"-f", "b",
			"-z", os.path.join(self.phylogenetic_trees, "RAxML_bootstrap.bootstrap"),
			"-n", "final",
			'-w', self.phylogenetic_trees,
		]
		exit_code = snpiphy.run_command(command)
		if exit_code != 0:
			self.logger.error("Final RAxML tree generation has failed.")
			sys.exit(1)

		bipartitions = os.path.join(self.phylogenetic_trees, 'RAxML_bipartitions.final')
		Phylo.convert(bipartitions, 'newick', self.final_unrooted_tree, 'nexus')
Exemple #2
0
	def gen_bootstrap_tree(self):
		"""Run the RAxML bootstrap analysis unless its output already exists.

		Runs ``raxmlHPC-PTHREADS`` with ``-b`` and ``-# 100`` on either the
		trimmed core alignment or the recombination-filtered polymorphic
		sites, then converts ``RAxML_bootstrap.bootstrap`` from newick to nexus
		and writes it to ``self.bootstrap_tree``. Exits the process with status
		1 when the RAxML command returns a non-zero exit code.
		"""
		if os.path.isfile(self.bootstrap_tree):
			self.logger.info("Bootstrap RAxML trees have already been generated. Skipping this step...")
			return

		# Model flags are chosen from the ``self.model`` switch.
		model_args = ['-m', 'GTRGAMMA'] if self.model == True else ['-V','-m', 'GTRCAT']

		# Pick the alignment passed to -s.
		if self.no_recombination_filter == True:
			alignment = os.path.join(self.core_align, 'core.trimmed.aln')
		else:
			alignment = os.path.join(self.recomb_filter, "filtered_core_aln.filtered_polymorphic_sites.fasta")
		input_args = ["-n", "bootstrap", "-s", alignment]

		self.logger.info("Running bootstrap analysis...")
		command = ["raxmlHPC-PTHREADS", "-T", str(self.threads)]
		command = command + model_args + input_args
		# Two independent random seeds: first for -p, then for -b.
		command = command + [
			"-p", str(random.randint(10000,99999)),
			"-b", str(random.randint(10000,99999)),
			"-#", "100",
			'-w', self.phylogenetic_trees,
		]
		exit_code = snpiphy.run_command(command)
		if exit_code != 0:
			self.logger.error("RAxML bootstrap has failed.")
			sys.exit(1)

		bootstrap_output = os.path.join(self.phylogenetic_trees, "RAxML_bootstrap.bootstrap")
		Phylo.convert(bootstrap_output, 'newick', self.bootstrap_tree, 'nexus')
Exemple #3
0
	def run_parallel_snippy(self, commands):
		"""Run the given snippy commands through ``parallel``.

		``commands`` is appended after the ``:::`` separator of a
		``parallel -j <threads>`` invocation. Nothing is executed when
		``commands`` is empty. Exits the process with status 1 when the
		``parallel`` command returns a non-zero exit code.
		"""
		self.logger.info("Running snippy on reads with parallel and {} threads".format(self.threads))
		if len(commands) == 0:
			return

		parallel_cmd = ['parallel', '-j', str(self.threads), ':::'] + commands
		exit_code = snpiphy.run_command(parallel_cmd)
		if exit_code != 0:
			self.logger.error("Error running snippy on one of the reads files. Please check your files and error output.")
			sys.exit(1)
Exemple #4
0
	def run_snippy(self, read_path, reads_type):
		"""Run snippy on a single sequence file against ``self.reference``.

		Args:
			read_path: Path to the sequence file handed to snippy.
			reads_type: ``'P'``, ``'S'`` or ``'A'``; selects the snippy input
				flag (``--peil``, ``--se`` or ``--ctgs`` respectively).

		Output is written to a directory named after the sample inside
		``self.ref_aligns``. Exits the process with status 1 when
		``reads_type`` is not recognised or when snippy returns a non-zero
		exit code.
		"""
		name = snpiphy.get_samplename(read_path)
		self.logger.info("{}: Running snippy...\n".format(name))

		input_flags = {'P': '--peil', 'S': '--se', 'A': '--ctgs'}
		if reads_type not in input_flags:
			# Previously this message was logged unformatted and execution fell
			# through to use an undefined variable; stop here like every other
			# failure path in this class.
			self.logger.error("Cannot determine type for sequence file: {}\tPlease check the pairing status in your input reads list provided and ensure it is either A, P or S".format(read_path))
			sys.exit(1)
		add_cmd = [input_flags[reads_type], read_path]

		ec = snpiphy.run_command([
								'snippy',
								'--cpus', str(self.threads),
								'--prefix', name,
								'--outdir', os.path.join(self.ref_aligns, name),
								'--ref', self.reference ] +
								add_cmd )
		if ec != 0:
			self.logger.error("Error running snippy on sequence file: {}. Please check your files and error output.".format(os.path.basename(read_path)))
			sys.exit(1)
		self.logger.info("{}: snippy has completed successfully.\n".format(name))
Exemple #5
0
	def build_initial_tree(self):
		"""Build the initial phylogenetic tree unless it already exists.

		Runs ``raxmlHPC-PTHREADS`` with ``-# 20`` on the trimmed core
		alignment, writing into ``self.initial_trees``, then converts
		``RAxML_bestTree.initial_trees`` from newick to nexus and writes it to
		``self.init_tree``. Exits the process with status 1 when the RAxML
		command returns a non-zero exit code.
		"""
		if os.path.isfile(self.init_tree):
			self.logger.info("Initial RAxML tree has already been generated. Skipping this step...")
			return

		# Model flags are chosen from the ``self.model`` switch.
		model_args = ['-m', 'GTRGAMMA'] if self.model == True else ['-V','-m', 'GTRCAT']

		self.logger.info("Building inital phylogenetic tree...")
		command = ["raxmlHPC-PTHREADS", "-T", str(self.threads)]
		command = command + model_args
		command = command + [
			"-p", str(random.randint(10000,99999)),
			"-#", "20",
			"-s", os.path.join(self.core_align, 'core.trimmed.aln'),
			"-n", "initial_trees",
			'-w', self.initial_trees,
		]
		exit_code = snpiphy.run_command(command)
		if exit_code != 0:
			self.logger.error("RAxML initial tree building has failed.")
			sys.exit(1)

		best_tree = os.path.join(self.initial_trees, "RAxML_bestTree.initial_trees")
		Phylo.convert(best_tree, 'newick', self.init_tree, 'nexus')
Exemple #6
0
	def filter_recombinant_positions(self):
		"""Filter recombinant positions from the core alignment with gubbins.

		Skipped when ``self.init_filtered_tree`` already exists. Otherwise
		``run_gubbins.py`` is run from inside ``self.recomb_filter`` on
		``core.trimmed.aln`` with ``self.init_tree`` as the starting tree:

		* when ``self.tree_builder`` is ``'fasttree'``, only the fasttree
		  builder is tried;
		* otherwise the default builder is tried first, then ``hybrid``, then
		  ``fasttree``, removing leftover ``core.trimmed.aln.*`` files from
		  ``self.recomb_filter`` between attempts.

		On success ``filtered_core_aln.final_tree.tre`` is converted from
		newick to nexus and written to ``self.init_filtered_tree``. Exits the
		process with status 1 when every attempted method fails.
		"""
		if os.path.isfile(self.init_filtered_tree):
			self.logger.info("Recombination filtering by gubbins has already been done. Skipping this step...")
			return

		# '-r GTRGAMMA' is only passed to the non-fasttree attempts.
		model_args = ['-r', 'GTRGAMMA'] if self.model == True else []
		start_tree_args = ['-s', self.init_tree]
		common_args = [
			'-p', os.path.join(self.recomb_filter, 'filtered_core_aln'),
			'-c', str(self.threads),
			os.path.join(self.core_align, 'core.trimmed.aln'),
		]
		fasttree_args = ['--tree_builder', 'fasttree'] + start_tree_args

		def run_gubbins(extra_args):
			# Run gubbins once and return its exit code.
			return snpiphy.run_command(["run_gubbins.py", "-v"] + extra_args + common_args)

		def remove_partial_output():
			# Delete files left behind by a failed attempt before retrying.
			for leftover in os.listdir(self.recomb_filter):
				if leftover.startswith('core.trimmed.aln.'):
					os.remove(os.path.join(self.recomb_filter, leftover))

		with snpiphy.cd(self.recomb_filter):
			self.logger.info("Scanning and filtering recombination positions with gubbins...")
			if self.tree_builder == 'fasttree':
				if run_gubbins(fasttree_args) != 0:
					self.logger.error("Running gubbins using fasttree method has failed. Please examine your alignment or consider removing highly divergent sequences. Additionally consider using a different reference sequence.")
					sys.exit(1)
			else:
				# Each attempt carries the warning to log if it fails and a
				# further fallback exists; the last attempt has none.
				attempts = [
					(start_tree_args + model_args,
						"Recombination filtering using the RAxML only method has failed. Retrying with FastTree for first iteration."),
					(['--tree_builder', 'hybrid'] + start_tree_args + model_args,
						"Recombination filtering using hybrid RAxML/FastTree method has failed. Retrying with FastTree for all iterations."),
					(fasttree_args, None),
				]
				for extra_args, retry_message in attempts:
					if run_gubbins(extra_args) == 0:
						break
					if retry_message is None:
						self.logger.error("Running gubbins using all methods have failed. Please examine your alignment or consider removing highly divergent sequences. Additionally consider using a different reference sequence.")
						sys.exit(1)
					self.logger.warning(retry_message)
					remove_partial_output()

		Phylo.convert(os.path.join(self.recomb_filter, "filtered_core_aln.final_tree.tre"), 'newick', self.init_filtered_tree, 'nexus')
		self.logger.info("Recombination filtering by gubbins has completed successfully.")
Exemple #7
0
	def run_snippy_core(self):
		"""Build the core genome alignment, excluding poorly covered samples.

		Skipped when ``self.raw_core_aln`` already exists. Otherwise, from
		inside ``self.ref_aligns``:

		1. ``snippy-core`` is run on every name in ``self.sample_names``.
		2. ``core.txt`` is read; for each sample row the coverage is computed
		   as ``100 * column 3 / column 2`` (presumably aligned bases over
		   reference length -- confirm against the snippy-core output format).
		   Samples below ``self.cutoff`` are archived with ``tar`` into
		   ``self.excluded_seqs``, their directory is removed, and the decision
		   is recorded in ``removed_sequences.log``.
		3. The first ``core*`` output files are deleted and ``snippy-core`` is
		   run again on the remaining samples.
		4. The ``core*`` files are moved to ``self.core_align``; ``core.aln``
		   is moved to ``self.raw_core_aln`` and replaced by a symlink to it.

		Exits the process with status 1 when ``snippy-core`` or ``tar`` returns
		a non-zero exit code.
		"""
		self.logger.info("Building core genome alignment from individual reference alignments...")
		if os.path.isfile(self.raw_core_aln):
			self.logger.info("Core alignment has already been generated. Skipping this step...")
			return

		with snpiphy.cd(self.ref_aligns):
			self.logger.debug("Running snippy-core...")
			ec = snpiphy.run_command(["snippy-core", '--ref', self.reference] + self.sample_names)
			if ec != 0:
				self.logger.error("Error building core alignment before distant sequences removed. Please check your files and error output.")
				sys.exit(1)

			self.logger.debug("Examining snippy core output for poorly aligned sequences...")
			bad_seqs = []
			exclude_log_path = os.path.join(self.excluded_seqs, "removed_sequences.log")
			# Context managers make sure both files are closed even when
			# sys.exit() is raised part-way through the loop.
			with open("core.txt", 'r') as core_data_handle, open(exclude_log_path, 'w') as exclude_log:
				for line in core_data_handle:
					if not line.strip():
						# Tolerate blank lines instead of failing on indexing.
						continue
					line_data = line.strip().split("\t")
					sample = line_data[0]
					if sample in ("ID", "Reference"):
						continue
					coverage = 100 * float(line_data[2]) / float(line_data[1])
					if coverage >= self.cutoff:
						self.logger.debug("Sample {} coverage is ok ({:.2f}%)".format(sample,coverage))
						continue

					archive = "{}.alignment.tar.gz".format(sample)
					moved_archive = os.path.join(self.excluded_seqs, "{}.tar.gz".format(sample))
					message = "Sample {} coverage is too low ({:.2f}%), removing from core alignment. Reads and reference mapping data will be retained in archive: {}".format(sample,coverage,archive)
					self.logger.info(message)
					exclude_log.write(message + "\n")
					# Check the exit code of tar itself, not the stale
					# snippy-core exit code.
					tar_ec = snpiphy.run_command(["tar", "cvzf", archive, sample])
					if tar_ec != 0:
						self.logger.error("Error compressing excluded alignment for sample: {}. Please check your files and error output.".format(sample))
						sys.exit(1)
					os.rename(archive, moved_archive)
					shutil.rmtree(sample)
					bad_seqs.append(sample)

			self.logger.debug("Deleting first core alignment...")
			# Match on the file name; matching on the joined path never hit.
			for item in os.listdir(self.ref_aligns):
				item_path = os.path.join(self.ref_aligns, item)
				if item.startswith("core") and os.path.isfile(item_path):
					os.remove(item_path)

			self.logger.debug("Running snippy-core again...")
			excluded = set(bad_seqs)
			kept_samples = [x for x in self.sample_names if x not in excluded]
			ec = snpiphy.run_command(["snippy-core", '--ref', self.reference] + kept_samples)
			if ec != 0:
				self.logger.error("Error building core alignment after distant sequences removed. Please check your files and error output.")
				sys.exit(1)

			self.logger.debug("Moving core alignments to {}".format(self.core_align))
			for item in os.listdir(self.ref_aligns):
				item_path = os.path.join(self.ref_aligns, item)
				if item.startswith("core") and os.path.isfile(item_path):
					shutil.move(item_path, self.core_align)
			# Keep the untrimmed alignment at raw_core_aln and leave a symlink
			# under the original core.aln name.
			shutil.move(os.path.join(self.core_align, "core.aln"), self.raw_core_aln)
			os.symlink(self.raw_core_aln, os.path.join(self.core_align, "core.aln"))